mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Compare commits
	
		
			20 Commits
		
	
	
		
			9a1ad6a5eb
			...
			feature/CG
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					70068cff51 | ||
| 
						 | 
					85c055fa30 | ||
| 
						 | 
					90fedbd2af | ||
| 
						 | 
					ec0c53fa68 | ||
| 
						 | 
					6ceee102e8 | ||
| 
						 | 
					6e57bdb6b3 | ||
| 
						 | 
					4c11e36d3d | ||
| 
						 | 
					9977c53035 | ||
| 
						 | 
					3a74fec62f | ||
| 
						 | 
					8fb0a13f39 | ||
| 
						 | 
					14a1406f54 | ||
| 
						 | 
					538e64e5b4 | ||
| 
						 | 
					b2dc17e160 | ||
| 
						 | 
					afbbcd2194 | ||
| 
						 | 
					d4e0b11bb1 | ||
| 
						 | 
					7144ee7ae8 | ||
| 
						 | 
					f1908c7bc9 | ||
| 
						 | 
					036ec31c48 | ||
| 
						 | 
					53f240200e | ||
| 
						 | 
					9720c9ba3f | 
							
								
								
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/Bitwise.cc
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Bitwise.h>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <climits>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
void show_binaryrep(const unsigned char* a, size_t size) {
 | 
			
		||||
  const unsigned char* beg = a;
 | 
			
		||||
  const unsigned char* end = a + size;
 | 
			
		||||
  unsigned int ctr = 0;
 | 
			
		||||
  while (beg != end) {
 | 
			
		||||
    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
 | 
			
		||||
    ctr++;
 | 
			
		||||
    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << '\n';
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace 
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,76 @@
 | 
			
		||||
/*************************************************************************************
 | 
			
		||||
 | 
			
		||||
Grid physics library, www.github.com/paboyle/Grid
 | 
			
		||||
 | 
			
		||||
Source file: ./lib/Bitwise.h
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2016
 | 
			
		||||
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
(at your option) any later version.
 | 
			
		||||
 | 
			
		||||
This program is distributed in the hope that it will be useful,
 | 
			
		||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
You should have received a copy of the GNU General Public License along
 | 
			
		||||
with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
See the full license in the file "LICENSE" in the top level distribution
 | 
			
		||||
directory
 | 
			
		||||
*************************************************************************************/
 | 
			
		||||
/*  END LEGAL */
 | 
			
		||||
#ifndef GRID_BITWISE_H
 | 
			
		||||
#define GRID_BITWISE_H
 | 
			
		||||
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <cfloat>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <climits>
 | 
			
		||||
#include <Config.h>
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_DEFAULT_PRECISION_SINGLE
 | 
			
		||||
#define GRID_REAL_BYTES 4
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
			
		||||
#define GRID_REAL_BYTES 8
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
void show_binaryrep(const unsigned char* a, size_t size);
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
void show_binaryrep(const T& a) {
 | 
			
		||||
  const char* beg = reinterpret_cast<const char*>(&a);
 | 
			
		||||
  const char* end = beg + sizeof(a);
 | 
			
		||||
  unsigned int ctr = 0;
 | 
			
		||||
  while (beg != end) {
 | 
			
		||||
  	std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
 | 
			
		||||
  	ctr++;
 | 
			
		||||
  	if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
 | 
			
		||||
  }
 | 
			
		||||
  std::cout << '\n';
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
void bitwise_xor(T& l, T& r, unsigned char* xors) {
 | 
			
		||||
  assert(sizeof(l) == sizeof(r));
 | 
			
		||||
  unsigned char* org = reinterpret_cast<unsigned char*>(&l);
 | 
			
		||||
  unsigned char* cur = reinterpret_cast<unsigned char*>(&r);
 | 
			
		||||
  int words = sizeof(l) / sizeof(*org);
 | 
			
		||||
  unsigned char result = 0;
 | 
			
		||||
  for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}; // namespace 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/serialisation/Serialisation.h>
 | 
			
		||||
#include "Config.h"
 | 
			
		||||
#include <Grid/Timer.h>
 | 
			
		||||
#include <Grid/Bitwise.h>
 | 
			
		||||
#include <Grid/PerfCount.h>
 | 
			
		||||
#include <Grid/Log.h>
 | 
			
		||||
#include <Grid/AlignedAllocator.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) {
 | 
			
		||||
  me = shmem_my_pe();
 | 
			
		||||
#endif
 | 
			
		||||
  if (me) {
 | 
			
		||||
    std::cout.setstate(std::ios::badbit);
 | 
			
		||||
    std::cout.setstate(std::ios::badbit);// mute all nodes except 0
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifdef GRID_OMP
 | 
			
		||||
#include <omp.h>
 | 
			
		||||
#ifdef GRID_NUMA
 | 
			
		||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 | 
			
		||||
  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 | 
			
		||||
  #else
 | 
			
		||||
  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
 | 
			
		||||
  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 | 
			
		||||
#endif
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 | 
			
		||||
#define PARALLEL_REGION       _Pragma("omp parallel")
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2    _Pragma("omp parallel for collapse(2)")
 | 
			
		||||
#define PARALLEL_REGION          _Pragma("omp parallel")
 | 
			
		||||
#define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)")
 | 
			
		||||
#else
 | 
			
		||||
#define PARALLEL_FOR_LOOP
 | 
			
		||||
#define PARALLEL_FOR_LOOP_INTERN
 | 
			
		||||
#define PARALLEL_NESTED_LOOP2
 | 
			
		||||
#define PARALLEL_REGION
 | 
			
		||||
#define PARALLEL_FOR_LOOP_STATIC
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,7 @@ Copyright (C) 2015
 | 
			
		||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
			
		||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
This program is free software; you can redistribute it and/or modify
 | 
			
		||||
it under the terms of the GNU General Public License as published by
 | 
			
		||||
@@ -33,6 +34,21 @@ directory
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
struct CG_state {
 | 
			
		||||
  bool do_repro;
 | 
			
		||||
  std::vector<RealD> residuals;
 | 
			
		||||
 | 
			
		||||
  CG_state() {reset();}
 | 
			
		||||
 | 
			
		||||
  void reset(){
 | 
			
		||||
    do_repro = false;
 | 
			
		||||
    residuals.clear();
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
enum CGexec_mode{ Default, ReproducibilityTest };
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////
 | 
			
		||||
// Base classes for iterative processes based on operators
 | 
			
		||||
// single input vec, single output vec.
 | 
			
		||||
@@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
                           // Defaults true.
 | 
			
		||||
  RealD Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
			
		||||
      : Tolerance(tol),
 | 
			
		||||
        MaxIterations(maxit),
 | 
			
		||||
        ErrorOnNoConverge(err_on_no_conv){};
 | 
			
		||||
 | 
			
		||||
  // Reproducibility controls
 | 
			
		||||
  bool ReproTest;
 | 
			
		||||
  CG_state CGState; //to check reproducibility by repeating the CG
 | 
			
		||||
  ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner proucts
 | 
			
		||||
 | 
			
		||||
  // Constructor
 | 
			
		||||
  ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
 | 
			
		||||
    : Tolerance(tol),MaxIterations(maxit){
 | 
			
		||||
    switch(Mode)
 | 
			
		||||
    {
 | 
			
		||||
      case Default  : 
 | 
			
		||||
      ErrorOnNoConverge = true;
 | 
			
		||||
      ReproTest = false;
 | 
			
		||||
      case ReproducibilityTest :
 | 
			
		||||
      ErrorOnNoConverge = false;
 | 
			
		||||
      ReproTest = true; 
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void set_reproducibility_interval(unsigned int interval){
 | 
			
		||||
    ReprTest.interval = interval;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
 | 
			
		||||
                  Field &psi) {
 | 
			
		||||
@@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
    Field p(src);
 | 
			
		||||
    Field mmp(src);
 | 
			
		||||
    Field r(src);
 | 
			
		||||
    Field psi_start(psi);// save for the repro test
 | 
			
		||||
 | 
			
		||||
    if (CGState.do_repro && ReproTest)
 | 
			
		||||
        std::cout << GridLogMessage << "Starting reproducibility test, full check every "
 | 
			
		||||
                  << ReprTest.interval << " calls" << std::endl;
 | 
			
		||||
 | 
			
		||||
    if(!ReprTest.do_check)
 | 
			
		||||
        ReprTest.reset();
 | 
			
		||||
    ReprTest.enable_reprocheck=ReproTest;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // Initial residual computation & set up
 | 
			
		||||
    RealD guess = norm2(psi);
 | 
			
		||||
    RealD guess = norm2(psi, ReprTest);
 | 
			
		||||
    assert(std::isnan(guess) == 0);
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
			
		||||
    
 | 
			
		||||
    Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check
 | 
			
		||||
    
 | 
			
		||||
    r = src - mmp;
 | 
			
		||||
    p = r;
 | 
			
		||||
 | 
			
		||||
    a = norm2(p);
 | 
			
		||||
    a = norm2(p, ReprTest);
 | 
			
		||||
    cp = a;
 | 
			
		||||
    ssq = norm2(src);
 | 
			
		||||
    ssq = norm2(src, ReprTest);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient: guess " << guess << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:   src " << ssq << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:    mp " << d << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:   mmp " << b << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:  cp,r " << cp << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
			
		||||
              << "ConjugateGradient:     p " << a << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient:   src " << ssq << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient:    mp " << d << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient:   mmp " << b << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient:  cp,r " << cp << std::endl;
 | 
			
		||||
    std::cout << GridLogIterative << "ConjugateGradient:     p " << a << std::endl;
 | 
			
		||||
 | 
			
		||||
    RealD rsq = Tolerance * Tolerance * ssq;
 | 
			
		||||
 | 
			
		||||
@@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
    SolverTimer.Start();
 | 
			
		||||
    int k;
 | 
			
		||||
    for (k = 1; k <= MaxIterations; k++) {
 | 
			
		||||
      c = cp;
 | 
			
		||||
      c = cp;// old residual
 | 
			
		||||
 | 
			
		||||
      MatrixTimer.Start();
 | 
			
		||||
      Linop.HermOpAndNorm(p, mmp, d, qq);
 | 
			
		||||
      Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp
 | 
			
		||||
      MatrixTimer.Stop();
 | 
			
		||||
 | 
			
		||||
      LinalgTimer.Start();
 | 
			
		||||
@@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
      //  ComplexD dck  = innerProduct(p,mmp);
 | 
			
		||||
 | 
			
		||||
      a = c / d;
 | 
			
		||||
      b_pred = a * (a * qq - d) / c;
 | 
			
		||||
      b_pred = a * (a * qq - d) / c;// a check
 | 
			
		||||
 | 
			
		||||
      cp = axpy_norm(r, -a, mmp, r);
 | 
			
		||||
 | 
			
		||||
      axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap
 | 
			
		||||
      cp = norm2(r, ReprTest); // bookkeeping this norm
 | 
			
		||||
      if (ReproTest && !CGState.do_repro) {
 | 
			
		||||
        CGState.residuals.push_back(cp);  // save residuals state
 | 
			
		||||
                std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
 | 
			
		||||
        }
 | 
			
		||||
      if (ReproTest && CGState.do_repro){
 | 
			
		||||
        // check that the residual agrees with the previous run
 | 
			
		||||
        std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
 | 
			
		||||
        if (cp != CGState.residuals[k-1]){
 | 
			
		||||
                std::cout << GridLogMessage << "Failing reproducibility test";
 | 
			
		||||
                std::cout << GridLogMessage << " at k=" << k << std::endl;
 | 
			
		||||
                std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1] 
 | 
			
		||||
                        << " cp = " << cp << std::endl;
 | 
			
		||||
                exit(1);  // exit after the first failure
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      b = cp / c;
 | 
			
		||||
 | 
			
		||||
      // Fuse these loops ; should be really easy
 | 
			
		||||
      psi = a * p + psi;
 | 
			
		||||
      p = p * b + r;
 | 
			
		||||
      psi = a * p + psi; // update solution
 | 
			
		||||
      p = p * b + r;  // update search direction
 | 
			
		||||
 | 
			
		||||
      LinalgTimer.Stop();
 | 
			
		||||
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
			
		||||
@@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
			
		||||
 | 
			
		||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
			
		||||
 | 
			
		||||
        if (! (CGState.do_repro && ReproTest)){
 | 
			
		||||
                CGState.do_repro = true;
 | 
			
		||||
                ReprTest.do_check = true;
 | 
			
		||||
                ReprTest.reset_counter();
 | 
			
		||||
                this->operator()(Linop, src, psi_start);// run the repro test
 | 
			
		||||
                if (ReprTest.success)
 | 
			
		||||
                	std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
 | 
			
		||||
                else{
 | 
			
		||||
                	std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
 | 
			
		||||
                	exit(1);
 | 
			
		||||
                }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Clear state
 | 
			
		||||
        CGState.reset();
 | 
			
		||||
        ReprTest.reset();
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -59,7 +59,7 @@ public:
 | 
			
		||||
 | 
			
		||||
    int Nstop;   // Number of evecs checked for convergence
 | 
			
		||||
    int Nk;      // Number of converged sought
 | 
			
		||||
    int Np;      // Np -- Number of spare vecs in kryloc space
 | 
			
		||||
    int Np;      // Np -- Number of spare vecs in krylov space
 | 
			
		||||
    int Nm;      // Nm -- total number of vectors
 | 
			
		||||
 | 
			
		||||
    RealD eresid;
 | 
			
		||||
 
 | 
			
		||||
@@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
 | 
			
		||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
 | 
			
		||||
int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// very VERY rarely (Log, serial RNG) we need world without a grid
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
 | 
			
		||||
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						       void *xmit,
 | 
			
		||||
						       int xmit_to_rank,
 | 
			
		||||
						       void *recv,
 | 
			
		||||
						       int recv_from_rank,
 | 
			
		||||
						       int bytes)
 | 
			
		||||
                                                       void *xmit,
 | 
			
		||||
                                                       int xmit_to_rank,
 | 
			
		||||
                                                       void *recv,
 | 
			
		||||
                                                       int recv_from_rank,
 | 
			
		||||
                                                       int bytes)
 | 
			
		||||
{
 | 
			
		||||
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -68,6 +68,8 @@ class CartesianCommunicator {
 | 
			
		||||
  static MPI_Comm communicator_world;
 | 
			
		||||
         MPI_Comm communicator;
 | 
			
		||||
  typedef MPI_Request CommsRequest_t;
 | 
			
		||||
  static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
 | 
			
		||||
  static int length; 
 | 
			
		||||
#else 
 | 
			
		||||
  typedef int CommsRequest_t;
 | 
			
		||||
#endif
 | 
			
		||||
@@ -149,6 +151,7 @@ class CartesianCommunicator {
 | 
			
		||||
  const std::vector<int> & ProcessorGrid(void)     ;
 | 
			
		||||
  int                      ProcessorCount(void)    ;
 | 
			
		||||
 | 
			
		||||
  void        		 PrintRankInfo(void)     ;
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // very VERY rarely (Log, serial RNG) we need world without a grid
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -35,6 +35,8 @@ namespace Grid {
 | 
			
		||||
// Info that is setup once and indept of cartesian layout
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
MPI_Comm CartesianCommunicator::communicator_world;
 | 
			
		||||
char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
 | 
			
		||||
int CartesianCommunicator::length; 
 | 
			
		||||
 | 
			
		||||
// Should error check all MPI calls.
 | 
			
		||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
@@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
			
		||||
    MPI_Init(argc,argv);
 | 
			
		||||
  }
 | 
			
		||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
			
		||||
 | 
			
		||||
  MPI_Get_processor_name(name, &length);
 | 
			
		||||
  ShmInitGeneric();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::PrintRankInfo(){
 | 
			
		||||
    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}// end of namespace
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
  assert(ierr==0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  void CartesianCommunicator::PrintRankInfo(){
 | 
			
		||||
    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
 | 
			
		||||
  return NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  void CartesianCommunicator::PrintRankInfo(){
 | 
			
		||||
    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
 | 
			
		||||
  ShmInitGeneric();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::PrintRankInfo(){
 | 
			
		||||
    std::cout << GridLogMessage << "No Rank Info available" << std::endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
			
		||||
{
 | 
			
		||||
  _processors = processors;
 | 
			
		||||
@@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){}
 | 
			
		||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
			
		||||
 | 
			
		||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int xmit_to_rank,
 | 
			
		||||
					   int recv_from_rank,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
                                           void *recv,
 | 
			
		||||
                                           int xmit_to_rank,
 | 
			
		||||
                                           int recv_from_rank,
 | 
			
		||||
                                           int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
@@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
			
		||||
 | 
			
		||||
// Basic Halo comms primitive -- should never call in single node
 | 
			
		||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
			
		||||
					   int dest,
 | 
			
		||||
					   void *recv,
 | 
			
		||||
					   int from,
 | 
			
		||||
					   int bytes)
 | 
			
		||||
                                           int dest,
 | 
			
		||||
                                           void *recv,
 | 
			
		||||
                                           int from,
 | 
			
		||||
                                           int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
			
		||||
						void *xmit,
 | 
			
		||||
						int dest,
 | 
			
		||||
						void *recv,
 | 
			
		||||
						int from,
 | 
			
		||||
						int bytes)
 | 
			
		||||
                                                void *xmit,
 | 
			
		||||
                                                int dest,
 | 
			
		||||
                                                void *recv,
 | 
			
		||||
                                                int from,
 | 
			
		||||
                                                int bytes)
 | 
			
		||||
{
 | 
			
		||||
  assert(0);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  void CartesianCommunicator::PrintRankInfo(){
 | 
			
		||||
    std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#define GRID_LATTICE_REDUCTION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
template <class T>
 | 
			
		||||
class ReproducibilityState {
 | 
			
		||||
 public:
 | 
			
		||||
  typedef typename T::vector_type vector_type;
 | 
			
		||||
  typedef std::vector<vector_type, alignedAllocator<vector_type> >  sum_type;
 | 
			
		||||
  unsigned int  n_call;
 | 
			
		||||
  bool          do_check;
 | 
			
		||||
  bool          enable_reprocheck;
 | 
			
		||||
  bool          success;
 | 
			
		||||
  unsigned int  interval; 
 | 
			
		||||
  std::vector<sum_type> th_states;
 | 
			
		||||
 | 
			
		||||
  void reset_counter() { n_call = 0; }
 | 
			
		||||
  void reset() {
 | 
			
		||||
    th_states.clear();
 | 
			
		||||
    do_check          = false;
 | 
			
		||||
    enable_reprocheck = false;
 | 
			
		||||
    success           = true;
 | 
			
		||||
    n_call            = 0;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  ReproducibilityState():interval(1) { 
 | 
			
		||||
    reset(); 
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check(GridBase* grid, sum_type &sumarray){
 | 
			
		||||
    ///////////////////////  Reproducibility section, not threaded on purpouse
 | 
			
		||||
    if (enable_reprocheck) {
 | 
			
		||||
      if (do_check && (n_call % interval) == 0) {
 | 
			
		||||
        for (int thread = 0; thread < sumarray.size(); thread++) {
 | 
			
		||||
          int words = sizeof(sumarray[thread])/sizeof(unsigned char);
 | 
			
		||||
          unsigned char xors[words];
 | 
			
		||||
          bitwise_xor(sumarray[thread], th_states[n_call][thread],xors);
 | 
			
		||||
          // OR all words
 | 
			
		||||
          unsigned char res = 0;
 | 
			
		||||
          for (int w = 0; w < words; w++) res = res | xors[w];
 | 
			
		||||
 | 
			
		||||
          Grid_unquiesce_nodes();
 | 
			
		||||
          int rank = 0;
 | 
			
		||||
          while (rank < grid->_Nprocessors){
 | 
			
		||||
            if (rank == grid->ThisRank() ){
 | 
			
		||||
              if ( res ) {
 | 
			
		||||
                std::cout << "Reproducibility failure report" << std::endl;
 | 
			
		||||
                grid->PrintRankInfo();
 | 
			
		||||
                std::cout << "Call: "<< n_call << " Thread: " << thread << std::endl;
 | 
			
		||||
                std::cout << "Size of states: " << th_states.size() << std::endl;
 | 
			
		||||
                std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific;
 | 
			
		||||
                std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl;
 | 
			
		||||
                std::cout << "Current partial sum: " << sumarray[thread] << std::endl;
 | 
			
		||||
                std::cout << "Saved state "  << std::endl; show_binaryrep(th_states[n_call][thread]);
 | 
			
		||||
                std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]);
 | 
			
		||||
                std::cout << "XOR result"    << std::endl; show_binaryrep(xors, words);
 | 
			
		||||
              		//std::cout << std::defaultfloat;  //not supported by some compilers
 | 
			
		||||
                std::cout << std::setprecision(6);
 | 
			
		||||
                success = false; 
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
            rank++;
 | 
			
		||||
            grid->Barrier();
 | 
			
		||||
          }
 | 
			
		||||
          Grid_quiesce_nodes();                         
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      } else if (!do_check)
 | 
			
		||||
      {
 | 
			
		||||
        std::cout << GridLogDebug << "Saving thread state for inner product. Call n. " 
 | 
			
		||||
        << n_call << std::endl;
 | 
			
		||||
        th_states.resize(n_call+1);
 | 
			
		||||
        th_states[n_call].resize(grid->SumArraySize());
 | 
			
		||||
          th_states[n_call] = sumarray;  // save threads state
 | 
			
		||||
          //n_call++;
 | 
			
		||||
        }
 | 
			
		||||
        n_call++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_WARN_SUBOPTIMAL
 | 
			
		||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
 | 
			
		||||
#endif     
 | 
			
		||||
@@ -39,12 +120,25 @@ namespace Grid {
 | 
			
		||||
    // Deterministic Reduction operations
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
 | 
			
		||||
    ComplexD nrm = innerProduct(arg,arg);
 | 
			
		||||
    return std::real(nrm); 
 | 
			
		||||
  }
 | 
			
		||||
        ReproducibilityState<vobj> repr;
 | 
			
		||||
        ComplexD nrm = innerProduct(arg, arg, repr);
 | 
			
		||||
        return std::real(nrm); 
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <class vobj>
 | 
			
		||||
    inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) {
 | 
			
		||||
      ComplexD nrm = innerProduct(arg, arg, rep);
 | 
			
		||||
      return std::real(nrm);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) 
 | 
			
		||||
    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){
 | 
			
		||||
      ReproducibilityState<vobj> repr;
 | 
			
		||||
      return innerProduct(left, right, repr);
 | 
			
		||||
    } 
 | 
			
		||||
    
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr) 
 | 
			
		||||
    {
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
@@ -54,24 +148,28 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
      std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
 | 
			
		||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
	sumarray[i]=zero;
 | 
			
		||||
        sumarray[i]=zero;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
      PARALLEL_FOR_LOOP_STATIC //request statically scheduled threads for reproducibility
 | 
			
		||||
      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
			
		||||
	int nwork, mywork, myoff;
 | 
			
		||||
	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
        int nwork, mywork, myoff;
 | 
			
		||||
        GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
        
 | 
			
		||||
	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
 | 
			
		||||
        decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation
 | 
			
		||||
        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
			
		||||
	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
			
		||||
	}
 | 
			
		||||
	sumarray[thr]=TensorRemove(vnrm) ;
 | 
			
		||||
          vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        sumarray[thr]=TensorRemove(vnrm) ;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      /////////// Reproducibility
 | 
			
		||||
      repr.check(grid, sumarray);
 | 
			
		||||
      ///////////////////////////
 | 
			
		||||
 | 
			
		||||
      vector_type vvnrm; vvnrm=zero;  // sum across threads
 | 
			
		||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
	vvnrm = vvnrm+sumarray[i];
 | 
			
		||||
        vvnrm = vvnrm+sumarray[i];
 | 
			
		||||
      } 
 | 
			
		||||
      nrm = Reduce(vvnrm);// sum across simd
 | 
			
		||||
      right._grid->GlobalSum(nrm);
 | 
			
		||||
@@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Op,class T1>
 | 
			
		||||
      inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
 | 
			
		||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 | 
			
		||||
    inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
 | 
			
		||||
    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 | 
			
		||||
    {
 | 
			
		||||
      return sum(closure(expr));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class Op,class T1,class T2>
 | 
			
		||||
      inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
 | 
			
		||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 | 
			
		||||
    inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
 | 
			
		||||
    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 | 
			
		||||
    {
 | 
			
		||||
      return sum(closure(expr));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    template<class Op,class T1,class T2,class T3>
 | 
			
		||||
      inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
 | 
			
		||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
				 eval(0,std::get<1>(expr.second)),
 | 
			
		||||
				 eval(0,std::get<2>(expr.second))
 | 
			
		||||
				 ))::scalar_object
 | 
			
		||||
    inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
 | 
			
		||||
    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
			
		||||
     eval(0,std::get<1>(expr.second)),
 | 
			
		||||
     eval(0,std::get<2>(expr.second))
 | 
			
		||||
     ))::scalar_object
 | 
			
		||||
    {
 | 
			
		||||
      return sum(closure(expr));
 | 
			
		||||
    }
 | 
			
		||||
@@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
      std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
 | 
			
		||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
	sumarray[i]=zero;
 | 
			
		||||
        sumarray[i]=zero;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
      PARALLEL_FOR_LOOP
 | 
			
		||||
      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
			
		||||
	int nwork, mywork, myoff;
 | 
			
		||||
	GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
        int nwork, mywork, myoff;
 | 
			
		||||
        GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
 | 
			
		||||
 | 
			
		||||
	vobj vvsum=zero;
 | 
			
		||||
        vobj vvsum=zero;
 | 
			
		||||
        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
			
		||||
	  vvsum = vvsum + arg._odata[ss];
 | 
			
		||||
	}
 | 
			
		||||
	sumarray[thr]=vvsum;
 | 
			
		||||
          vvsum = vvsum + arg._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
        sumarray[thr]=vvsum;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      vobj vsum=zero;  // sum across threads
 | 
			
		||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
			
		||||
	vsum = vsum+sumarray[i];
 | 
			
		||||
        vsum = vsum+sumarray[i];
 | 
			
		||||
      } 
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_object sobj;
 | 
			
		||||
@@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
      extract(vsum,buf);
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
 | 
			
		||||
      arg._grid->GlobalSum(ssum);
 | 
			
		||||
        arg._grid->GlobalSum(ssum);
 | 
			
		||||
 | 
			
		||||
      return ssum;
 | 
			
		||||
    }
 | 
			
		||||
@@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  GridBase  *grid = Data._grid;
 | 
			
		||||
  assert(grid!=NULL);
 | 
			
		||||
    {
 | 
			
		||||
      typedef typename vobj::scalar_object sobj;
 | 
			
		||||
      GridBase  *grid = Data._grid;
 | 
			
		||||
      assert(grid!=NULL);
 | 
			
		||||
 | 
			
		||||
  // FIXME
 | 
			
		||||
  // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
 | 
			
		||||
 | 
			
		||||
  const int    Nd = grid->_ndimension;
 | 
			
		||||
  const int Nsimd = grid->Nsimd();
 | 
			
		||||
      const int    Nd = grid->_ndimension;
 | 
			
		||||
      const int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
  assert(orthogdim >= 0);
 | 
			
		||||
  assert(orthogdim < Nd);
 | 
			
		||||
      assert(orthogdim >= 0);
 | 
			
		||||
      assert(orthogdim < Nd);
 | 
			
		||||
 | 
			
		||||
  int fd=grid->_fdimensions[orthogdim];
 | 
			
		||||
  int ld=grid->_ldimensions[orthogdim];
 | 
			
		||||
  int rd=grid->_rdimensions[orthogdim];
 | 
			
		||||
      int fd=grid->_fdimensions[orthogdim];
 | 
			
		||||
      int ld=grid->_ldimensions[orthogdim];
 | 
			
		||||
      int rd=grid->_rdimensions[orthogdim];
 | 
			
		||||
 | 
			
		||||
  std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
 | 
			
		||||
  std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
 | 
			
		||||
 
 | 
			
		||||
@@ -66,6 +66,9 @@ namespace Optimization {
 | 
			
		||||
    double f[4];
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Vsplat{
 | 
			
		||||
    //Complex float
 | 
			
		||||
    inline __m256 operator()(float a, float b){
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,13 @@ directory
 | 
			
		||||
#include "Grid_neon.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef GRID_DEFAULT_PRECISION_SINGLE
 | 
			
		||||
#define GRID_REAL_DIGITS FLT_DIG
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
			
		||||
#define GRID_REAL_DIGITS DBL_DIG
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////
 | 
			
		||||
@@ -146,6 +153,23 @@ class Grid_simd {
 | 
			
		||||
  Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps
 | 
			
		||||
  Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////
 | 
			
		||||
  // Comparisons
 | 
			
		||||
  /////////////////////////////
 | 
			
		||||
  inline bool operator==(const Grid_simd& lhs){ 
 | 
			
		||||
    Grid_simd::conv_t conv1;
 | 
			
		||||
    Grid_simd::conv_t conv2;
 | 
			
		||||
    conv1.v = v;
 | 
			
		||||
    conv2.v = lhs.v;
 | 
			
		||||
    return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline bool operator!=(const Grid_simd& lhs){ 
 | 
			
		||||
    return !(*this == lhs);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////
 | 
			
		||||
  // Constructors
 | 
			
		||||
  /////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,215 @@
 | 
			
		||||
    /*************************************************************************************
 | 
			
		||||
 | 
			
		||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
			
		||||
 | 
			
		||||
    Source file: ./tests/Test_cayley_cg_reproducibility.cc
 | 
			
		||||
 | 
			
		||||
    Copyright (C) 2015
 | 
			
		||||
 | 
			
		||||
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License along
 | 
			
		||||
    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
			
		||||
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
			
		||||
 | 
			
		||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
			
		||||
    *************************************************************************************/
 | 
			
		||||
    /*  END LEGAL */
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
using namespace Grid::QCD;
 | 
			
		||||
 | 
			
		||||
#define REPRODUCIBILITY_INTERVAL 1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class d>
 | 
			
		||||
struct scal {
 | 
			
		||||
  d internal;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
  Gamma::GammaMatrix Gmu [] = {
 | 
			
		||||
    Gamma::GammaX,
 | 
			
		||||
    Gamma::GammaY,
 | 
			
		||||
    Gamma::GammaZ,
 | 
			
		||||
    Gamma::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGinversions(What & Ddwf, 
 | 
			
		||||
                       GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                       GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                       RealD mass, RealD M5,
 | 
			
		||||
                       GridParallelRNG *RNG4,
 | 
			
		||||
                       GridParallelRNG *RNG5);
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGschur(What & Ddwf, 
 | 
			
		||||
                  GridCartesian         * FGrid,               GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                  GridCartesian         * UGrid,               GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                  RealD mass, RealD M5,
 | 
			
		||||
                  GridParallelRNG *RNG4,
 | 
			
		||||
                  GridParallelRNG *RNG5);
 | 
			
		||||
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGunprec(What & Ddwf, 
 | 
			
		||||
                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                   RealD mass, RealD M5,
 | 
			
		||||
                   GridParallelRNG *RNG4,
 | 
			
		||||
                   GridParallelRNG *RNG5);
 | 
			
		||||
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGprec(What & Ddwf, 
 | 
			
		||||
                 GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                 GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                 RealD mass, RealD M5,
 | 
			
		||||
                 GridParallelRNG *RNG4,
 | 
			
		||||
                 GridParallelRNG *RNG5);
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  Grid_init(&argc,&argv);
 | 
			
		||||
 | 
			
		||||
  int threads = GridThread::GetThreads();
 | 
			
		||||
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  std::cout << GridLogMessage << "###########################################" << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "A warning and a report is produced for every failure" << std::endl;
 | 
			
		||||
  std::cout << GridLogMessage << "###########################################" << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  const int Ls=8;
 | 
			
		||||
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
			
		||||
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
			
		||||
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
			
		||||
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  std::vector<int> seeds4({1,2,3,4});
 | 
			
		||||
  std::vector<int> seeds5({5,6,7,8});
 | 
			
		||||
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
			
		||||
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
			
		||||
 | 
			
		||||
  LatticeGaugeField Umu(UGrid);
 | 
			
		||||
  SU3::HotConfiguration(RNG4,Umu);
 | 
			
		||||
  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
			
		||||
 | 
			
		||||
  RealD mass=0.1;
 | 
			
		||||
  RealD M5  =1.8;
 | 
			
		||||
  std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
 | 
			
		||||
  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
			
		||||
  TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  RealD b=1.5;// Scale factor b+c=2, b-c=1
 | 
			
		||||
  RealD c=0.5;
 | 
			
		||||
  std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
 | 
			
		||||
  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
 | 
			
		||||
  TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
 | 
			
		||||
  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
 | 
			
		||||
  TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
 | 
			
		||||
  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
 | 
			
		||||
  TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
 | 
			
		||||
  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
 | 
			
		||||
  TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
 | 
			
		||||
  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
 | 
			
		||||
  TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
 | 
			
		||||
  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
 | 
			
		||||
  TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
}
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGinversions(What & Ddwf, 
 | 
			
		||||
                       GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                       GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                       RealD mass, RealD M5,
 | 
			
		||||
                       GridParallelRNG *RNG4,
 | 
			
		||||
                       GridParallelRNG *RNG5)
 | 
			
		||||
{
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl;
 | 
			
		||||
  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl;
 | 
			
		||||
  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
			
		||||
  std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl;
 | 
			
		||||
  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGunprec(What & Ddwf, 
 | 
			
		||||
                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                   RealD mass, RealD M5,
 | 
			
		||||
                   GridParallelRNG *RNG4,
 | 
			
		||||
                   GridParallelRNG *RNG5)
 | 
			
		||||
{
 | 
			
		||||
  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
			
		||||
  LatticeFermion result(FGrid); result=zero;
 | 
			
		||||
 | 
			
		||||
  MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
			
		||||
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
			
		||||
  CG(HermOp,src,result);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGprec(What & Ddwf, 
 | 
			
		||||
                 GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                 GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                 RealD mass, RealD M5,
 | 
			
		||||
                 GridParallelRNG *RNG4,
 | 
			
		||||
                 GridParallelRNG *RNG5)
 | 
			
		||||
{
 | 
			
		||||
  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
			
		||||
  LatticeFermion    src_o(FrbGrid);
 | 
			
		||||
  LatticeFermion result_o(FrbGrid);
 | 
			
		||||
  pickCheckerboard(Odd,src_o,src);
 | 
			
		||||
  result_o=zero;
 | 
			
		||||
 | 
			
		||||
  SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
			
		||||
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
			
		||||
  CG(HermOpEO,src_o,result_o);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class What> 
 | 
			
		||||
void  TestCGschur(What & Ddwf, 
 | 
			
		||||
                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
			
		||||
                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
			
		||||
                   RealD mass, RealD M5,
 | 
			
		||||
                   GridParallelRNG *RNG4,
 | 
			
		||||
                   GridParallelRNG *RNG5)
 | 
			
		||||
{
 | 
			
		||||
  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
			
		||||
  LatticeFermion result(FGrid); result=zero;
 | 
			
		||||
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
			
		||||
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
			
		||||
  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
 | 
			
		||||
  SchurSolver(Ddwf,src,result);
 | 
			
		||||
}
 | 
			
		||||
@@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner {
 | 
			
		||||
    Real mass = -0.77;
 | 
			
		||||
    FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
 | 
			
		||||
 | 
			
		||||
    // To enable the CG reproducibility tests use 
 | 
			
		||||
    //ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest);
 | 
			
		||||
    // This is the plain version
 | 
			
		||||
    ConjugateGradient<FermionField> CG(1.0e-8, 10000);
 | 
			
		||||
 | 
			
		||||
    TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
 | 
			
		||||
 
 | 
			
		||||
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 | 
			
		||||
 | 
			
		||||
  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
 | 
			
		||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true);
 | 
			
		||||
  CG(HermOp,src,result);
 | 
			
		||||
 | 
			
		||||
  Grid_finalize();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user