mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	Compare commits
	
		
			26 Commits
		
	
	
		
			aff3d50bae
			...
			feature/hw
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					3064c9a6e2 | ||
| 
						 | 
					729882827c | ||
| 
						 | 
					baa668d3ac | ||
| 
						 | 
					3c82d16ed8 | ||
| 
						 | 
					5c8c0c2d7c | ||
| 
						 | 
					e5a100846c | ||
| 
						 | 
					a74e2dc12e | ||
| 
						 | 
					595f512a6e | ||
| 
						 | 
					a6499b22ff | ||
| 
						 | 
					b4e42a59c6 | ||
| 
						 | 
					8c913e0edd | ||
| 
						 | 
					fd3f93d8d3 | ||
| 
						 | 
					e9543cdacd | ||
| 
						 | 
					98f7b3d298 | ||
| 
						 | 
					b7b164ea24 | ||
| 
						 | 
					77124d99d5 | ||
| 
						 | 
					e1327e7ea0 | ||
| 
						 | 
					569f78c2cf | ||
| 
						 | 
					488c79d5a1 | ||
| 
						 | 
					dc6b0f20b2 | ||
| 
						 | 
					c0badc3e16 | ||
| 
						 | 
					58f6529b55 | ||
| 
						 | 
					e3f056dfbb | ||
| 
						 | 
					da0ffa7a79 | ||
| 
						 | 
					fcc7640b9c | ||
| 
						 | 
					0cbe2859e0 | 
@@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
 | 
			
		||||
  Lattice<dotp> fine_inner_msk(fine);
 | 
			
		||||
 | 
			
		||||
  // Multiply could be fused with innerProduct
 | 
			
		||||
  // Single block sum kernel could do both masks.
 | 
			
		||||
  fine_inner = localInnerProduct(fineX,fineY);
 | 
			
		||||
  mult(fine_inner_msk, fine_inner,FineMask);
 | 
			
		||||
  blockSum(CoarseInner,fine_inner_msk);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Geometry {
 | 
			
		||||
public:
 | 
			
		||||
  int npoint;
 | 
			
		||||
@@ -80,8 +78,12 @@ public:
 | 
			
		||||
    }
 | 
			
		||||
    directions   [2*_d]=0;
 | 
			
		||||
    displacements[2*_d]=0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
    std::cout <<GridLogMessage << "Geometry "<<std::endl;
 | 
			
		||||
    for(int p=0;p<npoint;p++){
 | 
			
		||||
      std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
  
 | 
			
		||||
template<class Fobj,class CComplex,int nbasis>
 | 
			
		||||
@@ -102,8 +104,8 @@ public:
 | 
			
		||||
  Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : 
 | 
			
		||||
    CoarseGrid(_CoarseGrid),
 | 
			
		||||
    FineGrid(_FineGrid),
 | 
			
		||||
    subspace(nbasis,_FineGrid),
 | 
			
		||||
    checkerboard(_checkerboard)
 | 
			
		||||
    checkerboard(_checkerboard),
 | 
			
		||||
    subspace(nbasis,_FineGrid)
 | 
			
		||||
  {
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
@@ -285,6 +287,8 @@ public:
 | 
			
		||||
  ///////////////////////
 | 
			
		||||
  GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return geom.directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return geom.displacements; };
 | 
			
		||||
  void M (const CoarseVector &in, CoarseVector &out)
 | 
			
		||||
  {
 | 
			
		||||
    conformable(_grid,in.Grid());
 | 
			
		||||
@@ -308,6 +312,9 @@ public:
 | 
			
		||||
 | 
			
		||||
    int osites=Grid()->oSites();
 | 
			
		||||
 | 
			
		||||
    autoView(st,Stencil,AcceleratorRead);
 | 
			
		||||
    siteVector *CBp=Stencil.CommBuf();
 | 
			
		||||
 | 
			
		||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
			
		||||
      int ss = sss/nbasis;
 | 
			
		||||
      int b  = sss%nbasis;
 | 
			
		||||
@@ -318,12 +325,12 @@ public:
 | 
			
		||||
 | 
			
		||||
      for(int point=0;point<geom.npoint;point++){
 | 
			
		||||
 | 
			
		||||
	SE=Stencil.GetEntry(ptype,point,ss);
 | 
			
		||||
	SE=st.GetEntry(ptype,point,ss);
 | 
			
		||||
	  
 | 
			
		||||
	if(SE->_is_local) { 
 | 
			
		||||
	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
 | 
			
		||||
	} else {
 | 
			
		||||
	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
 | 
			
		||||
	  nbr = coalescedRead(CBp[SE->_offset]);
 | 
			
		||||
	}
 | 
			
		||||
	acceleratorSynchronise();
 | 
			
		||||
 | 
			
		||||
@@ -409,38 +416,23 @@ public:
 | 
			
		||||
      MdirCalc(in,out[p],p);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
			
		||||
 | 
			
		||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
 | 
			
		||||
  {
 | 
			
		||||
    this->MdirComms(in);
 | 
			
		||||
 | 
			
		||||
    int ndim = in.Grid()->Nd();
 | 
			
		||||
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 4D action like wilson
 | 
			
		||||
    // 0+ => 0 
 | 
			
		||||
    // 0- => 1
 | 
			
		||||
    // 1+ => 2 
 | 
			
		||||
    // 1- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    //////////////
 | 
			
		||||
    // 5D action like DWF
 | 
			
		||||
    // 1+ => 0 
 | 
			
		||||
    // 1- => 1
 | 
			
		||||
    // 2+ => 2 
 | 
			
		||||
    // 2- => 3
 | 
			
		||||
    // etc..
 | 
			
		||||
    auto point = [dir, disp, ndim](){
 | 
			
		||||
      if(dir == 0 and disp == 0)
 | 
			
		||||
	return 8;
 | 
			
		||||
      else if ( ndim==4 ) { 
 | 
			
		||||
	return (4 * dir + 1 - disp) / 2;
 | 
			
		||||
      } else { 
 | 
			
		||||
	return (4 * (dir-1) + 1 - disp) / 2;
 | 
			
		||||
    int point=-1;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p;
 | 
			
		||||
    }
 | 
			
		||||
    }();
 | 
			
		||||
    assert(point!=-1);// Must find
 | 
			
		||||
 | 
			
		||||
    std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp  <<std::endl;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    MdirCalc(in,out,point);
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Mdiag(const CoarseVector &in, CoarseVector &out)
 | 
			
		||||
@@ -460,6 +452,54 @@ public:
 | 
			
		||||
  {
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop)
 | 
			
		||||
  {
 | 
			
		||||
    typedef Lattice<Fobj> FineField;
 | 
			
		||||
    CoarseVector Cin(_grid);
 | 
			
		||||
    CoarseVector Cout(_grid);
 | 
			
		||||
    CoarseVector CFout(_grid);
 | 
			
		||||
 | 
			
		||||
    FineField Fin(FineGrid);
 | 
			
		||||
    FineField Fout(FineGrid);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    std::vector<int> seeds({1,2,3,4,5});
 | 
			
		||||
    GridParallelRNG RNG(_grid);  RNG.SeedFixedIntegers(seeds);
 | 
			
		||||
    gaussian(RNG,Cin);
 | 
			
		||||
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Cin,Fin);
 | 
			
		||||
    _Aggregates.ProjectToSubspace(Cin,Fin);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< " Testing M  "<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
			
		||||
    // Coarse operator
 | 
			
		||||
    this->M(Cin,Cout);
 | 
			
		||||
    // Fine projected operator
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(Cin,Fin);
 | 
			
		||||
    linop.Op(Fin,Fout);
 | 
			
		||||
    _Aggregates.ProjectToSubspace(CFout,Fout);
 | 
			
		||||
 | 
			
		||||
    CFout = CFout-Cout;
 | 
			
		||||
    RealD diff = norm2(CFout);
 | 
			
		||||
    std::cout << GridLogMessage<< " diff  "<<diff<<std::endl;
 | 
			
		||||
    assert(diff<1.0e-5);
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< " Testing Mdag  "<<std::endl;
 | 
			
		||||
    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
			
		||||
    // Coarse operator
 | 
			
		||||
    Mdag(Cin,Cout);
 | 
			
		||||
    // Fine operator
 | 
			
		||||
    linop.AdjOp(Fin,Fout);
 | 
			
		||||
    _Aggregates.ProjectToSubspace(CFout,Fout);
 | 
			
		||||
 | 
			
		||||
    CFout = CFout-Cout;
 | 
			
		||||
    diff = norm2(CFout);
 | 
			
		||||
    std::cout << GridLogMessage<< " diff  "<<diff<<std::endl; 
 | 
			
		||||
    assert(diff<1.0e-5);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
			
		||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace)
 | 
			
		||||
  {
 | 
			
		||||
@@ -496,8 +536,19 @@ public:
 | 
			
		||||
 | 
			
		||||
    CoarseScalar InnerProd(Grid()); 
 | 
			
		||||
 | 
			
		||||
    std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl;
 | 
			
		||||
    // Orthogonalise the subblocks over the basis
 | 
			
		||||
    blockOrthogonalise(InnerProd,Subspace.subspace);
 | 
			
		||||
    std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl;
 | 
			
		||||
 | 
			
		||||
    auto OpDirections    = linop.Directions();
 | 
			
		||||
    auto OpDisplacements = linop.Displacements();
 | 
			
		||||
 | 
			
		||||
    std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl;
 | 
			
		||||
    for(int p=0;p<OpDirections.size();p++) {
 | 
			
		||||
      assert(OpDirections[p]==geom.directions[p]);
 | 
			
		||||
      assert(OpDisplacements[p]==geom.displacements[p]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Compute the matrix elements of linop between this orthonormal
 | 
			
		||||
    // set of vectors.
 | 
			
		||||
@@ -533,13 +584,27 @@ public:
 | 
			
		||||
    evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
 | 
			
		||||
    oddmask  = one-evenmask;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
    {
 | 
			
		||||
      phi=Subspace.subspace[0];
 | 
			
		||||
      linop.OpDirAll(phi,Mphi_p);
 | 
			
		||||
      for(int p=0;p<geom.npoint-1;p++){
 | 
			
		||||
	int dir=geom.directions[p];
 | 
			
		||||
	int disp=geom.displacements[p];
 | 
			
		||||
	linop.OpDir(phi,Mphi,dir,disp);
 | 
			
		||||
	Mphi=Mphi-Mphi_p[p];
 | 
			
		||||
	std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
*/
 | 
			
		||||
    assert(self_stencil!=-1);
 | 
			
		||||
    int lhermitian=hermitian;
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<nbasis;i++){
 | 
			
		||||
 | 
			
		||||
      phi=Subspace.subspace[i];
 | 
			
		||||
 | 
			
		||||
      //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
			
		||||
      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
			
		||||
      linop.OpDirAll(phi,Mphi_p);
 | 
			
		||||
      linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);
 | 
			
		||||
 | 
			
		||||
@@ -550,7 +615,7 @@ public:
 | 
			
		||||
	int dir   = geom.directions[p];
 | 
			
		||||
	int disp  = geom.displacements[p];
 | 
			
		||||
 | 
			
		||||
	if ( (disp==-1) || (!hermitian ) ) {
 | 
			
		||||
	if ( (disp==-1) || (!lhermitian ) ) {
 | 
			
		||||
 | 
			
		||||
	  ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
	  // Pick out contributions coming from this cell and neighbour cell
 | 
			
		||||
@@ -568,11 +633,23 @@ public:
 | 
			
		||||
	    autoView( A_self  , A[self_stencil], AcceleratorWrite);
 | 
			
		||||
 | 
			
		||||
	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
 | 
			
		||||
 | 
			
		||||
	    if ( lhermitian && (disp==-1) ) {
 | 
			
		||||
	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
 | 
			
		||||
		int dirp   = geom.directions[pp];
 | 
			
		||||
		int dispp  = geom.displacements[pp];
 | 
			
		||||
		if ( (dirp==dir) && (dispp==1) ){
 | 
			
		||||
		  auto sft = conjugate(Cshift(oZProj,dir,1));
 | 
			
		||||
		  autoView( sft_v    ,  sft  , AcceleratorWrite);
 | 
			
		||||
		  autoView( A_pp     ,  A[pp], AcceleratorWrite);
 | 
			
		||||
		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
 | 
			
		||||
		}
 | 
			
		||||
	      }
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl;
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
      // Faster alternate self coupling.. use hermiticity to save 2x
 | 
			
		||||
      ///////////////////////////////////////////
 | 
			
		||||
@@ -604,31 +681,35 @@ public:
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if(hermitian) {
 | 
			
		||||
      std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
 | 
			
		||||
      ForceHermitian();
 | 
			
		||||
 | 
			
		||||
    MemoryManager::PrintBytes();
 | 
			
		||||
 | 
			
		||||
    // Auto self test
 | 
			
		||||
    Test( Subspace,FineGrid,linop);
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
    // test code worth preserving in if block
 | 
			
		||||
    ///////////////////////////
 | 
			
		||||
    std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
 | 
			
		||||
      std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
    std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
 | 
			
		||||
 | 
			
		||||
    phi=Subspace.subspace[0];
 | 
			
		||||
    std::vector<int> bc(FineGrid->_ndimension,0);
 | 
			
		||||
    blockPick(Grid(),phi,tmp,bc);      // Pick out a block
 | 
			
		||||
    linop.Op(tmp,Mphi);                // Apply big dop
 | 
			
		||||
    blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
 | 
			
		||||
    std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<< iProj <<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void ForceHermitian(void) {
 | 
			
		||||
    CoarseMatrix Diff  (Grid());
 | 
			
		||||
    for(int p=0;p<geom.npoint;p++){
 | 
			
		||||
      int dir   = geom.directions[p];
 | 
			
		||||
      int disp  = geom.displacements[p];
 | 
			
		||||
      if(disp==-1) {
 | 
			
		||||
	// Find the opposite link
 | 
			
		||||
	for(int pp=0;pp<geom.npoint;pp++){
 | 
			
		||||
	  int dirp   = geom.directions[pp];
 | 
			
		||||
	  int dispp  = geom.displacements[pp];
 | 
			
		||||
	  if ( (dirp==dir) && (dispp==1) ){
 | 
			
		||||
	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp]; 
 | 
			
		||||
	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
 | 
			
		||||
	    A[pp] = adj(Cshift(A[p],dir,1));
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 
 | 
			
		||||
@@ -52,6 +52,9 @@ public:
 | 
			
		||||
  virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
			
		||||
  virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
 | 
			
		||||
  virtual void HermOp(const Field &in, Field &out)=0;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   =0;
 | 
			
		||||
  virtual std::vector<int> Displacements(void)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> {
 | 
			
		||||
public:
 | 
			
		||||
  MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  void OpDiag (const Field &in, Field &out) {
 | 
			
		||||
    _Mat.Mdiag(in,out);
 | 
			
		||||
@@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
 | 
			
		||||
  Matrix &_Mat;
 | 
			
		||||
  RealD _shift;
 | 
			
		||||
public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
  ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  void OpDiag (const Field &in, Field &out) {
 | 
			
		||||
@@ -151,6 +159,8 @@ template<class Matrix,class Field>
 | 
			
		||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
			
		||||
  Matrix &_Mat;
 | 
			
		||||
public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
  HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  void OpDiag (const Field &in, Field &out) {
 | 
			
		||||
@@ -182,6 +192,8 @@ template<class Matrix,class Field>
 | 
			
		||||
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
			
		||||
  Matrix &_Mat;
 | 
			
		||||
public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
  NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
  // Support for coarsening to a multigrid
 | 
			
		||||
  void OpDiag (const Field &in, Field &out) {
 | 
			
		||||
@@ -255,6 +267,8 @@ template<class Matrix,class Field>
 | 
			
		||||
  class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
    Matrix &_Mat;
 | 
			
		||||
    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
    SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
    virtual  void Mpc      (const Field &in, Field &out) {
 | 
			
		||||
      Field tmp(in.Grid());
 | 
			
		||||
@@ -281,6 +295,8 @@ template<class Matrix,class Field>
 | 
			
		||||
 protected:
 | 
			
		||||
    Matrix &_Mat;
 | 
			
		||||
 public:
 | 
			
		||||
    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
    SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
    
 | 
			
		||||
    virtual void Mpc      (const Field &in, Field &out) {
 | 
			
		||||
@@ -307,6 +323,8 @@ template<class Matrix,class Field>
 | 
			
		||||
 protected:
 | 
			
		||||
    Matrix &_Mat;
 | 
			
		||||
 public:
 | 
			
		||||
    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
    SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
 | 
			
		||||
    
 | 
			
		||||
    virtual void Mpc      (const Field &in, Field &out) {
 | 
			
		||||
@@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator :  public NonHermitianSchurOperatorBase
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
  Matrix& _Mat;
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
 NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
@@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi
 | 
			
		||||
  Matrix &_Mat;
 | 
			
		||||
  
 | 
			
		||||
 public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
  NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
			
		||||
    Field tmp(in.Grid());
 | 
			
		||||
@@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi
 | 
			
		||||
  Matrix& _Mat;
 | 
			
		||||
  
 | 
			
		||||
 public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
 NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
 | 
			
		||||
 | 
			
		||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
			
		||||
@@ -475,6 +499,8 @@ class SchurStaggeredOperator :  public SchurOperatorBase<Field> {
 | 
			
		||||
  Field tmp;
 | 
			
		||||
  RealD mass;
 | 
			
		||||
 public:
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
  SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) 
 | 
			
		||||
  { 
 | 
			
		||||
    assert( _Mat.isTrivialEE() );
 | 
			
		||||
 
 | 
			
		||||
@@ -48,6 +48,8 @@ public:
 | 
			
		||||
  virtual  void Mdiag    (const Field &in, Field &out)=0;
 | 
			
		||||
  virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
 | 
			
		||||
  virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
 | 
			
		||||
  virtual std::vector<int> Directions(void)   =0;
 | 
			
		||||
  virtual std::vector<int> Displacements(void)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -73,6 +75,8 @@ public:
 | 
			
		||||
  virtual  void MooeeDag    (const Field &in, Field &out)=0;
 | 
			
		||||
  virtual  void MooeeInvDag (const Field &in, Field &out)=0;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   =0;
 | 
			
		||||
  virtual std::vector<int> Displacements(void)=0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
			
		||||
 | 
			
		||||
NAMESPACE_BEGIN(Grid);
 | 
			
		||||
  /*
 | 
			
		||||
   * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv. 
 | 
			
		||||
   * Script A = SolverMatrix 
 | 
			
		||||
@@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
   * Vout = x
 | 
			
		||||
   */
 | 
			
		||||
 | 
			
		||||
// abstract base
 | 
			
		||||
template<class Field, class CoarseField>
 | 
			
		||||
 | 
			
		||||
template<class Field, class CoarseField, class Aggregates>
 | 
			
		||||
class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  int verbose;
 | 
			
		||||
 | 
			
		||||
  RealD   Tolerance;
 | 
			
		||||
  Integer MaxIterations;
 | 
			
		||||
  const int mmax = 5;
 | 
			
		||||
  GridBase *grid;
 | 
			
		||||
  GridBase *coarsegrid;
 | 
			
		||||
  const int mmax = 4;
 | 
			
		||||
  GridBase *FineGrid;
 | 
			
		||||
  GridBase *CoarseGrid;
 | 
			
		||||
 | 
			
		||||
  LinearOperatorBase<Field>   *_Linop
 | 
			
		||||
  OperatorFunction<Field>     *_Smoother,
 | 
			
		||||
  LinearFunction<CoarseField> *_CoarseSolver;
 | 
			
		||||
 | 
			
		||||
  // Need somthing that knows how to get from Coarse to fine and back again
 | 
			
		||||
  LinearOperatorBase<Field>   &_Linop;
 | 
			
		||||
  LinearFunction<Field>     &_Smoother;
 | 
			
		||||
  LinearFunction<CoarseField> &_CoarseSolver;
 | 
			
		||||
  Aggregates                  &_Aggregates;
 | 
			
		||||
  
 | 
			
		||||
  // more most opertor functions
 | 
			
		||||
  TwoLevelFlexiblePcg(RealD tol,
 | 
			
		||||
		      Integer maxit,
 | 
			
		||||
		      LinearOperatorBase<Field> *Linop,
 | 
			
		||||
		     LinearOperatorBase<Field> *SmootherLinop,
 | 
			
		||||
		     OperatorFunction<Field>   *Smoother,
 | 
			
		||||
		     OperatorFunction<CoarseField>  CoarseLinop
 | 
			
		||||
		      LinearFunction<Field>   *Smoother,
 | 
			
		||||
		      LinearFunction<CoarseField> *CoarseSolver,
 | 
			
		||||
		      Aggregates *AggP
 | 
			
		||||
		      ) : 
 | 
			
		||||
  Tolerance(tol), 
 | 
			
		||||
    MaxIterations(maxit),
 | 
			
		||||
      _Linop(Linop),
 | 
			
		||||
      _PreconditionerLinop(PrecLinop),
 | 
			
		||||
      _Preconditioner(Preconditioner)
 | 
			
		||||
    _Linop(*Linop),
 | 
			
		||||
    _Smoother(*Smoother),
 | 
			
		||||
    _CoarseSolver(*CoarseSolver),
 | 
			
		||||
    _Aggregates(*AggP)
 | 
			
		||||
  { 
 | 
			
		||||
    CoarseGrid=_Aggregates.CoarseGrid;
 | 
			
		||||
    FineGrid=_Aggregates.FineGrid;
 | 
			
		||||
    verbose=0;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // The Pcg routine is common to all, but the various matrices differ from derived 
 | 
			
		||||
  // implementation to derived implmentation
 | 
			
		||||
  void operator() (const Field &src, Field &psi){
 | 
			
		||||
  void operator() (const Field &src, Field &psi){
 | 
			
		||||
 | 
			
		||||
    psi.Checkerboard() = src.Checkerboard();
 | 
			
		||||
    grid             = src.Grid();
 | 
			
		||||
 | 
			
		||||
    RealD f;
 | 
			
		||||
    RealD rtzp,rtz,a,d,b;
 | 
			
		||||
    RealD rptzp;
 | 
			
		||||
    RealD tn;
 | 
			
		||||
    //    RealD rptzp;
 | 
			
		||||
    //    RealD tn;
 | 
			
		||||
    RealD guess = norm2(psi);
 | 
			
		||||
    RealD ssq   = norm2(src);
 | 
			
		||||
    RealD rsq   = ssq*Tolerance*Tolerance;
 | 
			
		||||
@@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
    /////////////////////////////
 | 
			
		||||
    // Set up history vectors
 | 
			
		||||
    /////////////////////////////
 | 
			
		||||
    std::vector<Field> p  (mmax,grid);
 | 
			
		||||
    std::vector<Field> mmp(mmax,grid);
 | 
			
		||||
    std::vector<Field> p  (mmax,FineGrid);
 | 
			
		||||
    std::vector<Field> mmp(mmax,FineGrid);
 | 
			
		||||
    std::vector<RealD> pAp(mmax);
 | 
			
		||||
 | 
			
		||||
    Field x  (grid); x = psi;
 | 
			
		||||
    Field z  (grid);
 | 
			
		||||
    Field tmp(grid);
 | 
			
		||||
    Field r  (grid);
 | 
			
		||||
    Field mu (grid);
 | 
			
		||||
    Field x  (FineGrid); x = psi;
 | 
			
		||||
    Field z  (FineGrid);
 | 
			
		||||
    Field tmp(FineGrid);
 | 
			
		||||
    Field r  (FineGrid);
 | 
			
		||||
    Field mu (FineGrid);
 | 
			
		||||
  
 | 
			
		||||
    //////////////////////////
 | 
			
		||||
    // x0 = Vstart -- possibly modify guess
 | 
			
		||||
@@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
    Vstart(x,src);
 | 
			
		||||
 | 
			
		||||
    // r0 = b -A x0
 | 
			
		||||
    HermOp(x,mmp); // Shouldn't this be something else?
 | 
			
		||||
    _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
 | 
			
		||||
    axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // Compute z = M1 x
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    M1(r,z,tmp,mp,SmootherMirs);
 | 
			
		||||
    M1(r,z);
 | 
			
		||||
    rtzp =real(innerProduct(r,z));
 | 
			
		||||
 | 
			
		||||
    ///////////////////////////////////////
 | 
			
		||||
@@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
      int peri_kp = (k+1) % mmax;
 | 
			
		||||
 | 
			
		||||
      rtz=rtzp;
 | 
			
		||||
      d= M3(p[peri_k],mp,mmp[peri_k],tmp);
 | 
			
		||||
      d= M3(p[peri_k],mmp[peri_k]);
 | 
			
		||||
      a = rtz/d;
 | 
			
		||||
    
 | 
			
		||||
      // Memorise this
 | 
			
		||||
@@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
      RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
 | 
			
		||||
 | 
			
		||||
      // Compute z = M x
 | 
			
		||||
      M1(r,z,tmp,mp);
 | 
			
		||||
      M1(r,z);
 | 
			
		||||
 | 
			
		||||
      rtzp =real(innerProduct(r,z));
 | 
			
		||||
 | 
			
		||||
      M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
 | 
			
		||||
 | 
			
		||||
      p[peri_kp]=p[peri_k];
 | 
			
		||||
      p[peri_kp]=mu;
 | 
			
		||||
 | 
			
		||||
      // Standard search direction  p -> z + b p    ; b = 
 | 
			
		||||
      b = (rtzp)/rtz;
 | 
			
		||||
@@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
      // Stopping condition
 | 
			
		||||
      if ( rn <= rsq ) { 
 | 
			
		||||
 | 
			
		||||
	HermOp(x,mmp); // Shouldn't this be something else?
 | 
			
		||||
	_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
 | 
			
		||||
	axpy(tmp,-1.0,src,mmp[0]);
 | 
			
		||||
	
 | 
			
		||||
	RealD psinorm = sqrt(norm2(x));
 | 
			
		||||
@@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
	RealD true_residual = tmpnorm/srcnorm;
 | 
			
		||||
	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
 | 
			
		||||
	return k;
 | 
			
		||||
 | 
			
		||||
	return;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // Non-convergence
 | 
			
		||||
@@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp) {
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void M1(Field & in, Field & out) {// the smoother
 | 
			
		||||
 | 
			
		||||
  virtual void M1(Field & in, Field & out) 
 | 
			
		||||
  {// the smoother
 | 
			
		||||
    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
 | 
			
		||||
    Field tmp(grid);
 | 
			
		||||
    Field Min(grid);
 | 
			
		||||
    Field tmp(FineGrid);
 | 
			
		||||
    Field Min(FineGrid);
 | 
			
		||||
 | 
			
		||||
    PcgM(in,Min); // Smoother call
 | 
			
		||||
    CoarseField PleftProj(CoarseGrid);
 | 
			
		||||
    CoarseField PleftMss_proj(CoarseGrid);
 | 
			
		||||
 | 
			
		||||
    HermOp(Min,out);
 | 
			
		||||
    _Smoother(in,Min); // Smoother call
 | 
			
		||||
 | 
			
		||||
    _Linop.HermOp(Min,out);
 | 
			
		||||
    axpy(tmp,-1.0,out,in);          // tmp  = in - A Min
 | 
			
		||||
 | 
			
		||||
    ProjectToSubspace(tmp,PleftProj);     
 | 
			
		||||
    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
 | 
			
		||||
    PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]  
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,tmp);     
 | 
			
		||||
    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]  
 | 
			
		||||
    axpy(out,1.0,Min,tmp); // Min+tmp
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void M2(const Field & in, Field & out) {
 | 
			
		||||
  virtual void M2(const Field & in, Field & out) 
 | 
			
		||||
  {
 | 
			
		||||
    out=in;
 | 
			
		||||
    // Must override for Def2 only
 | 
			
		||||
    //  case PcgDef2:
 | 
			
		||||
    //    Pright(in,out);
 | 
			
		||||
    //    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual RealD M3(const Field & p, Field & mmp){
 | 
			
		||||
  virtual RealD M3(const Field & p, Field & mmp)
 | 
			
		||||
  {
 | 
			
		||||
    double d,dd;
 | 
			
		||||
    HermOpAndNorm(p,mmp,d,dd);
 | 
			
		||||
    _Linop.HermOpAndNorm(p,mmp,d,dd);
 | 
			
		||||
    return dd;
 | 
			
		||||
    // Must override for Def1 only
 | 
			
		||||
    //  case PcgDef1:
 | 
			
		||||
    //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
 | 
			
		||||
    //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
 | 
			
		||||
    //    Pleft(mp,mmp);
 | 
			
		||||
    //    d=real(linop_d->inner(p,mmp));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void VstartDef2(Field & xconst Field & src){
 | 
			
		||||
  virtual void Vstart(Field & x,const Field & src)
 | 
			
		||||
  {
 | 
			
		||||
    //case PcgDef2:
 | 
			
		||||
    //case PcgAdef2: 
 | 
			
		||||
    //case PcgAdef2f:
 | 
			
		||||
@@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
			
		||||
    //                   = src_s - (A guess)_s - src_s  + (A guess)_s 
 | 
			
		||||
    //                   = 0 
 | 
			
		||||
    ///////////////////////////////////
 | 
			
		||||
    Field r(grid);
 | 
			
		||||
    Field mmp(grid);
 | 
			
		||||
    Field r(FineGrid);
 | 
			
		||||
    Field mmp(FineGrid);
 | 
			
		||||
 | 
			
		||||
    HermOp(x,mmp);
 | 
			
		||||
    CoarseField PleftProj(CoarseGrid);
 | 
			
		||||
    CoarseField PleftMss_proj(CoarseGrid);
 | 
			
		||||
    
 | 
			
		||||
    _Linop.HermOp(x,mmp);
 | 
			
		||||
    axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x
 | 
			
		||||
    ProjectToSubspace(r,PleftProj);     
 | 
			
		||||
    ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
			
		||||
    PromoteFromSubspace(PleftMss_proj,mmp);  
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,r);     
 | 
			
		||||
    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);  
 | 
			
		||||
    x=x+mmp;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  virtual void Vstart(Field & x,const Field & src){
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Only Def1 has non-trivial Vout. Override in Def1
 | 
			
		||||
  /////////////////////////////////////////////////////////////////////
 | 
			
		||||
  virtual void   Vout  (Field & in, Field & out,Field & src){
 | 
			
		||||
    out = in;
 | 
			
		||||
    //case PcgDef1:
 | 
			
		||||
    //    //Qb + PT x
 | 
			
		||||
    //    ProjectToSubspace(src,PleftProj);     
 | 
			
		||||
    //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
			
		||||
    //    PromoteFromSubspace(PleftMss_proj,tmp);  
 | 
			
		||||
    //    
 | 
			
		||||
    //    Pright(in,out);
 | 
			
		||||
    //    
 | 
			
		||||
    //    linop_d->axpy(out,tmp,out,1.0);
 | 
			
		||||
    //    break;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // Pright and Pleft are common to all implementations
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  virtual void Pright(Field & in,Field & out){
 | 
			
		||||
  virtual void Pright(Field & in,Field & out)
 | 
			
		||||
  {
 | 
			
		||||
    // P_R  = [ 1              0 ] 
 | 
			
		||||
    //        [ -Mss^-1 Msb    0 ] 
 | 
			
		||||
    Field in_sbar(grid);
 | 
			
		||||
    Field in_sbar(FineGrid);
 | 
			
		||||
 | 
			
		||||
    ProjectToSubspace(in,PleftProj);     
 | 
			
		||||
    PromoteFromSubspace(PleftProj,out);  
 | 
			
		||||
    CoarseField PleftProj(CoarseGrid);
 | 
			
		||||
    CoarseField PleftMss_proj(CoarseGrid);
 | 
			
		||||
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,in);     
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftProj,out);  
 | 
			
		||||
    axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s 
 | 
			
		||||
 | 
			
		||||
    HermOp(in_sbar,out);
 | 
			
		||||
    ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project)
 | 
			
		||||
    _Linop.HermOp(in_sbar,out);
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,out);           // Mssbar in_sbar  (project)
 | 
			
		||||
 | 
			
		||||
    ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar 
 | 
			
		||||
    PromoteFromSubspace(PleftMss_proj,out);     // 
 | 
			
		||||
    _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar 
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftMss_proj,out);     // 
 | 
			
		||||
 | 
			
		||||
    axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar
 | 
			
		||||
  }
 | 
			
		||||
  virtual void Pleft (Field & in,Field & out){
 | 
			
		||||
  virtual void Pleft (Field & in,Field & out)
 | 
			
		||||
  {
 | 
			
		||||
    // P_L  = [ 1  -Mbs Mss^-1] 
 | 
			
		||||
    //        [ 0   0         ] 
 | 
			
		||||
    Field in_sbar(grid);
 | 
			
		||||
    Field    tmp2(grid);
 | 
			
		||||
    Field    Mtmp(grid);
 | 
			
		||||
    Field in_sbar(FineGrid);
 | 
			
		||||
    Field    tmp2(FineGrid);
 | 
			
		||||
    Field    Mtmp(FineGrid);
 | 
			
		||||
 | 
			
		||||
    ProjectToSubspace(in,PleftProj);     
 | 
			
		||||
    PromoteFromSubspace(PleftProj,out);  
 | 
			
		||||
    CoarseField PleftProj(CoarseGrid);
 | 
			
		||||
    CoarseField PleftMss_proj(CoarseGrid);
 | 
			
		||||
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,in);     
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftProj,out);  
 | 
			
		||||
    axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s
 | 
			
		||||
 | 
			
		||||
    ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
 | 
			
		||||
    PromoteFromSubspace(PleftMss_proj,out);
 | 
			
		||||
    _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftMss_proj,out);
 | 
			
		||||
 | 
			
		||||
    HermOp(out,Mtmp);
 | 
			
		||||
    _Linop.HermOp(out,Mtmp);
 | 
			
		||||
 | 
			
		||||
    ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1}
 | 
			
		||||
    PromoteFromSubspace(PleftProj,tmp2);
 | 
			
		||||
    _Aggregates.ProjectToSubspace(PleftProj,Mtmp);      // Msbar s Mss^{-1}
 | 
			
		||||
    _Aggregates.PromoteFromSubspace(PleftProj,tmp2);
 | 
			
		||||
 | 
			
		||||
    axpy(out,-1.0,tmp2,Mtmp);
 | 
			
		||||
    axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
};
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp){
 | 
			
		||||
 | 
			
		||||
  } 
 | 
			
		||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  virtual void M2(Field & in, Field & out){
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
/*
 | 
			
		||||
template<class Field>
 | 
			
		||||
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
			
		||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
			
		||||
  virtual void M2(Field & in, Field & out);
 | 
			
		||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
			
		||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
			
		||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
			
		||||
  virtual void M2(Field & in, Field & out);
 | 
			
		||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
			
		||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
			
		||||
  virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
			
		||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
			
		||||
  virtual void M2(Field & in, Field & out);
 | 
			
		||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
			
		||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Field>
 | 
			
		||||
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
 | 
			
		||||
 public:
 | 
			
		||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
			
		||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
			
		||||
  virtual void M2(Field & in, Field & out);
 | 
			
		||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
			
		||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,8 @@ public:
 | 
			
		||||
  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
 | 
			
		||||
 | 
			
		||||
  virtual void operator()(const Field &src,Field &guess) {
 | 
			
		||||
    RealD t=-usecond();
 | 
			
		||||
    
 | 
			
		||||
    guess = Zero();
 | 
			
		||||
    assert(evec.size()==eval.size());
 | 
			
		||||
    auto N = evec.size();
 | 
			
		||||
@@ -68,6 +70,8 @@ public:
 | 
			
		||||
      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
 | 
			
		||||
    }
 | 
			
		||||
    guess.Checkerboard() = src.Checkerboard();
 | 
			
		||||
    t+=usecond();
 | 
			
		||||
    std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -59,7 +59,7 @@ public:
 | 
			
		||||
    GridBase *grid = src.Grid();
 | 
			
		||||
    Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid);
 | 
			
		||||
      
 | 
			
		||||
    psi=zero;
 | 
			
		||||
    psi=Zero();
 | 
			
		||||
    r  = src;
 | 
			
		||||
    Preconditioner(r,p);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -53,7 +53,11 @@ public:
 | 
			
		||||
  { 
 | 
			
		||||
    size_type bytes = __n*sizeof(_Tp);
 | 
			
		||||
    profilerAllocate(bytes);
 | 
			
		||||
#ifdef GRID_UVM
 | 
			
		||||
    _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
 | 
			
		||||
#else 
 | 
			
		||||
    _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
 | 
			
		||||
#endif
 | 
			
		||||
    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
 | 
			
		||||
    return ptr;
 | 
			
		||||
  }
 | 
			
		||||
@@ -62,7 +66,11 @@ public:
 | 
			
		||||
  { 
 | 
			
		||||
    size_type bytes = __n * sizeof(_Tp);
 | 
			
		||||
    profilerFree(bytes);
 | 
			
		||||
#ifdef GRID_UVM
 | 
			
		||||
    MemoryManager::SharedFree((void *)__p,bytes);
 | 
			
		||||
#else
 | 
			
		||||
    MemoryManager::CpuFree((void *)__p,bytes);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
 | 
			
		||||
 
 | 
			
		||||
@@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid);
 | 
			
		||||
#define AccSmall (3)
 | 
			
		||||
#define Shared   (4)
 | 
			
		||||
#define SharedSmall (5)
 | 
			
		||||
uint64_t total_cache;
 | 
			
		||||
uint64_t total_shared;
 | 
			
		||||
uint64_t total_device;
 | 
			
		||||
uint64_t total_host;;
 | 
			
		||||
void MemoryManager::PrintBytes(void)
 | 
			
		||||
{
 | 
			
		||||
  std::cout << " MemoryManager : "<<total_cache <<" cache       bytes "<<std::endl;
 | 
			
		||||
  std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl;
 | 
			
		||||
  std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
 | 
			
		||||
  std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl;
 | 
			
		||||
@@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes)
 | 
			
		||||
  if ( ptr == (void *) NULL ) {
 | 
			
		||||
    ptr = (void *) acceleratorAllocDevice(bytes);
 | 
			
		||||
    total_device+=bytes;
 | 
			
		||||
  } else {
 | 
			
		||||
    //    std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
@@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes)
 | 
			
		||||
  if ( ptr == (void *) NULL ) {
 | 
			
		||||
    ptr = (void *) acceleratorAllocShared(bytes);
 | 
			
		||||
    total_shared+=bytes;
 | 
			
		||||
    //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
			
		||||
    //    std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
			
		||||
    //    PrintBytes();
 | 
			
		||||
  } else {
 | 
			
		||||
    //    std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
@@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes)
 | 
			
		||||
  if ( ptr == (void *) NULL ) {
 | 
			
		||||
    ptr = (void *) acceleratorAllocShared(bytes);
 | 
			
		||||
    total_host+=bytes;
 | 
			
		||||
    //    std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
			
		||||
  } else {
 | 
			
		||||
    //    std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
			
		||||
  }
 | 
			
		||||
  return ptr;
 | 
			
		||||
}
 | 
			
		||||
@@ -120,7 +129,7 @@ void MemoryManager::Init(void)
 | 
			
		||||
  str= getenv("GRID_ALLOC_NCACHE_LARGE");
 | 
			
		||||
  if ( str ) {
 | 
			
		||||
    Nc = atoi(str);
 | 
			
		||||
    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
 | 
			
		||||
    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
 | 
			
		||||
      Ncache[Cpu]=Nc;
 | 
			
		||||
      Ncache[Acc]=Nc;
 | 
			
		||||
      Ncache[Shared]=Nc;
 | 
			
		||||
@@ -130,7 +139,7 @@ void MemoryManager::Init(void)
 | 
			
		||||
  str= getenv("GRID_ALLOC_NCACHE_SMALL");
 | 
			
		||||
  if ( str ) {
 | 
			
		||||
    Nc = atoi(str);
 | 
			
		||||
    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
 | 
			
		||||
    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
 | 
			
		||||
      Ncache[CpuSmall]=Nc;
 | 
			
		||||
      Ncache[AccSmall]=Nc;
 | 
			
		||||
      Ncache[SharedSmall]=Nc;
 | 
			
		||||
@@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
 | 
			
		||||
 | 
			
		||||
  if ( entries[v].valid ) {
 | 
			
		||||
    ret = entries[v].address;
 | 
			
		||||
    total_cache-=entries[v].bytes;
 | 
			
		||||
    entries[v].valid = 0;
 | 
			
		||||
    entries[v].address = NULL;
 | 
			
		||||
    entries[v].bytes = 0;
 | 
			
		||||
@@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
 | 
			
		||||
  entries[v].address=ptr;
 | 
			
		||||
  entries[v].bytes  =bytes;
 | 
			
		||||
  entries[v].valid  =1;
 | 
			
		||||
  total_cache+=entries[v].bytes;
 | 
			
		||||
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
@@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
 | 
			
		||||
  for(int e=0;e<ncache;e++){
 | 
			
		||||
    if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
 | 
			
		||||
      entries[e].valid = 0;
 | 
			
		||||
      total_cache-=bytes;
 | 
			
		||||
      return entries[e].address;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -93,8 +93,8 @@ private:
 | 
			
		||||
  static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
 | 
			
		||||
  static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
 | 
			
		||||
 | 
			
		||||
  static void PrintBytes(void);
 | 
			
		||||
 public:
 | 
			
		||||
  static void PrintBytes(void);
 | 
			
		||||
  static void Init(void);
 | 
			
		||||
  static void InitMessage(void);
 | 
			
		||||
  static void *AcceleratorAllocate(size_t bytes);
 | 
			
		||||
 
 | 
			
		||||
@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/lattice/Lattice_local.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_reduction.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_peekpoke.h>
 | 
			
		||||
//#include <Grid/lattice/Lattice_reality.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_reality.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_real_imag.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_comparison_utils.h>
 | 
			
		||||
#include <Grid/lattice/Lattice_comparison.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -52,6 +52,7 @@ public:
 | 
			
		||||
// This will be safe to call from accelerator_for and is trivially copy constructible
 | 
			
		||||
// The copy constructor for this will need to be used by device lambda functions
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
#undef LATTICE_BOUNDS_CHECK
 | 
			
		||||
template<class vobj> 
 | 
			
		||||
class LatticeView : public LatticeAccelerator<vobj>
 | 
			
		||||
{
 | 
			
		||||
@@ -61,14 +62,36 @@ public:
 | 
			
		||||
  void * cpu_ptr;
 | 
			
		||||
#ifdef GRID_SIMT
 | 
			
		||||
  accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { 
 | 
			
		||||
#ifdef LATTICE_BOUNDS_CHECK
 | 
			
		||||
    assert(i<this->_odata_size);
 | 
			
		||||
    assert(i>=0);
 | 
			
		||||
#endif
 | 
			
		||||
    return coalescedRead(this->_odata[i]); 
 | 
			
		||||
  }
 | 
			
		||||
#else 
 | 
			
		||||
  accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
 | 
			
		||||
  accelerator_inline const vobj & operator()(size_t i) const {
 | 
			
		||||
#ifdef LATTICE_BOUNDS_CHECK
 | 
			
		||||
    assert(i<this->_odata_size);
 | 
			
		||||
    assert(i>=0);
 | 
			
		||||
#endif
 | 
			
		||||
    return this->_odata[i];
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
 | 
			
		||||
  accelerator_inline vobj       & operator[](size_t i)       { return this->_odata[i]; };
 | 
			
		||||
  accelerator_inline const vobj & operator[](size_t i) const { 
 | 
			
		||||
#ifdef LATTICE_BOUNDS_CHECK
 | 
			
		||||
    assert(i<this->_odata_size);
 | 
			
		||||
    assert(i>=0);
 | 
			
		||||
#endif
 | 
			
		||||
    return this->_odata[i]; 
 | 
			
		||||
  };
 | 
			
		||||
  accelerator_inline vobj       & operator[](size_t i)       { 
 | 
			
		||||
#ifdef LATTICE_BOUNDS_CHECK
 | 
			
		||||
    assert(i<this->_odata_size);
 | 
			
		||||
    assert(i>=0);
 | 
			
		||||
#endif
 | 
			
		||||
    return this->_odata[i]; 
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  accelerator_inline uint64_t begin(void) const { return 0;};
 | 
			
		||||
  accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
 | 
			
		||||
 
 | 
			
		||||
@@ -77,9 +77,16 @@ const int SpinorIndex = 2;
 | 
			
		||||
template<typename T> struct isSpinor {
 | 
			
		||||
  static constexpr bool value = (SpinorIndex==T::TensorLevel);
 | 
			
		||||
};
 | 
			
		||||
const int CoarseIndex = 4;
 | 
			
		||||
template<typename T> struct isCoarsened {
 | 
			
		||||
  static constexpr bool value = (CoarseIndex<=T::TensorLevel);
 | 
			
		||||
};
 | 
			
		||||
template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
 | 
			
		||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
 | 
			
		||||
 | 
			
		||||
template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
 | 
			
		||||
template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
 | 
			
		||||
 | 
			
		||||
// ChrisK very keen to add extra space for Gparity doubling.
 | 
			
		||||
//
 | 
			
		||||
// Also add domain wall index, in a way where Wilson operator 
 | 
			
		||||
 
 | 
			
		||||
@@ -89,7 +89,8 @@ public:
 | 
			
		||||
  virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's
 | 
			
		||||
  virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   =0;
 | 
			
		||||
  virtual std::vector<int> Displacements(void)=0;
 | 
			
		||||
 | 
			
		||||
  virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -44,6 +44,9 @@ public:
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef StaggeredKernels<Impl> Kernels;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
			
		||||
 | 
			
		||||
  FermionField _tmp;
 | 
			
		||||
  FermionField &tmp(void) { return _tmp; }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -49,6 +49,9 @@ public:
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef StaggeredKernels<Impl> Kernels;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
			
		||||
 | 
			
		||||
  FermionField _tmp;
 | 
			
		||||
  FermionField &tmp(void) { return _tmp; }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,9 @@ public:
 | 
			
		||||
  FermionField _tmp;
 | 
			
		||||
  FermionField &tmp(void) { return _tmp; }
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////
 | 
			
		||||
  // Performance monitoring
 | 
			
		||||
  ////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -63,6 +63,9 @@ public:
 | 
			
		||||
  INHERIT_IMPL_TYPES(Impl);
 | 
			
		||||
  typedef WilsonKernels<Impl> Kernels;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
  // Implement the abstract base
 | 
			
		||||
  ///////////////////////////////////////////////////////////////
 | 
			
		||||
 
 | 
			
		||||
@@ -72,6 +72,9 @@ public:
 | 
			
		||||
  typedef WilsonKernels<Impl> Kernels;
 | 
			
		||||
  PmuStat stat;
 | 
			
		||||
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
			
		||||
 | 
			
		||||
  FermionField _tmp;
 | 
			
		||||
  FermionField &tmp(void) { return _tmp; }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -79,6 +79,8 @@ public:
 | 
			
		||||
    _Mat.M(in,tmp);
 | 
			
		||||
    G5R5(out,tmp);
 | 
			
		||||
  }
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -127,6 +129,8 @@ public:
 | 
			
		||||
    _Mat.M(in,tmp);
 | 
			
		||||
    out=g5*tmp;
 | 
			
		||||
  }
 | 
			
		||||
  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
			
		||||
  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 
 | 
			
		||||
@@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
			
		||||
  pickCheckerboard(Even, CloverTermEven, CloverTerm);
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
 | 
			
		||||
  pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
 | 
			
		||||
 | 
			
		||||
  pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
 | 
			
		||||
  pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
 | 
			
		||||
  pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <class Impl>
 | 
			
		||||
 
 | 
			
		||||
@@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  hspin(0)=fspin(0)-fspin(2);
 | 
			
		||||
  hspin(1)=fspin(1)-fspin(3);
 | 
			
		||||
}
 | 
			
		||||
@@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
 *  0 0 -1  0
 | 
			
		||||
 *  0 0  0 -1
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  hspin(0)=fspin(0);
 | 
			
		||||
  hspin(1)=fspin(1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  hspin(0)=fspin(2);
 | 
			
		||||
  hspin(1)=fspin(3);
 | 
			
		||||
}
 | 
			
		||||
  
 | 
			
		||||
//  template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  rfspin(0)=fspin(0);
 | 
			
		||||
  rfspin(1)=fspin(1);
 | 
			
		||||
  rfspin(2)=Zero();
 | 
			
		||||
  rfspin(3)=Zero();
 | 
			
		||||
}
 | 
			
		||||
//  template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  rfspin(0)=Zero();
 | 
			
		||||
  rfspin(1)=Zero();
 | 
			
		||||
  rfspin(2)=fspin(2);
 | 
			
		||||
  rfspin(3)=fspin(3);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  const int hN = N>>1;
 | 
			
		||||
  for(int s=0;s<hN;s++){
 | 
			
		||||
    rfspin(s)=fspin(s);
 | 
			
		||||
    rfspin(s+hN)=Zero();
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  const int hN = N>>1;
 | 
			
		||||
  for(int s=0;s<hN;s++){
 | 
			
		||||
    rfspin(s)=Zero();
 | 
			
		||||
    rfspin(s+hN)=fspin(s+hN);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
// Reconstruction routines to move back again to four spin
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
@@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
 */
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=timesMinusI(hspin(1));
 | 
			
		||||
@@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=timesI(hspin(1));
 | 
			
		||||
@@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)-=timesI(hspin(1));
 | 
			
		||||
@@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)+=timesI(hspin(1));
 | 
			
		||||
@@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)= hspin(1);
 | 
			
		||||
@@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=-hspin(1);
 | 
			
		||||
@@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)+=hspin(1);
 | 
			
		||||
@@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)-=hspin(1);
 | 
			
		||||
@@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
 */
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=timesMinusI(hspin(0));
 | 
			
		||||
@@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=     timesI(hspin(0));
 | 
			
		||||
@@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)-=timesI(hspin(0));
 | 
			
		||||
@@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)+=timesI(hspin(0));
 | 
			
		||||
@@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
 */
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=hspin(0);
 | 
			
		||||
@@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0);
 | 
			
		||||
  fspin(1)=hspin(1);
 | 
			
		||||
  fspin(2)=-hspin(0);
 | 
			
		||||
@@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)+=hspin(0);
 | 
			
		||||
@@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1);
 | 
			
		||||
  fspin(2)-=hspin(0);
 | 
			
		||||
@@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
 */
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
 | 
			
		||||
  fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
 | 
			
		||||
  fspin(2)=Zero();
 | 
			
		||||
@@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)=Zero();
 | 
			
		||||
  fspin(1)=Zero();
 | 
			
		||||
  fspin(2)=hspin(0)+hspin(0);
 | 
			
		||||
@@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
			
		||||
}
 | 
			
		||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
			
		||||
  fspin(0)+=hspin(0)+hspin(0);
 | 
			
		||||
  fspin(1)+=hspin(1)+hspin(1);
 | 
			
		||||
}
 | 
			
		||||
@@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
			
		||||
//////////
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjXp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
@@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////
 | 
			
		||||
// Xm
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjXm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjXm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconXm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconXm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconXm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconXm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////
 | 
			
		||||
// Yp
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjYp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjYp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconYp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconYp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconYp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconYp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////
 | 
			
		||||
// Ym
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjYm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjYm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconYm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconYm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconYm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconYm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -638,19 +594,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjZp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjZp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -660,19 +613,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconZp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconZp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -681,19 +631,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconZp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconZp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -706,19 +653,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjZm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjZm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -728,19 +672,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconZm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconZm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -749,19 +690,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconZm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconZm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -774,19 +712,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjTp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjTp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -796,19 +731,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconTp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconTp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconTp(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconTp(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////
 | 
			
		||||
// Tm
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProjTm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProjTm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spReconTm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spReconTm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumReconTm(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumReconTm(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
    }}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////
 | 
			
		||||
// 5p
 | 
			
		||||
////////
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProj5p(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spRecon5p(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumRecon5p(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// four spinor projectors for chiral proj
 | 
			
		||||
//  template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProj5p(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
//  template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
//  template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
 | 
			
		||||
// 5m
 | 
			
		||||
////////
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  spProj5m(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
@@ -1021,19 +924,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spRecon5m(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -1042,19 +942,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatr
 | 
			
		||||
 | 
			
		||||
template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  accumRecon5m(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    accumRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
@@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// four spinor projectors for chiral proj
 | 
			
		||||
//  template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
			
		||||
  spProj5m(hspin._internal,fspin._internal);
 | 
			
		||||
}
 | 
			
		||||
//  template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++) {
 | 
			
		||||
    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
//  template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
			
		||||
{
 | 
			
		||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
			
		||||
  for(int i=0;i<N;i++){ 
 | 
			
		||||
    for(int j=0;j<N;j++){
 | 
			
		||||
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
			
		||||
 
 | 
			
		||||
@@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
 | 
			
		||||
  z.Checkerboard() = x.Checkerboard();
 | 
			
		||||
  conformable(x,z);
 | 
			
		||||
  int Ls = grid->_rdimensions[0];
 | 
			
		||||
  Gamma G5(Gamma::Algebra::Gamma5);
 | 
			
		||||
  autoView( x_v, x, AcceleratorRead);
 | 
			
		||||
  autoView( z_v, z, AcceleratorWrite);
 | 
			
		||||
  uint64_t nloop = grid->oSites()/Ls;
 | 
			
		||||
@@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
 | 
			
		||||
    uint64_t ss = sss*Ls;
 | 
			
		||||
    for(int s=0;s<Ls;s++){
 | 
			
		||||
      int sp = Ls-1-s;
 | 
			
		||||
      coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
 | 
			
		||||
      auto tmp = x_v(ss+s);
 | 
			
		||||
      decltype(tmp) tmp_p;
 | 
			
		||||
      decltype(tmp) tmp_m;
 | 
			
		||||
      spProj5p(tmp_p,tmp);
 | 
			
		||||
      spProj5m(tmp_m,tmp);
 | 
			
		||||
      // Use of spProj5m, 5p captures the coarse space too
 | 
			
		||||
      coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
 | 
			
		||||
    }
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
@@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
 | 
			
		||||
  z.Checkerboard() = x.Checkerboard();
 | 
			
		||||
  conformable(x, z);
 | 
			
		||||
 | 
			
		||||
  Gamma G5(Gamma::Algebra::Gamma5);
 | 
			
		||||
  z = G5 * x;
 | 
			
		||||
  autoView( x_v, x, AcceleratorRead);
 | 
			
		||||
  autoView( z_v, z, AcceleratorWrite);
 | 
			
		||||
  uint64_t nloop = grid->oSites();
 | 
			
		||||
  accelerator_for(ss,nloop,vobj::Nsimd(),{
 | 
			
		||||
    auto tmp = x_v(ss);
 | 
			
		||||
    decltype(tmp) tmp_p;
 | 
			
		||||
    decltype(tmp) tmp_m;
 | 
			
		||||
    spProj5p(tmp_p,tmp);
 | 
			
		||||
    spProj5m(tmp_m,tmp);
 | 
			
		||||
    coalescedWrite(z_v[ss],tmp_p - tmp_m);
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
template<class CComplex, int nbasis>
 | 
			
		||||
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
 | 
			
		||||
{
 | 
			
		||||
@@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
 | 
			
		||||
    }
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
NAMESPACE_END(Grid);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes)
 | 
			
		||||
    ptr = (void *) NULL;
 | 
			
		||||
    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
			
		||||
  }
 | 
			
		||||
  //  size_t free,total;
 | 
			
		||||
  //  cudaMemGetInfo(&free,&total);
 | 
			
		||||
  //  std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl;
 | 
			
		||||
  return ptr;
 | 
			
		||||
};
 | 
			
		||||
inline void *acceleratorAllocDevice(size_t bytes)
 | 
			
		||||
@@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
 | 
			
		||||
    ptr = (void *) NULL;
 | 
			
		||||
    printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
			
		||||
  }
 | 
			
		||||
  //  size_t free,total;
 | 
			
		||||
  //  cudaMemGetInfo(&free,&total);
 | 
			
		||||
  //  std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl;
 | 
			
		||||
  return ptr;
 | 
			
		||||
};
 | 
			
		||||
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
 | 
			
		||||
 
 | 
			
		||||
@@ -191,7 +191,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
			
		||||
@@ -216,15 +216,16 @@ int main (int argc, char ** argv)
 | 
			
		||||
	
 | 
			
		||||
	
 | 
			
		||||
	    double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
			
		||||
	    double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
 | 
			
		||||
	    double flops=Nc*Nc*(6+8+8)*vol;
 | 
			
		||||
	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
			
		||||
	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
  }
 | 
			
		||||
#if 1
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
			
		||||
 | 
			
		||||
  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
			
		||||
@@ -258,10 +259,11 @@ int main (int argc, char ** argv)
 | 
			
		||||
	tmult  = tmult /Nloop;
 | 
			
		||||
	
 | 
			
		||||
	double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
			
		||||
	double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
 | 
			
		||||
	double flops=Nc*Nc*(6+8+8)*vol;
 | 
			
		||||
	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
 | 
			
		||||
	time = time * 1000; // convert to NS for GB/s
 | 
			
		||||
	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
			
		||||
	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
			
		||||
#include <Grid/Grid.h>
 | 
			
		||||
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
 ;
 | 
			
		||||
 | 
			
		||||
void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop)
 | 
			
		||||
{
 | 
			
		||||
  // what type LatticeComplex 
 | 
			
		||||
  GridBase *_grid = prop.Grid();
 | 
			
		||||
  
 | 
			
		||||
  typedef LatticeFermion FermionField;
 | 
			
		||||
  typedef LatticePropagator PropagatorField;
 | 
			
		||||
  typedef typename FermionField::vector_type vector_type;
 | 
			
		||||
  typedef typename FermionField::scalar_type ScalComplex;
 | 
			
		||||
  typedef iSinglet<ScalComplex> Tcomplex;
 | 
			
		||||
  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
			
		||||
  
 | 
			
		||||
  Gamma::Algebra Gmu [] = {
 | 
			
		||||
    Gamma::Algebra::GammaX,
 | 
			
		||||
    Gamma::Algebra::GammaY,
 | 
			
		||||
    Gamma::Algebra::GammaZ,
 | 
			
		||||
    Gamma::Algebra::GammaT
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  Coordinate latt_size   = _grid->_fdimensions;
 | 
			
		||||
  
 | 
			
		||||
  PropagatorField   num  (_grid); num  = Zero();
 | 
			
		||||
 | 
			
		||||
  LatComplex    sk(_grid);  sk = Zero();
 | 
			
		||||
  LatComplex    sk2(_grid); sk2= Zero();
 | 
			
		||||
  LatComplex    W(_grid); W= Zero();
 | 
			
		||||
  LatComplex    a(_grid); a= Zero();
 | 
			
		||||
  LatComplex    one  (_grid); one = ScalComplex(1.0,0.0);
 | 
			
		||||
  LatComplex denom(_grid); denom= Zero();
 | 
			
		||||
  LatComplex cosha(_grid); 
 | 
			
		||||
  LatComplex kmu(_grid); 
 | 
			
		||||
  LatComplex Wea(_grid); 
 | 
			
		||||
  LatComplex Wema(_grid); 
 | 
			
		||||
 | 
			
		||||
  ScalComplex ci(0.0,1.0);
 | 
			
		||||
  SpinColourMatrixD identity = ComplexD(1.0);
 | 
			
		||||
 | 
			
		||||
  for(int mu=0;mu<Nd;mu++) {
 | 
			
		||||
    
 | 
			
		||||
    LatticeCoordinate(kmu,mu);
 | 
			
		||||
    
 | 
			
		||||
    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
			
		||||
    
 | 
			
		||||
    kmu = TwoPiL * kmu;
 | 
			
		||||
    //    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
			
		||||
    
 | 
			
		||||
    sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
 | 
			
		||||
    sk  = sk  +     sin(kmu)    *sin(kmu); 
 | 
			
		||||
    
 | 
			
		||||
    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity);
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  W = one - M5 + sk2;
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  // Cosh alpha -> alpha
 | 
			
		||||
  ////////////////////////////////////////////
 | 
			
		||||
  cosha =  (one + W*W + sk) / (abs(W)*2.0);
 | 
			
		||||
 | 
			
		||||
  // FIXME Need a Lattice acosh
 | 
			
		||||
  {
 | 
			
		||||
  autoView(cosha_v,cosha,CpuRead);
 | 
			
		||||
  autoView(a_v,a,CpuWrite);
 | 
			
		||||
  for(int idx=0;idx<_grid->lSites();idx++){
 | 
			
		||||
    Coordinate lcoor(Nd);
 | 
			
		||||
    Tcomplex cc;
 | 
			
		||||
    //    RealD sgn;
 | 
			
		||||
    _grid->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    peekLocalSite(cc,cosha_v,lcoor);
 | 
			
		||||
    assert((double)real(cc)>=1.0);
 | 
			
		||||
    assert(fabs((double)imag(cc))<=1.0e-15);
 | 
			
		||||
    cc = ScalComplex(::acosh(real(cc)),0.0);
 | 
			
		||||
    pokeLocalSite(cc,a_v,lcoor);
 | 
			
		||||
  }}
 | 
			
		||||
  
 | 
			
		||||
  Wea = ( exp( a) * abs(W)  );
 | 
			
		||||
  Wema= ( exp(-a) * abs(W)  );
 | 
			
		||||
  
 | 
			
		||||
  num   = num + ( one - Wema ) * mass * identity;
 | 
			
		||||
  denom= ( Wea - one ) + mass*mass * (one - Wema); 
 | 
			
		||||
  prop = num/denom;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main (int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
@@ -307,6 +391,17 @@ int main (int argc, char ** argv)
 | 
			
		||||
    RealD M5  =0.8;
 | 
			
		||||
    DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
 | 
			
		||||
 | 
			
		||||
    /////////////////// Test code for (1-m)^2 ///////////////
 | 
			
		||||
    LatticePropagatorD prop1(&GRID);
 | 
			
		||||
    LatticePropagatorD prop2(&GRID);
 | 
			
		||||
    LatticeComplexD ratio(&GRID);
 | 
			
		||||
    MomentumSpacePropagatorTest(0.0,M5,prop1);
 | 
			
		||||
    MomentumSpacePropagatorTest(0.3,M5,prop2);
 | 
			
		||||
    ratio=localNorm2(prop2);
 | 
			
		||||
    ratio=ratio/localNorm2(prop1);
 | 
			
		||||
    std::cout << ratio;
 | 
			
		||||
    /////////////////// Test code for (1-m)^2 factor ///////////////
 | 
			
		||||
 | 
			
		||||
    // Momentum space prop
 | 
			
		||||
    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
			
		||||
    bool fiveD = false; //calculate 4d free propagator
 | 
			
		||||
 
 | 
			
		||||
@@ -565,8 +565,8 @@ int main (int argc, char ** argv)
 | 
			
		||||
  std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl;
 | 
			
		||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
			
		||||
 | 
			
		||||
  BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000);     
 | 
			
		||||
  BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24);
 | 
			
		||||
  BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000,false);     
 | 
			
		||||
  BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24,false);
 | 
			
		||||
  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
 | 
			
		||||
  ZeroGuesser<LatticeFermion> FineZeroGuesser;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user