Mirror of https://github.com/paboyle/Grid.git (synced 2025-10-31 12:04:33 +00:00)

Compare commits: 8f84bbed1b ... feature/hw (26 commits)
Commits in this comparison (SHA1):

3064c9a6e2, 729882827c, baa668d3ac, 3c82d16ed8, 5c8c0c2d7c, e5a100846c,
a74e2dc12e, 595f512a6e, a6499b22ff, b4e42a59c6, 8c913e0edd, fd3f93d8d3,
e9543cdacd, 98f7b3d298, b7b164ea24, 77124d99d5, e1327e7ea0, 569f78c2cf,
488c79d5a1, dc6b0f20b2, c0badc3e16, 58f6529b55, e3f056dfbb, da0ffa7a79,
fcc7640b9c, 0cbe2859e0

Combined diff:
| @@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, | ||||
|   Lattice<dotp> fine_inner_msk(fine); | ||||
|  | ||||
|   // Multiply could be fused with innerProduct | ||||
|   // Single block sum kernel could do both masks. | ||||
|   fine_inner = localInnerProduct(fineX,fineY); | ||||
|   mult(fine_inner_msk, fine_inner,FineMask); | ||||
|   blockSum(CoarseInner,fine_inner_msk); | ||||
| } | ||||
|  | ||||
|  | ||||
| class Geometry { | ||||
| public: | ||||
|   int npoint; | ||||
| @@ -80,8 +78,12 @@ public: | ||||
|     } | ||||
|     directions   [2*_d]=0; | ||||
|     displacements[2*_d]=0; | ||||
|   } | ||||
|  | ||||
|     std::cout <<GridLogMessage << "Geometry "<<std::endl; | ||||
|     for(int p=0;p<npoint;p++){ | ||||
|       std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl; | ||||
|     } | ||||
|   } | ||||
| }; | ||||
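The hunk above adds a diagnostic printout of the stencil geometry to the `Geometry` constructor: each stencil point carries a direction and a displacement, with the final point (`directions[2*_d]=0`, `displacements[2*_d]=0`) reserved for the self coupling. A standalone sketch of such a table is below; the +/- ordering inside the loop is an assumption for illustration only, since the hunk does not show the loop body that fills the tables.

```cpp
#include <iostream>
#include <vector>

// Standalone sketch (not Grid code) of a 2*d+1 point stencil table of the
// kind the Geometry printout reports: 2*d hopping points plus one self point.
int main() {
  const int d = 4;                          // coarsened dimensions (assumed)
  const int npoint = 2 * d + 1;
  std::vector<int> directions(npoint), displacements(npoint);
  for (int mu = 0; mu < d; mu++) {          // ordering assumed: +mu then -mu
    directions[2 * mu]     = mu;  displacements[2 * mu]     = +1;
    directions[2 * mu + 1] = mu;  displacements[2 * mu + 1] = -1;
  }
  directions[2 * d] = 0;  displacements[2 * d] = 0;   // self coupling
  for (int p = 0; p < npoint; p++)
    std::cout << "point " << p << " dir " << directions[p]
              << " delta " << displacements[p] << std::endl;
  return 0;
}
```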
|    | ||||
| template<class Fobj,class CComplex,int nbasis> | ||||
| @@ -102,8 +104,8 @@ public: | ||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||
|     CoarseGrid(_CoarseGrid), | ||||
|     FineGrid(_FineGrid), | ||||
|     subspace(nbasis,_FineGrid), | ||||
|     checkerboard(_checkerboard) | ||||
|     checkerboard(_checkerboard), | ||||
|     subspace(nbasis,_FineGrid) | ||||
|   { | ||||
|   }; | ||||
|    | ||||
| @@ -285,6 +287,8 @@ public: | ||||
|   /////////////////////// | ||||
|   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return geom.directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return geom.displacements; }; | ||||
|   void M (const CoarseVector &in, CoarseVector &out) | ||||
|   { | ||||
|     conformable(_grid,in.Grid()); | ||||
| @@ -308,6 +312,9 @@ public: | ||||
|  | ||||
|     int osites=Grid()->oSites(); | ||||
|  | ||||
|     autoView(st,Stencil,AcceleratorRead); | ||||
|     siteVector *CBp=Stencil.CommBuf(); | ||||
|  | ||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||
|       int ss = sss/nbasis; | ||||
|       int b  = sss%nbasis; | ||||
| @@ -318,12 +325,12 @@ public: | ||||
|  | ||||
|       for(int point=0;point<geom.npoint;point++){ | ||||
|  | ||||
| 	SE=Stencil.GetEntry(ptype,point,ss); | ||||
| 	SE=st.GetEntry(ptype,point,ss); | ||||
| 	   | ||||
| 	if(SE->_is_local) {  | ||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||
| 	} else { | ||||
| 	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); | ||||
| 	  nbr = coalescedRead(CBp[SE->_offset]); | ||||
| 	} | ||||
| 	acceleratorSynchronise(); | ||||
|  | ||||
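The hunk above hoists the stencil view (`autoView(st,Stencil,AcceleratorRead)`) and the communication-buffer pointer `CBp` out of the `accelerator_for` body, so the device lambda captures a lightweight view and a raw pointer instead of reaching back into the host-side `Stencil` object. A generic sketch of that capture pattern follows, using hypothetical stand-in types rather than Grid's API:

```cpp
#include <cstddef>
#include <vector>

// Stand-in for an accelerator loop: here it simply runs on the host.
template <class Kernel>
void device_for(size_t n, Kernel k) { for (size_t i = 0; i < n; i++) k(i); }

// Trivially copyable "view" handle, analogous in spirit to an autoView result.
struct FieldView { const double* data; size_t size; };

void gather_neighbours(const std::vector<double>& field,
                       const std::vector<size_t>& offsets,
                       std::vector<double>& out) {
  // Hoist plain handles before entering the kernel, as the hunk does with
  // st and CBp; the lambda then captures only these POD objects by value.
  FieldView     field_v{field.data(), field.size()};
  const size_t* off_p = offsets.data();
  double*       out_p = out.data();

  device_for(offsets.size(), [=](size_t i) { out_p[i] = field_v.data[off_p[i]]; });
}
```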
| @@ -409,38 +416,23 @@ public: | ||||
|       MdirCalc(in,out[p],p); | ||||
|     } | ||||
|   }; | ||||
|   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ | ||||
|  | ||||
|   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp) | ||||
|   { | ||||
|     this->MdirComms(in); | ||||
|  | ||||
|     int ndim = in.Grid()->Nd(); | ||||
|  | ||||
|     ////////////// | ||||
|     // 4D action like wilson | ||||
|     // 0+ => 0  | ||||
|     // 0- => 1 | ||||
|     // 1+ => 2  | ||||
|     // 1- => 3 | ||||
|     // etc.. | ||||
|     ////////////// | ||||
|     // 5D action like DWF | ||||
|     // 1+ => 0  | ||||
|     // 1- => 1 | ||||
|     // 2+ => 2  | ||||
|     // 2- => 3 | ||||
|     // etc.. | ||||
|     auto point = [dir, disp, ndim](){ | ||||
|       if(dir == 0 and disp == 0) | ||||
| 	return 8; | ||||
|       else if ( ndim==4 ) {  | ||||
| 	return (4 * dir + 1 - disp) / 2; | ||||
|       } else {  | ||||
| 	return (4 * (dir-1) + 1 - disp) / 2; | ||||
|     int point=-1; | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p; | ||||
|     } | ||||
|     }(); | ||||
|     assert(point!=-1);// Must find | ||||
|  | ||||
|     std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp  <<std::endl; | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl; | ||||
|     } | ||||
|     MdirCalc(in,out,point); | ||||
|  | ||||
|   }; | ||||
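The rewritten `Mdir` above drops the hard-coded 4D/5D formula mapping (dir, disp) to a stencil point and instead scans the geometry tables, which works for any point layout `Geometry` defines. The same lookup as a free function (illustrative only, not Grid code):

```cpp
#include <cassert>
#include <vector>

// Map (dir, disp) to its stencil point by scanning the geometry tables,
// mirroring the loop in Mdir above.  Returns -1 if no point matches; the
// caller asserts, as the "Must find" check does.
int PointForDirDisp(int dir, int disp,
                    const std::vector<int>& directions,
                    const std::vector<int>& displacements) {
  assert(directions.size() == displacements.size());
  for (size_t p = 0; p < directions.size(); p++) {
    if (directions[p] == dir && displacements[p] == disp) return (int)p;
  }
  return -1;
}
```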
|  | ||||
|   void Mdiag(const CoarseVector &in, CoarseVector &out) | ||||
| @@ -460,6 +452,54 @@ public: | ||||
|   { | ||||
|   }; | ||||
|  | ||||
|   void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop) | ||||
|   { | ||||
|     typedef Lattice<Fobj> FineField; | ||||
|     CoarseVector Cin(_grid); | ||||
|     CoarseVector Cout(_grid); | ||||
|     CoarseVector CFout(_grid); | ||||
|  | ||||
|     FineField Fin(FineGrid); | ||||
|     FineField Fout(FineGrid); | ||||
|  | ||||
|  | ||||
|     std::vector<int> seeds({1,2,3,4,5}); | ||||
|     GridParallelRNG RNG(_grid);  RNG.SeedFixedIntegers(seeds); | ||||
|     gaussian(RNG,Cin); | ||||
|  | ||||
|     _Aggregates.PromoteFromSubspace(Cin,Fin); | ||||
|     _Aggregates.ProjectToSubspace(Cin,Fin); | ||||
|  | ||||
|     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||
|     std::cout << GridLogMessage<< " Testing M  "<<std::endl; | ||||
|     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||
|     // Coarse operator | ||||
|     this->M(Cin,Cout); | ||||
|     // Fine projected operator | ||||
|     _Aggregates.PromoteFromSubspace(Cin,Fin); | ||||
|     linop.Op(Fin,Fout); | ||||
|     _Aggregates.ProjectToSubspace(CFout,Fout); | ||||
|  | ||||
|     CFout = CFout-Cout; | ||||
|     RealD diff = norm2(CFout); | ||||
|     std::cout << GridLogMessage<< " diff  "<<diff<<std::endl; | ||||
|     assert(diff<1.0e-5); | ||||
|  | ||||
|     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||
|     std::cout << GridLogMessage<< " Testing Mdag  "<<std::endl; | ||||
|     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||
|     // Coarse operator | ||||
|     Mdag(Cin,Cout); | ||||
|     // Fine operator | ||||
|     linop.AdjOp(Fin,Fout); | ||||
|     _Aggregates.ProjectToSubspace(CFout,Fout); | ||||
|  | ||||
|     CFout = CFout-Cout; | ||||
|     diff = norm2(CFout); | ||||
|     std::cout << GridLogMessage<< " diff  "<<diff<<std::endl;  | ||||
|     assert(diff<1.0e-5); | ||||
|   } | ||||
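The `Test` routine added above is a Galerkin consistency check: it applies the coarse operator to a random coarse vector and compares against the fine operator sandwiched between promotion and projection. In the usual notation (assumed here, the hunk does not spell it out), with P the promotion built from the aggregate basis and its adjoint the projection back, the assertions verify

```latex
M_c\, v_c \;\approx\; P^{\dagger} D\, P\, v_c ,
\qquad
\big\| P^{\dagger} D\, P\, v_c - M_c\, v_c \big\|^{2} < 10^{-5},
```

together with the analogous relation between `Mdag` and the adjoint fine operator.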
|  | ||||
|   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, | ||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||
|   { | ||||
| @@ -496,8 +536,19 @@ public: | ||||
|  | ||||
|     CoarseScalar InnerProd(Grid());  | ||||
|  | ||||
|     std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl; | ||||
|     // Orthogonalise the subblocks over the basis | ||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||
|     std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl; | ||||
|  | ||||
|     auto OpDirections    = linop.Directions(); | ||||
|     auto OpDisplacements = linop.Displacements(); | ||||
|  | ||||
|     std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl; | ||||
|     for(int p=0;p<OpDirections.size();p++) { | ||||
|       assert(OpDirections[p]==geom.directions[p]); | ||||
|       assert(OpDisplacements[p]==geom.displacements[p]); | ||||
|     } | ||||
|  | ||||
|     // Compute the matrix elements of linop between this orthonormal | ||||
|     // set of vectors. | ||||
| @@ -533,13 +584,27 @@ public: | ||||
|     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); | ||||
|     oddmask  = one-evenmask; | ||||
|  | ||||
|     /* | ||||
|     { | ||||
|       phi=Subspace.subspace[0]; | ||||
|       linop.OpDirAll(phi,Mphi_p); | ||||
|       for(int p=0;p<geom.npoint-1;p++){ | ||||
| 	int dir=geom.directions[p]; | ||||
| 	int disp=geom.displacements[p]; | ||||
| 	linop.OpDir(phi,Mphi,dir,disp); | ||||
| 	Mphi=Mphi-Mphi_p[p]; | ||||
| 	std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl; | ||||
|       } | ||||
|     } | ||||
| */ | ||||
|     assert(self_stencil!=-1); | ||||
|     int lhermitian=hermitian; | ||||
|  | ||||
|     for(int i=0;i<nbasis;i++){ | ||||
|  | ||||
|       phi=Subspace.subspace[i]; | ||||
|  | ||||
|       //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; | ||||
|       std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; | ||||
|       linop.OpDirAll(phi,Mphi_p); | ||||
|       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); | ||||
|  | ||||
| @@ -550,7 +615,7 @@ public: | ||||
| 	int dir   = geom.directions[p]; | ||||
| 	int disp  = geom.displacements[p]; | ||||
|  | ||||
| 	if ( (disp==-1) || (!hermitian ) ) { | ||||
| 	if ( (disp==-1) || (!lhermitian ) ) { | ||||
|  | ||||
| 	  //////////////////////////////////////////////////////////////////////// | ||||
| 	  // Pick out contributions coming from this cell and neighbour cell | ||||
| @@ -568,11 +633,23 @@ public: | ||||
| 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||
|  | ||||
| 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | ||||
|  | ||||
| 	    if ( lhermitian && (disp==-1) ) { | ||||
| 	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>* | ||||
| 		int dirp   = geom.directions[pp]; | ||||
| 		int dispp  = geom.displacements[pp]; | ||||
| 		if ( (dirp==dir) && (dispp==1) ){ | ||||
| 		  auto sft = conjugate(Cshift(oZProj,dir,1)); | ||||
| 		  autoView( sft_v    ,  sft  , AcceleratorWrite); | ||||
| 		  autoView( A_pp     ,  A[pp], AcceleratorWrite); | ||||
| 		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); | ||||
| 		} | ||||
| 	      } | ||||
| 	    } | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|  | ||||
|       std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl; | ||||
|       /////////////////////////////////////////// | ||||
|       // Faster alternate self coupling.. use hermiticity to save 2x | ||||
|       /////////////////////////////////////////// | ||||
| @@ -604,31 +681,35 @@ public: | ||||
|  | ||||
|       } | ||||
|     } | ||||
|     if(hermitian) { | ||||
|       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; | ||||
|       ForceHermitian(); | ||||
|  | ||||
|     MemoryManager::PrintBytes(); | ||||
|  | ||||
|     // Auto self test | ||||
|     Test( Subspace,FineGrid,linop); | ||||
|  | ||||
| #if 0 | ||||
|     /////////////////////////// | ||||
|     // test code worth preserving in if block | ||||
|     /////////////////////////// | ||||
|     std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl; | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl; | ||||
|       std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl; | ||||
|     } | ||||
|     std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl; | ||||
|  | ||||
|     phi=Subspace.subspace[0]; | ||||
|     std::vector<int> bc(FineGrid->_ndimension,0); | ||||
|     blockPick(Grid(),phi,tmp,bc);      // Pick out a block | ||||
|     linop.Op(tmp,Mphi);                // Apply big dop | ||||
|     blockProject(iProj,Mphi,Subspace.subspace); // project it and print it | ||||
|     std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl; | ||||
|     std::cout<<GridLogMessage<< iProj <<std::endl; | ||||
|     std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl; | ||||
| #endif | ||||
|  | ||||
|   } | ||||
|  | ||||
|   void ForceHermitian(void) { | ||||
|     CoarseMatrix Diff  (Grid()); | ||||
|     for(int p=0;p<geom.npoint;p++){ | ||||
|       int dir   = geom.directions[p]; | ||||
|       int disp  = geom.displacements[p]; | ||||
|       if(disp==-1) { | ||||
| 	// Find the opposite link | ||||
| 	for(int pp=0;pp<geom.npoint;pp++){ | ||||
| 	  int dirp   = geom.directions[pp]; | ||||
| 	  int dispp  = geom.displacements[pp]; | ||||
| 	  if ( (dirp==dir) && (dispp==1) ){ | ||||
| 	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp];  | ||||
| 	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl; | ||||
| 	    A[pp] = adj(Cshift(A[p],dir,1)); | ||||
| 	  } | ||||
| 	} | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
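Both the in-loop filling of the opposite stencil leg (writing `conjugate(Cshift(oZProj,dir,1))` into `A[pp](i,j)`) and the `ForceHermitian()` routine rely on the same identity for a Hermitian fine operator, with index conventions inferred from the code:

```latex
A_{+\hat\mu}(x) \;=\; \big[ A_{-\hat\mu}(x+\hat\mu) \big]^{\dagger}
\qquad\Longleftrightarrow\qquad
A_{+\hat\mu} \;=\; \mathrm{adj}\!\big(\mathrm{Cshift}(A_{-\hat\mu},\,\mu,\,+1)\big) ,
```

so when `hermitian` is set only the disp = -1 legs (plus the self coupling) need to be computed explicitly.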
|   | ||||
| @@ -52,6 +52,9 @@ public: | ||||
|   virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base | ||||
|   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; | ||||
|   virtual void HermOp(const Field &in, Field &out)=0; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   =0; | ||||
|   virtual std::vector<int> Displacements(void)=0; | ||||
| }; | ||||
|  | ||||
|  | ||||
| @@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> { | ||||
| public: | ||||
|   MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|  | ||||
|   // Support for coarsening to a multigrid | ||||
|   void OpDiag (const Field &in, Field &out) { | ||||
|     _Mat.Mdiag(in,out); | ||||
| @@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> { | ||||
|   Matrix &_Mat; | ||||
|   RealD _shift; | ||||
| public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|   ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){}; | ||||
|   // Support for coarsening to a multigrid | ||||
|   void OpDiag (const Field &in, Field &out) { | ||||
| @@ -151,6 +159,8 @@ template<class Matrix,class Field> | ||||
| class HermitianLinearOperator : public LinearOperatorBase<Field> { | ||||
|   Matrix &_Mat; | ||||
| public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|   HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||
|   // Support for coarsening to a multigrid | ||||
|   void OpDiag (const Field &in, Field &out) { | ||||
| @@ -182,6 +192,8 @@ template<class Matrix,class Field> | ||||
| class NonHermitianLinearOperator : public LinearOperatorBase<Field> { | ||||
|   Matrix &_Mat; | ||||
| public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|   NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||
|   // Support for coarsening to a multigrid | ||||
|   void OpDiag (const Field &in, Field &out) { | ||||
| @@ -255,6 +267,8 @@ template<class Matrix,class Field> | ||||
|   class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||
|  public: | ||||
|     Matrix &_Mat; | ||||
|     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|     SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|     virtual  void Mpc      (const Field &in, Field &out) { | ||||
|       Field tmp(in.Grid()); | ||||
| @@ -281,6 +295,8 @@ template<class Matrix,class Field> | ||||
|  protected: | ||||
|     Matrix &_Mat; | ||||
|  public: | ||||
|     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|     SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|      | ||||
|     virtual void Mpc      (const Field &in, Field &out) { | ||||
| @@ -307,6 +323,8 @@ template<class Matrix,class Field> | ||||
|  protected: | ||||
|     Matrix &_Mat; | ||||
|  public: | ||||
|     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|     SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; | ||||
|      | ||||
|     virtual void Mpc      (const Field &in, Field &out) { | ||||
| @@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator :  public NonHermitianSchurOperatorBase | ||||
| { | ||||
|  public: | ||||
|   Matrix& _Mat; | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|  NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; | ||||
|   virtual void Mpc(const Field& in, Field& out) { | ||||
|     Field tmp(in.Grid()); | ||||
| @@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi | ||||
|   Matrix &_Mat; | ||||
|    | ||||
|  public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|   NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; | ||||
|   virtual void Mpc(const Field& in, Field& out) { | ||||
|     Field tmp(in.Grid()); | ||||
| @@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi | ||||
|   Matrix& _Mat; | ||||
|    | ||||
|  public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|  NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; | ||||
|  | ||||
|   virtual void Mpc(const Field& in, Field& out) { | ||||
| @@ -475,6 +499,8 @@ class SchurStaggeredOperator :  public SchurOperatorBase<Field> { | ||||
|   Field tmp; | ||||
|   RealD mass; | ||||
|  public: | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
|   SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())  | ||||
|   {  | ||||
|     assert( _Mat.isTrivialEE() ); | ||||
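The LinearOperator.h hunks above promote `Directions()`/`Displacements()` to part of the operator interface and forward them from every wrapper (MdagM, shifted, Hermitian, non-Hermitian and Schur variants) to the wrapped matrix, so `CoarsenOperator` can assert that its `Geometry` matches the operator being coarsened. The forwarding boilerplate always has the same shape; a stripped-down sketch with illustrative names:

```cpp
#include <vector>

// Stripped-down sketch of the forwarding pattern added to each wrapper
// (class names are illustrative, not Grid's).
struct OperatorBase {
  virtual std::vector<int> Directions(void)    = 0;
  virtual std::vector<int> Displacements(void) = 0;
  virtual ~OperatorBase() {}
};

template <class Matrix>
struct WrappedOperator : public OperatorBase {
  Matrix& _Mat;
  WrappedOperator(Matrix& Mat) : _Mat(Mat) {}
  std::vector<int> Directions(void)    override { return _Mat.Directions(); }
  std::vector<int> Displacements(void) override { return _Mat.Displacements(); }
};
```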
|   | ||||
| @@ -48,6 +48,8 @@ public: | ||||
|   virtual  void Mdiag    (const Field &in, Field &out)=0; | ||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0; | ||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0; | ||||
|   virtual std::vector<int> Directions(void)   =0; | ||||
|   virtual std::vector<int> Displacements(void)=0; | ||||
| }; | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -73,6 +75,8 @@ public: | ||||
|   virtual  void MooeeDag    (const Field &in, Field &out)=0; | ||||
|   virtual  void MooeeInvDag (const Field &in, Field &out)=0; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   =0; | ||||
|   virtual std::vector<int> Displacements(void)=0; | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|   | ||||
| @@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | ||||
| #define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | ||||
|  | ||||
| NAMESPACE_BEGIN(Grid); | ||||
|   /* | ||||
|    * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv.  | ||||
|    * Script A = SolverMatrix  | ||||
| @@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|    * Vout = x | ||||
|    */ | ||||
|  | ||||
| // abstract base | ||||
| template<class Field, class CoarseField> | ||||
|  | ||||
| template<class Field, class CoarseField, class Aggregates> | ||||
| class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
| { | ||||
|  public: | ||||
|  | ||||
|   int verbose; | ||||
|  | ||||
|   RealD   Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   const int mmax = 5; | ||||
|   GridBase *grid; | ||||
|   GridBase *coarsegrid; | ||||
|   const int mmax = 4; | ||||
|   GridBase *FineGrid; | ||||
|   GridBase *CoarseGrid; | ||||
|  | ||||
|   LinearOperatorBase<Field>   *_Linop | ||||
|   OperatorFunction<Field>     *_Smoother, | ||||
|   LinearFunction<CoarseField> *_CoarseSolver; | ||||
|  | ||||
|   // Need something that knows how to get from Coarse to fine and back again | ||||
|   LinearOperatorBase<Field>   &_Linop; | ||||
|   LinearFunction<Field>     &_Smoother; | ||||
|   LinearFunction<CoarseField> &_CoarseSolver; | ||||
|   Aggregates                  &_Aggregates; | ||||
|    | ||||
|   // more most opertor functions | ||||
|   TwoLevelFlexiblePcg(RealD tol, | ||||
| 		      Integer maxit, | ||||
| 		      LinearOperatorBase<Field> *Linop, | ||||
| 		     LinearOperatorBase<Field> *SmootherLinop, | ||||
| 		     OperatorFunction<Field>   *Smoother, | ||||
| 		     OperatorFunction<CoarseField>  CoarseLinop | ||||
| 		      LinearFunction<Field>   *Smoother, | ||||
| 		      LinearFunction<CoarseField> *CoarseSolver, | ||||
| 		      Aggregates *AggP | ||||
| 		      ) :  | ||||
|   Tolerance(tol),  | ||||
|     MaxIterations(maxit), | ||||
|       _Linop(Linop), | ||||
|       _PreconditionerLinop(PrecLinop), | ||||
|       _Preconditioner(Preconditioner) | ||||
|     _Linop(*Linop), | ||||
|     _Smoother(*Smoother), | ||||
|     _CoarseSolver(*CoarseSolver), | ||||
|     _Aggregates(*AggP) | ||||
|   {  | ||||
|     CoarseGrid=_Aggregates.CoarseGrid; | ||||
|     FineGrid=_Aggregates.FineGrid; | ||||
|     verbose=0; | ||||
|   }; | ||||
|  | ||||
|   // The Pcg routine is common to all, but the various matrices differ from derived  | ||||
|   // implementation to derived implementation | ||||
|   void operator() (const Field &src, Field &psi){ | ||||
|   void operator() (const Field &src, Field &psi){ | ||||
|  | ||||
|     psi.Checkerboard() = src.Checkerboard(); | ||||
|     grid             = src.Grid(); | ||||
|  | ||||
|     RealD f; | ||||
|     RealD rtzp,rtz,a,d,b; | ||||
|     RealD rptzp; | ||||
|     RealD tn; | ||||
|     //    RealD rptzp; | ||||
|     //    RealD tn; | ||||
|     RealD guess = norm2(psi); | ||||
|     RealD ssq   = norm2(src); | ||||
|     RealD rsq   = ssq*Tolerance*Tolerance; | ||||
| @@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|     ///////////////////////////// | ||||
|     // Set up history vectors | ||||
|     ///////////////////////////// | ||||
|     std::vector<Field> p  (mmax,grid); | ||||
|     std::vector<Field> mmp(mmax,grid); | ||||
|     std::vector<Field> p  (mmax,FineGrid); | ||||
|     std::vector<Field> mmp(mmax,FineGrid); | ||||
|     std::vector<RealD> pAp(mmax); | ||||
|  | ||||
|     Field x  (grid); x = psi; | ||||
|     Field z  (grid); | ||||
|     Field tmp(grid); | ||||
|     Field r  (grid); | ||||
|     Field mu (grid); | ||||
|     Field x  (FineGrid); x = psi; | ||||
|     Field z  (FineGrid); | ||||
|     Field tmp(FineGrid); | ||||
|     Field r  (FineGrid); | ||||
|     Field mu (FineGrid); | ||||
|    | ||||
|     ////////////////////////// | ||||
|     // x0 = Vstart -- possibly modify guess | ||||
| @@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|     Vstart(x,src); | ||||
|  | ||||
|     // r0 = b -A x0 | ||||
|     HermOp(x,mmp); // Shouldn't this be something else? | ||||
|     _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else? | ||||
|     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Compute z = M1 x | ||||
|     ////////////////////////////////// | ||||
|     M1(r,z,tmp,mp,SmootherMirs); | ||||
|     M1(r,z); | ||||
|     rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|     /////////////////////////////////////// | ||||
| @@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|       int peri_kp = (k+1) % mmax; | ||||
|  | ||||
|       rtz=rtzp; | ||||
|       d= M3(p[peri_k],mp,mmp[peri_k],tmp); | ||||
|       d= M3(p[peri_k],mmp[peri_k]); | ||||
|       a = rtz/d; | ||||
|      | ||||
|       // Memorise this | ||||
| @@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||
|  | ||||
|       // Compute z = M x | ||||
|       M1(r,z,tmp,mp); | ||||
|       M1(r,z); | ||||
|  | ||||
|       rtzp =real(innerProduct(r,z)); | ||||
|  | ||||
|       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||
|  | ||||
|       p[peri_kp]=p[peri_k]; | ||||
|       p[peri_kp]=mu; | ||||
|  | ||||
|       // Standard search direction  p -> z + b p    ; b =  | ||||
|       b = (rtzp)/rtz; | ||||
| @@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|       // Stopping condition | ||||
|       if ( rn <= rsq ) {  | ||||
|  | ||||
| 	HermOp(x,mmp); // Shouldn't this be something else? | ||||
| 	_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else? | ||||
| 	axpy(tmp,-1.0,src,mmp[0]); | ||||
| 	 | ||||
| 	RealD psinorm = sqrt(norm2(x)); | ||||
| @@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
| 	RealD true_residual = tmpnorm/srcnorm; | ||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | ||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | ||||
| 	return k; | ||||
|  | ||||
| 	return; | ||||
|       } | ||||
|     } | ||||
|     // Non-convergence | ||||
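For reference, the textbook preconditioned-CG recurrence that the loop's comments describe is below; the depth-`mmax` circular history addressed through `peri_k`/`peri_kp` is an implementation detail of the flexible, truncated variant and is not written out here.

```latex
\alpha_k = \frac{(r_k, z_k)}{(p_k, A\, p_k)}, \quad
x_{k+1} = x_k + \alpha_k p_k, \quad
r_{k+1} = r_k - \alpha_k A p_k, \\[4pt]
z_{k+1} = M_1 r_{k+1}, \quad
\beta_k = \frac{(r_{k+1}, z_{k+1})}{(r_k, z_k)}, \quad
p_{k+1} = z_{k+1} + \beta_k p_k .
```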
| @@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   virtual void M(Field & in,Field & out,Field & tmp) { | ||||
|  | ||||
|   } | ||||
|  | ||||
|   virtual void M1(Field & in, Field & out) {// the smoother | ||||
|  | ||||
|   virtual void M1(Field & in, Field & out)  | ||||
|   {// the smoother | ||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||
|     Field tmp(grid); | ||||
|     Field Min(grid); | ||||
|     Field tmp(FineGrid); | ||||
|     Field Min(FineGrid); | ||||
|  | ||||
|     PcgM(in,Min); // Smoother call | ||||
|     CoarseField PleftProj(CoarseGrid); | ||||
|     CoarseField PleftMss_proj(CoarseGrid); | ||||
|  | ||||
|     HermOp(Min,out); | ||||
|     _Smoother(in,Min); // Smoother call | ||||
|  | ||||
|     _Linop.HermOp(Min,out); | ||||
|     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min | ||||
|  | ||||
|     ProjectToSubspace(tmp,PleftProj);      | ||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||
|     PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,tmp);      | ||||
|     _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||
|     _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||
|     axpy(out,1.0,Min,tmp); // Min+tmp | ||||
|   } | ||||
|  | ||||
|   virtual void M2(const Field & in, Field & out) { | ||||
|   virtual void M2(const Field & in, Field & out)  | ||||
|   { | ||||
|     out=in; | ||||
|     // Must override for Def2 only | ||||
|     //  case PcgDef2: | ||||
|     //    Pright(in,out); | ||||
|     //    break; | ||||
|   } | ||||
|  | ||||
|   virtual RealD M3(const Field & p, Field & mmp){ | ||||
|   virtual RealD M3(const Field & p, Field & mmp) | ||||
|   { | ||||
|     double d,dd; | ||||
|     HermOpAndNorm(p,mmp,d,dd); | ||||
|     _Linop.HermOpAndNorm(p,mmp,d,dd); | ||||
|     return dd; | ||||
|     // Must override for Def1 only | ||||
|     //  case PcgDef1: | ||||
|     //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no | ||||
|     //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes | ||||
|     //    Pleft(mp,mmp); | ||||
|     //    d=real(linop_d->inner(p,mmp)); | ||||
|   } | ||||
|  | ||||
|   virtual void VstartDef2(Field & xconst Field & src){ | ||||
|   virtual void Vstart(Field & x,const Field & src) | ||||
|   { | ||||
|     //case PcgDef2: | ||||
|     //case PcgAdef2:  | ||||
|     //case PcgAdef2f: | ||||
| @@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||
|     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  | ||||
|     //                   = 0  | ||||
|     /////////////////////////////////// | ||||
|     Field r(grid); | ||||
|     Field mmp(grid); | ||||
|     Field r(FineGrid); | ||||
|     Field mmp(FineGrid); | ||||
|  | ||||
|     HermOp(x,mmp); | ||||
|     CoarseField PleftProj(CoarseGrid); | ||||
|     CoarseField PleftMss_proj(CoarseGrid); | ||||
|      | ||||
|     _Linop.HermOp(x,mmp); | ||||
|     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x | ||||
|     ProjectToSubspace(r,PleftProj);      | ||||
|     ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     PromoteFromSubspace(PleftMss_proj,mmp);   | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,r);      | ||||
|     _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     _Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);   | ||||
|     x=x+mmp; | ||||
|  | ||||
|   } | ||||
|  | ||||
|   virtual void Vstart(Field & x,const Field & src){ | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   // Only Def1 has non-trivial Vout. Override in Def1 | ||||
|   ///////////////////////////////////////////////////////////////////// | ||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||
|     out = in; | ||||
|     //case PcgDef1: | ||||
|     //    //Qb + PT x | ||||
|     //    ProjectToSubspace(src,PleftProj);      | ||||
|     //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||
|     //    PromoteFromSubspace(PleftMss_proj,tmp);   | ||||
|     //     | ||||
|     //    Pright(in,out); | ||||
|     //     | ||||
|     //    linop_d->axpy(out,tmp,out,1.0); | ||||
|     //    break; | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Pright and Pleft are common to all implementations | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   virtual void Pright(Field & in,Field & out){ | ||||
|   virtual void Pright(Field & in,Field & out) | ||||
|   { | ||||
|     // P_R  = [ 1              0 ]  | ||||
|     //        [ -Mss^-1 Msb    0 ]  | ||||
|     Field in_sbar(grid); | ||||
|     Field in_sbar(FineGrid); | ||||
|  | ||||
|     ProjectToSubspace(in,PleftProj);      | ||||
|     PromoteFromSubspace(PleftProj,out);   | ||||
|     CoarseField PleftProj(CoarseGrid); | ||||
|     CoarseField PleftMss_proj(CoarseGrid); | ||||
|  | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,in);      | ||||
|     _Aggregates.PromoteFromSubspace(PleftProj,out);   | ||||
|     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  | ||||
|  | ||||
|     HermOp(in_sbar,out); | ||||
|     ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project) | ||||
|     _Linop.HermOp(in_sbar,out); | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,out);           // Mssbar in_sbar  (project) | ||||
|  | ||||
|     ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  | ||||
|     PromoteFromSubspace(PleftMss_proj,out);     //  | ||||
|     _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  | ||||
|     _Aggregates.PromoteFromSubspace(PleftMss_proj,out);     //  | ||||
|  | ||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar | ||||
|   } | ||||
|   virtual void Pleft (Field & in,Field & out){ | ||||
|   virtual void Pleft (Field & in,Field & out) | ||||
|   { | ||||
|     // P_L  = [ 1  -Mbs Mss^-1]  | ||||
|     //        [ 0   0         ]  | ||||
|     Field in_sbar(grid); | ||||
|     Field    tmp2(grid); | ||||
|     Field    Mtmp(grid); | ||||
|     Field in_sbar(FineGrid); | ||||
|     Field    tmp2(FineGrid); | ||||
|     Field    Mtmp(FineGrid); | ||||
|  | ||||
|     ProjectToSubspace(in,PleftProj);      | ||||
|     PromoteFromSubspace(PleftProj,out);   | ||||
|     CoarseField PleftProj(CoarseGrid); | ||||
|     CoarseField PleftMss_proj(CoarseGrid); | ||||
|  | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,in);      | ||||
|     _Aggregates.PromoteFromSubspace(PleftProj,out);   | ||||
|     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s | ||||
|  | ||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s | ||||
|     PromoteFromSubspace(PleftMss_proj,out); | ||||
|     _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s | ||||
|     _Aggregates.PromoteFromSubspace(PleftMss_proj,out); | ||||
|  | ||||
|     HermOp(out,Mtmp); | ||||
|     _Linop.HermOp(out,Mtmp); | ||||
|  | ||||
|     ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1} | ||||
|     PromoteFromSubspace(PleftProj,tmp2); | ||||
|     _Aggregates.ProjectToSubspace(PleftProj,Mtmp);      // Msbar s Mss^{-1} | ||||
|     _Aggregates.PromoteFromSubspace(PleftProj,tmp2); | ||||
|  | ||||
|     axpy(out,-1.0,tmp2,Mtmp); | ||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s | ||||
|   } | ||||
| } | ||||
| }; | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp){ | ||||
|  | ||||
|   }  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){ | ||||
|  | ||||
|   } | ||||
|   virtual void M2(Field & in, Field & out){ | ||||
|  | ||||
|   } | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){ | ||||
|  | ||||
|   } | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){ | ||||
|  | ||||
|   } | ||||
| } | ||||
| /* | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
|   virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
|  | ||||
| template<class Field> | ||||
| class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> { | ||||
|  public: | ||||
|   virtual void M(Field & in,Field & out,Field & tmp);  | ||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); | ||||
|   virtual void M2(Field & in, Field & out); | ||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); | ||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); | ||||
| } | ||||
| */ | ||||
| #endif | ||||
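The reworked class wires concrete components (`_Linop`, `_Smoother`, `_CoarseSolver`, `_Aggregates`) into the two-level scheme. In the notation of the Tang-2009 comment at the top of the file (with P the promotion from the aggregates, A_c the coarse operator handled by `_CoarseSolver`, and S the smoother), `M1` and `Vstart` as rewritten above implement

```latex
Q = P\, A_c^{-1}\, P^{\dagger}, \qquad
M_1\, r = S\, r + Q\,\big(r - A\, S\, r\big), \qquad
x_0 \;\longmapsto\; x_0 + Q\,\big(b - A\, x_0\big) ,
```

while `Pleft`/`Pright` are the corresponding oblique projectors, now routed through `_Aggregates` and `_CoarseSolver` instead of the old free-function calls.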
|   | ||||
| @@ -60,6 +60,8 @@ public: | ||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; | ||||
|  | ||||
|   virtual void operator()(const Field &src,Field &guess) { | ||||
|     RealD t=-usecond(); | ||||
|      | ||||
|     guess = Zero(); | ||||
|     assert(evec.size()==eval.size()); | ||||
|     auto N = evec.size(); | ||||
| @@ -68,6 +70,8 @@ public: | ||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||
|     } | ||||
|     guess.Checkerboard() = src.Checkerboard(); | ||||
|     t+=usecond(); | ||||
|     std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl; | ||||
|   } | ||||
| }; | ||||
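The timed guess above is standard eigenvector deflation: assuming the stored `evec` are (approximately) orthonormal eigenvectors of the operator with eigenvalues `eval`, the loop assembles

```latex
\mathrm{guess} \;=\; \sum_{i=0}^{N-1} \frac{(v_i,\, \mathrm{src})}{\lambda_i}\, v_i ,
\qquad A\, v_i = \lambda_i\, v_i .
```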
|  | ||||
|   | ||||
| @@ -59,7 +59,7 @@ public: | ||||
|     GridBase *grid = src.Grid(); | ||||
|     Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid); | ||||
|        | ||||
|     psi=zero; | ||||
|     psi=Zero(); | ||||
|     r  = src; | ||||
|     Preconditioner(r,p); | ||||
|  | ||||
|   | ||||
| @@ -53,7 +53,11 @@ public: | ||||
|   {  | ||||
|     size_type bytes = __n*sizeof(_Tp); | ||||
|     profilerAllocate(bytes); | ||||
| #ifdef GRID_UVM | ||||
|     _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); | ||||
| #else  | ||||
|     _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); | ||||
| #endif | ||||
|     assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); | ||||
|     return ptr; | ||||
|   } | ||||
| @@ -62,7 +66,11 @@ public: | ||||
|   {  | ||||
|     size_type bytes = __n * sizeof(_Tp); | ||||
|     profilerFree(bytes); | ||||
| #ifdef GRID_UVM | ||||
|     MemoryManager::SharedFree((void *)__p,bytes); | ||||
| #else | ||||
|     MemoryManager::CpuFree((void *)__p,bytes); | ||||
| #endif | ||||
|   } | ||||
|  | ||||
|   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop | ||||
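The allocator hunks above select between the unified (shared) pool and the CPU pool at compile time via `GRID_UVM`; the essential constraint is that `allocate` and `deallocate` pick the same pool. A condensed sketch of that symmetric `#ifdef` pattern, with hypothetical pool hooks standing in for the MemoryManager calls:

```cpp
#include <cstddef>
#include <cstdlib>

// Hypothetical pool hooks standing in for SharedAllocate/CpuAllocate etc.
inline void* pool_shared_alloc(size_t n)        { return std::malloc(n); }
inline void  pool_shared_free (void* p, size_t) { std::free(p); }
inline void* pool_cpu_alloc   (size_t n)        { return std::malloc(n); }
inline void  pool_cpu_free    (void* p, size_t) { std::free(p); }

// allocate/deallocate must agree on the pool, as in the GRID_UVM hunks above.
inline void* grid_alloc(size_t bytes) {
#ifdef GRID_UVM
  return pool_shared_alloc(bytes);
#else
  return pool_cpu_alloc(bytes);
#endif
}
inline void grid_free(void* p, size_t bytes) {
#ifdef GRID_UVM
  pool_shared_free(p, bytes);
#else
  pool_cpu_free(p, bytes);
#endif
}
```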
|   | ||||
| @@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid); | ||||
| #define AccSmall (3) | ||||
| #define Shared   (4) | ||||
| #define SharedSmall (5) | ||||
| uint64_t total_cache; | ||||
| uint64_t total_shared; | ||||
| uint64_t total_device; | ||||
| uint64_t total_host;; | ||||
| void MemoryManager::PrintBytes(void) | ||||
| { | ||||
|   std::cout << " MemoryManager : "<<total_cache <<" cache       bytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; | ||||
|   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; | ||||
| @@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes) | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocDevice(bytes); | ||||
|     total_device+=bytes; | ||||
|   } else { | ||||
|     //    std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||
|   } | ||||
|   return ptr; | ||||
| } | ||||
| @@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes) | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocShared(bytes); | ||||
|     total_shared+=bytes; | ||||
|     //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||
|     //    std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||
|     //    PrintBytes(); | ||||
|   } else { | ||||
|     //    std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||
|   } | ||||
|   return ptr; | ||||
| } | ||||
| @@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes) | ||||
|   if ( ptr == (void *) NULL ) { | ||||
|     ptr = (void *) acceleratorAllocShared(bytes); | ||||
|     total_host+=bytes; | ||||
|     //    std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||
|   } else { | ||||
|     //    std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||
|   } | ||||
|   return ptr; | ||||
| } | ||||
| @@ -120,7 +129,7 @@ void MemoryManager::Init(void) | ||||
|   str= getenv("GRID_ALLOC_NCACHE_LARGE"); | ||||
|   if ( str ) { | ||||
|     Nc = atoi(str); | ||||
|     if ( (Nc>=0) && (Nc < NallocCacheMax)) { | ||||
|     if ( (Nc>=0) && (Nc <= NallocCacheMax)) { | ||||
|       Ncache[Cpu]=Nc; | ||||
|       Ncache[Acc]=Nc; | ||||
|       Ncache[Shared]=Nc; | ||||
| @@ -130,7 +139,7 @@ void MemoryManager::Init(void) | ||||
|   str= getenv("GRID_ALLOC_NCACHE_SMALL"); | ||||
|   if ( str ) { | ||||
|     Nc = atoi(str); | ||||
|     if ( (Nc>=0) && (Nc < NallocCacheMax)) { | ||||
|     if ( (Nc>=0) && (Nc <= NallocCacheMax)) { | ||||
|       Ncache[CpuSmall]=Nc; | ||||
|       Ncache[AccSmall]=Nc; | ||||
|       Ncache[SharedSmall]=Nc; | ||||
| @@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | ||||
|  | ||||
|   if ( entries[v].valid ) { | ||||
|     ret = entries[v].address; | ||||
|     total_cache-=entries[v].bytes; | ||||
|     entries[v].valid = 0; | ||||
|     entries[v].address = NULL; | ||||
|     entries[v].bytes = 0; | ||||
| @@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | ||||
|   entries[v].address=ptr; | ||||
|   entries[v].bytes  =bytes; | ||||
|   entries[v].valid  =1; | ||||
|   total_cache+=entries[v].bytes; | ||||
|  | ||||
|   return ret; | ||||
| } | ||||
| @@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach | ||||
|   for(int e=0;e<ncache;e++){ | ||||
|     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { | ||||
|       entries[e].valid = 0; | ||||
|       total_cache-=bytes; | ||||
|       return entries[e].address; | ||||
|     } | ||||
|   } | ||||
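The MemoryManager hunks above add a running `total_cache` counter (now reported by `PrintBytes()`): bytes are added when a freed block is parked in the cache, and subtracted when a parked block is either evicted to make room or handed back out by `Lookup`; they also relax the `GRID_ALLOC_NCACHE_*` clamp from `<` to `<=`. A simplified, self-contained model of the insert/lookup bookkeeping (illustrative only, not Grid's actual structures):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

struct Entry { void* address; size_t bytes; bool valid; };

struct AllocCache {
  std::vector<Entry> entries;
  size_t   victim;
  uint64_t total_cache;   // bytes currently parked in the cache

  explicit AllocCache(size_t ncache)
    : entries(ncache, Entry{nullptr, 0, false}), victim(0), total_cache(0) {}

  // Park a freed block; if the victim slot is occupied, evict it and return
  // the evicted pointer so the caller can really free it.
  void* Insert(void* ptr, size_t bytes) {
    void* evicted = nullptr;
    Entry& e = entries[victim];
    if (e.valid) { evicted = e.address; total_cache -= e.bytes; }
    e.address = ptr; e.bytes = bytes; e.valid = true;
    total_cache += bytes;
    victim = (victim + 1) % entries.size();
    return evicted;
  }

  // Reuse a parked block of exactly the requested size, if one exists.
  void* Lookup(size_t bytes) {
    for (Entry& e : entries) {
      if (e.valid && e.bytes == bytes) {
        e.valid = false;
        total_cache -= bytes;
        return e.address;
      }
    }
    return nullptr;
  }
};
```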
|   | ||||
| @@ -93,8 +93,8 @@ private: | ||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; | ||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; | ||||
|  | ||||
|   static void PrintBytes(void); | ||||
|  public: | ||||
|   static void PrintBytes(void); | ||||
|   static void Init(void); | ||||
|   static void InitMessage(void); | ||||
|   static void *AcceleratorAllocate(size_t bytes); | ||||
|   | ||||
| @@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/lattice/Lattice_local.h> | ||||
| #include <Grid/lattice/Lattice_reduction.h> | ||||
| #include <Grid/lattice/Lattice_peekpoke.h> | ||||
| //#include <Grid/lattice/Lattice_reality.h> | ||||
| #include <Grid/lattice/Lattice_reality.h> | ||||
| #include <Grid/lattice/Lattice_real_imag.h> | ||||
| #include <Grid/lattice/Lattice_comparison_utils.h> | ||||
| #include <Grid/lattice/Lattice_comparison.h> | ||||
|   | ||||
| @@ -52,6 +52,7 @@ public: | ||||
| // This will be safe to call from accelerator_for and is trivially copy constructible | ||||
| // The copy constructor for this will need to be used by device lambda functions | ||||
| ///////////////////////////////////////////////////////////////////////////////////////// | ||||
| #undef LATTICE_BOUNDS_CHECK | ||||
| template<class vobj>  | ||||
| class LatticeView : public LatticeAccelerator<vobj> | ||||
| { | ||||
| @@ -61,14 +62,36 @@ public: | ||||
|   void * cpu_ptr; | ||||
| #ifdef GRID_SIMT | ||||
|   accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {  | ||||
| #ifdef LATTICE_BOUNDS_CHECK | ||||
|     assert(i<this->_odata_size); | ||||
|     assert(i>=0); | ||||
| #endif | ||||
|     return coalescedRead(this->_odata[i]);  | ||||
|   } | ||||
| #else  | ||||
|   accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } | ||||
|   accelerator_inline const vobj & operator()(size_t i) const { | ||||
| #ifdef LATTICE_BOUNDS_CHECK | ||||
|     assert(i<this->_odata_size); | ||||
|     assert(i>=0); | ||||
| #endif | ||||
|     return this->_odata[i]; | ||||
|   } | ||||
| #endif | ||||
|  | ||||
|   accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; | ||||
|   accelerator_inline vobj       & operator[](size_t i)       { return this->_odata[i]; }; | ||||
|   accelerator_inline const vobj & operator[](size_t i) const {  | ||||
| #ifdef LATTICE_BOUNDS_CHECK | ||||
|     assert(i<this->_odata_size); | ||||
|     assert(i>=0); | ||||
| #endif | ||||
|     return this->_odata[i];  | ||||
|   }; | ||||
|   accelerator_inline vobj       & operator[](size_t i)       {  | ||||
| #ifdef LATTICE_BOUNDS_CHECK | ||||
|     assert(i<this->_odata_size); | ||||
|     assert(i>=0); | ||||
| #endif | ||||
|     return this->_odata[i];  | ||||
|   }; | ||||
|  | ||||
|   accelerator_inline uint64_t begin(void) const { return 0;}; | ||||
|   accelerator_inline uint64_t end(void)   const { return this->_odata_size; }; | ||||
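The hunk above threads optional bounds checking through every element access of `LatticeView`, guarded by `LATTICE_BOUNDS_CHECK` (left `#undef` by default, so release builds pay nothing). A minimal sketch of the same macro-guarded accessor pattern with a hypothetical container:

```cpp
#include <cassert>
#include <cstddef>

// Flip to #define to enable the checks, mirroring LATTICE_BOUNDS_CHECK.
#undef VIEW_BOUNDS_CHECK

// Hypothetical read-only view illustrating the guarded operator[] pattern.
template <class T>
struct SimpleView {
  const T* data;
  size_t   size;
  const T& operator[](size_t i) const {
#ifdef VIEW_BOUNDS_CHECK
    assert(i < size);     // the lower bound is implied, since i is unsigned
#endif
    return data[i];
  }
};
```

Since the index type is `size_t`, the added `assert(i>=0)` in the hunk is always true; only the upper-bound check does real work.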
|   | ||||
| @@ -77,9 +77,16 @@ const int SpinorIndex = 2; | ||||
| template<typename T> struct isSpinor { | ||||
|   static constexpr bool value = (SpinorIndex==T::TensorLevel); | ||||
| }; | ||||
| const int CoarseIndex = 4; | ||||
| template<typename T> struct isCoarsened { | ||||
|   static constexpr bool value = (CoarseIndex<=T::TensorLevel); | ||||
| }; | ||||
| template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ; | ||||
| template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ; | ||||
|  | ||||
| template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | ||||
| template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | ||||
|  | ||||
| // ChrisK very keen to add extra space for Gparity doubling. | ||||
| // | ||||
| // Also add domain wall index, in a way where Wilson operator  | ||||
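The hunk above adds an `isCoarsened` trait alongside `isSpinor`, keyed on `TensorLevel`, plus `IfCoarsened`/`IfNotCoarsened` aliases so the 5d projectors further down can be overloaded for coarse vectors through SFINAE. A toy version of the same `enable_if` gating, with stand-in types rather than Grid's tensors:

```cpp
#include <type_traits>

// Toy version of the TensorLevel-based gate above (stand-in types).
struct FineSpinorT   { static constexpr int TensorLevel = 2; };
struct CoarseVectorT { static constexpr int TensorLevel = 4; };

template <typename T> struct isCoarsenedT {
  static constexpr bool value = (4 <= T::TensorLevel);
};
template <typename T>
using IfCoarsenedT    = typename std::enable_if< isCoarsenedT<T>::value, int>::type;
template <typename T>
using IfNotCoarsenedT = typename std::enable_if<!isCoarsenedT<T>::value, int>::type;

// The trait selects between otherwise identical overloads:
template <class T, IfNotCoarsenedT<T> = 0> int whichPath(const T&) { return 0; } // spinor path
template <class T, IfCoarsenedT<T>    = 0> int whichPath(const T&) { return 1; } // coarse path
// whichPath(FineSpinorT{}) == 0,  whichPath(CoarseVectorT{}) == 1
```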
|   | ||||
| @@ -89,7 +89,8 @@ public: | ||||
|   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's | ||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   =0; | ||||
|   virtual std::vector<int> Displacements(void)=0; | ||||
|  | ||||
|   virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||
|  | ||||
|   | ||||
| @@ -44,6 +44,9 @@ public: | ||||
|   INHERIT_IMPL_TYPES(Impl); | ||||
|   typedef StaggeredKernels<Impl> Kernels; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||
|  | ||||
|   FermionField _tmp; | ||||
|   FermionField &tmp(void) { return _tmp; } | ||||
|  | ||||
|   | ||||
| @@ -49,6 +49,9 @@ public: | ||||
|   INHERIT_IMPL_TYPES(Impl); | ||||
|   typedef StaggeredKernels<Impl> Kernels; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||
|  | ||||
|   FermionField _tmp; | ||||
|   FermionField &tmp(void) { return _tmp; } | ||||
|  | ||||
|   | ||||
| @@ -47,6 +47,9 @@ public: | ||||
|   FermionField _tmp; | ||||
|   FermionField &tmp(void) { return _tmp; } | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||
|  | ||||
|   //////////////////////////////////////// | ||||
|   // Performance monitoring | ||||
|   //////////////////////////////////////// | ||||
|   | ||||
| @@ -63,6 +63,9 @@ public: | ||||
|   INHERIT_IMPL_TYPES(Impl); | ||||
|   typedef WilsonKernels<Impl> Kernels; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||
|  | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   // Implement the abstract base | ||||
|   /////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -72,6 +72,9 @@ public: | ||||
|   typedef WilsonKernels<Impl> Kernels; | ||||
|   PmuStat stat; | ||||
|  | ||||
|   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||
|   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||
|  | ||||
|   FermionField _tmp; | ||||
|   FermionField &tmp(void) { return _tmp; } | ||||
|  | ||||
|   | ||||
| @@ -79,6 +79,8 @@ public: | ||||
|     _Mat.M(in,tmp); | ||||
|     G5R5(out,tmp); | ||||
|   } | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
| }; | ||||
|  | ||||
|  | ||||
| @@ -127,6 +129,8 @@ public: | ||||
|     _Mat.M(in,tmp); | ||||
|     out=g5*tmp; | ||||
|   } | ||||
|   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||
|   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||
| }; | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|   | ||||
| @@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu) | ||||
|   pickCheckerboard(Even, CloverTermEven, CloverTerm); | ||||
|   pickCheckerboard(Odd, CloverTermOdd, CloverTerm); | ||||
|  | ||||
|   pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm))); | ||||
|   pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm))); | ||||
|   pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); | ||||
|   pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); | ||||
|  | ||||
|   pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); | ||||
|   pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); | ||||
|  | ||||
|   pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv))); | ||||
|   pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv))); | ||||
|   pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); | ||||
|   pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); | ||||
| } | ||||
|  | ||||
| template <class Impl> | ||||
|   | ||||
| @@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   hspin(0)=fspin(0)-fspin(2); | ||||
|   hspin(1)=fspin(1)-fspin(3); | ||||
| } | ||||
| @@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
|  *  0 0 -1  0 | ||||
|  *  0 0  0 -1 | ||||
|  */ | ||||
|  | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   hspin(0)=fspin(0); | ||||
|   hspin(1)=fspin(1); | ||||
| } | ||||
|  | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   hspin(0)=fspin(2); | ||||
|   hspin(1)=fspin(3); | ||||
| } | ||||
|    | ||||
| //  template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   rfspin(0)=fspin(0); | ||||
|   rfspin(1)=fspin(1); | ||||
|   rfspin(2)=Zero(); | ||||
|   rfspin(3)=Zero(); | ||||
| } | ||||
| //  template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   rfspin(0)=Zero(); | ||||
|   rfspin(1)=Zero(); | ||||
|   rfspin(2)=fspin(2); | ||||
|   rfspin(3)=fspin(3); | ||||
| } | ||||
|  | ||||
| template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   const int hN = N>>1; | ||||
|   for(int s=0;s<hN;s++){ | ||||
|     rfspin(s)=fspin(s); | ||||
|     rfspin(s+hN)=Zero(); | ||||
|   } | ||||
| } | ||||
| template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   const int hN = N>>1; | ||||
|   for(int s=0;s<hN;s++){ | ||||
|     rfspin(s)=Zero(); | ||||
|     rfspin(s+hN)=fspin(s+hN); | ||||
|   } | ||||
| } | ||||
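The `IfCoarsened` overloads above extend the 5d chiral projectors to an N-component coarse vector by keeping its upper or lower half (`hN = N/2`), generalizing the four-spinor case where components {0,1} or {2,3} survive:

```latex
(P_{5+} v)_s =
\begin{cases} v_s, & 0 \le s < N/2 \\ 0, & N/2 \le s < N \end{cases}
\qquad
(P_{5-} v)_s =
\begin{cases} 0, & 0 \le s < N/2 \\ v_s, & N/2 \le s < N \end{cases}
```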
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Reconstruction routines to move back again to four spin | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
|  */ | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=timesMinusI(hspin(1)); | ||||
| @@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=timesI(hspin(1)); | ||||
| @@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)-=timesI(hspin(1)); | ||||
| @@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)+=timesI(hspin(1)); | ||||
| @@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
|  | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)= hspin(1); | ||||
| @@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=-hspin(1); | ||||
| @@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)+=hspin(1); | ||||
| @@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)-=hspin(1); | ||||
| @@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
|  */ | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=timesMinusI(hspin(0)); | ||||
| @@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=     timesI(hspin(0)); | ||||
| @@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)-=timesI(hspin(0)); | ||||
| @@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)+=timesI(hspin(0)); | ||||
| @@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
|  */ | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=hspin(0); | ||||
| @@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0); | ||||
|   fspin(1)=hspin(1); | ||||
|   fspin(2)=-hspin(0); | ||||
| @@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)+=hspin(0); | ||||
| @@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0); | ||||
|   fspin(1)+=hspin(1); | ||||
|   fspin(2)-=hspin(0); | ||||
| @@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
|  */ | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul | ||||
|   fspin(1)=hspin(1)+hspin(1); // probably no measurable difference though | ||||
|   fspin(2)=Zero(); | ||||
| @@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)=Zero(); | ||||
|   fspin(1)=Zero(); | ||||
|   fspin(2)=hspin(0)+hspin(0); | ||||
| @@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | ||||
| } | ||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; | ||||
|   fspin(0)+=hspin(0)+hspin(0); | ||||
|   fspin(1)+=hspin(1)+hspin(1); | ||||
| } | ||||
| @@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | ||||
| ////////// | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjXp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| @@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| //////// | ||||
| // Xm | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjXm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjXm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconXm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconXm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconXm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconXm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| //////// | ||||
| // Yp | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjYp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjYp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconYp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconYp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconYp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconYp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
| //////// | ||||
| // Ym | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjYm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjYm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconYm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconYm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconYm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconYm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -638,19 +594,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjZp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjZp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -660,19 +613,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconZp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconZp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -681,19 +631,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconZp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconZp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -706,19 +653,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjZm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjZm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -728,19 +672,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconZm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconZm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -749,19 +690,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconZm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconZm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -774,19 +712,16 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjTp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjTp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -796,19 +731,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconTp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconTp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconTp(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconTp(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
| //////// | ||||
| // Tm | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProjTm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProjTm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProjTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spReconTm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spReconTm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spReconTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumReconTm(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumReconTm(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumReconTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|     }} | ||||
| } | ||||
|  | ||||
|  | ||||
| //////// | ||||
| // 5p | ||||
| //////// | ||||
| template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProj5p(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProj5p(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spRecon5p(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spRecon5p(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spRecon5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumRecon5p(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumRecon5p(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM | ||||
| } | ||||
|  | ||||
| // four spinor projectors for chiral proj | ||||
| //  template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProj5p(hspin._internal,fspin._internal); | ||||
| } | ||||
| //  template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProj5p(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| //  template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin) | ||||
| template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> & | ||||
| // 5m | ||||
| //////// | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   spProj5m(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProj5m(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
| @@ -1021,19 +924,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spRecon5m(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spRecon5m(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spRecon5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -1042,19 +942,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatr | ||||
|  | ||||
| template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   accumRecon5m(hspin._internal,fspin._internal); | ||||
| } | ||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     accumRecon5m(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||
| @@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM | ||||
|  | ||||
|  | ||||
| // four spinor projectors for chiral proj | ||||
| //  template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; | ||||
|   spProj5m(hspin._internal,fspin._internal); | ||||
| } | ||||
| //  template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++) { | ||||
|     spProj5m(hspin._internal[i],fspin._internal[i]); | ||||
|   } | ||||
| } | ||||
| //  template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin) | ||||
| template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||
| { | ||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; | ||||
|   for(int i=0;i<N;i++){  | ||||
|     for(int j=0;j<N;j++){ | ||||
|       spProj5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||
|   | ||||
| @@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x) | ||||
|   z.Checkerboard() = x.Checkerboard(); | ||||
|   conformable(x,z); | ||||
|   int Ls = grid->_rdimensions[0]; | ||||
|   Gamma G5(Gamma::Algebra::Gamma5); | ||||
|   autoView( x_v, x, AcceleratorRead); | ||||
|   autoView( z_v, z, AcceleratorWrite); | ||||
|   uint64_t nloop = grid->oSites()/Ls; | ||||
| @@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x) | ||||
|     uint64_t ss = sss*Ls; | ||||
|     for(int s=0;s<Ls;s++){ | ||||
|       int sp = Ls-1-s; | ||||
|       coalescedWrite(z_v[ss+sp],G5*x_v(ss+s)); | ||||
|       auto tmp = x_v(ss+s); | ||||
|       decltype(tmp) tmp_p; | ||||
|       decltype(tmp) tmp_m; | ||||
|       spProj5p(tmp_p,tmp); | ||||
|       spProj5m(tmp_m,tmp); | ||||
|       // Using spProj5p/spProj5m (rather than multiplying by Gamma5) also covers the coarse space | ||||
|       coalescedWrite(z_v[ss+sp],tmp_p - tmp_m); | ||||
|     } | ||||
|   }); | ||||
| } | ||||
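| // Note: in the chiral basis Gamma5 = diag(+1,+1,-1,-1), so tmp_p - tmp_m above | ||||
| // reproduces the previous G5*x_v(ss+s) for four-spinors, while the IfCoarsened | ||||
| // projector overloads keep the same kernel valid for nbasis-component coarse vectors. | ||||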
| @@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x) | ||||
|   z.Checkerboard() = x.Checkerboard(); | ||||
|   conformable(x, z); | ||||
|  | ||||
|   Gamma G5(Gamma::Algebra::Gamma5); | ||||
|   z = G5 * x; | ||||
|   autoView( x_v, x, AcceleratorRead); | ||||
|   autoView( z_v, z, AcceleratorWrite); | ||||
|   uint64_t nloop = grid->oSites(); | ||||
|   accelerator_for(ss,nloop,vobj::Nsimd(),{ | ||||
|     auto tmp = x_v(ss); | ||||
|     decltype(tmp) tmp_p; | ||||
|     decltype(tmp) tmp_m; | ||||
|     spProj5p(tmp_p,tmp); | ||||
|     spProj5m(tmp_m,tmp); | ||||
|     coalescedWrite(z_v[ss],tmp_p - tmp_m); | ||||
|   }); | ||||
| } | ||||
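| // For a coarse field the IfCoarsened spProj5p/spProj5m overloads select the first | ||||
| // and second nbasis/2 components respectively, so tmp_p - tmp_m applies the coarse | ||||
| // gamma5 = diag(+1,...,+1,-1,...,-1) directly and the explicit loop kept | ||||
| // (commented out) below is no longer needed. | ||||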
|  | ||||
| /* | ||||
| template<class CComplex, int nbasis> | ||||
| void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x) | ||||
| { | ||||
| @@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, | ||||
|     } | ||||
|   }); | ||||
| } | ||||
| */ | ||||
|  | ||||
| NAMESPACE_END(Grid); | ||||
|  | ||||
|   | ||||
| @@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes) | ||||
|     ptr = (void *) NULL; | ||||
|     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||
|   } | ||||
|   //  size_t free,total; | ||||
|   //  cudaMemGetInfo(&free,&total); | ||||
|   //  std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl; | ||||
|   return ptr; | ||||
| }; | ||||
| inline void *acceleratorAllocDevice(size_t bytes) | ||||
| @@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes) | ||||
|     ptr = (void *) NULL; | ||||
|     printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||
|   } | ||||
|   //  size_t free,total; | ||||
|   //  cudaMemGetInfo(&free,&total); | ||||
|   //  std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl; | ||||
|   return ptr; | ||||
| }; | ||||
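| // The commented-out cudaMemGetInfo(&free,&total) calls above can be re-enabled | ||||
| // to log the free/total device memory after each managed or device allocation. | ||||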
| inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; | ||||
|   | ||||
| @@ -191,7 +191,7 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ | ||||
| @@ -216,15 +216,16 @@ int main (int argc, char ** argv) | ||||
| 	 | ||||
| 	 | ||||
| 	    double bytes=3*vol*Nc*Nc*sizeof(Complex); | ||||
| 	    double ncbytes=5*vol*Nc*Nc*sizeof(Complex); | ||||
| 	    double flops=Nc*Nc*(6+8+8)*vol; | ||||
| 	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl; | ||||
| 	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl; | ||||
|       } | ||||
|   } | ||||
| #if 1 | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|  | ||||
|   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ | ||||
| @@ -258,10 +259,11 @@ int main (int argc, char ** argv) | ||||
| 	tmult  = tmult /Nloop; | ||||
| 	 | ||||
| 	double bytes=3*vol*Nc*Nc*sizeof(Complex); | ||||
| 	double ncbytes=5*vol*Nc*Nc*sizeof(Complex); | ||||
| 	double flops=Nc*Nc*(6+8+8)*vol; | ||||
| 	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl; | ||||
| 	time = time * 1000; // convert to NS for GB/s | ||||
| 	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl; | ||||
| 	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl; | ||||
|       } | ||||
|     } | ||||
| #endif | ||||
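| // The extra "GB/s (incl Cshift)" column divides ncbytes = 5*vol*Nc*Nc*sizeof(Complex) | ||||
| // by the same time: read x, read y and write z (the 3 matrices counted in bytes) | ||||
| // plus, presumably, the read and write of the Cshift intermediate, so the shift | ||||
| // traffic is included in the bandwidth estimate. | ||||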
|   | ||||
| @@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace Grid; | ||||
|  ; | ||||
|  | ||||
| void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop) | ||||
| { | ||||
|   // what type LatticeComplex  | ||||
|   GridBase *_grid = prop.Grid(); | ||||
|    | ||||
|   typedef LatticeFermion FermionField; | ||||
|   typedef LatticePropagator PropagatorField; | ||||
|   typedef typename FermionField::vector_type vector_type; | ||||
|   typedef typename FermionField::scalar_type ScalComplex; | ||||
|   typedef iSinglet<ScalComplex> Tcomplex; | ||||
|   typedef Lattice<iSinglet<vector_type> > LatComplex; | ||||
|    | ||||
|   Gamma::Algebra Gmu [] = { | ||||
|     Gamma::Algebra::GammaX, | ||||
|     Gamma::Algebra::GammaY, | ||||
|     Gamma::Algebra::GammaZ, | ||||
|     Gamma::Algebra::GammaT | ||||
|   }; | ||||
|  | ||||
|   Coordinate latt_size   = _grid->_fdimensions; | ||||
|    | ||||
|   PropagatorField   num  (_grid); num  = Zero(); | ||||
|  | ||||
|   LatComplex    sk(_grid);  sk = Zero(); | ||||
|   LatComplex    sk2(_grid); sk2= Zero(); | ||||
|   LatComplex    W(_grid); W= Zero(); | ||||
|   LatComplex    a(_grid); a= Zero(); | ||||
|   LatComplex    one  (_grid); one = ScalComplex(1.0,0.0); | ||||
|   LatComplex denom(_grid); denom= Zero(); | ||||
|   LatComplex cosha(_grid);  | ||||
|   LatComplex kmu(_grid);  | ||||
|   LatComplex Wea(_grid);  | ||||
|   LatComplex Wema(_grid);  | ||||
|  | ||||
|   ScalComplex ci(0.0,1.0); | ||||
|   SpinColourMatrixD identity = ComplexD(1.0); | ||||
|  | ||||
|   for(int mu=0;mu<Nd;mu++) { | ||||
|      | ||||
|     LatticeCoordinate(kmu,mu); | ||||
|      | ||||
|     RealD TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|      | ||||
|     kmu = TwoPiL * kmu; | ||||
|     //    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions | ||||
|      | ||||
|     sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5); | ||||
|     sk  = sk  +     sin(kmu)    *sin(kmu);  | ||||
|      | ||||
|     num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity); | ||||
|      | ||||
|   } | ||||
|    | ||||
|   W = one - M5 + sk2; | ||||
|  | ||||
|   //////////////////////////////////////////// | ||||
|   // Cosh alpha -> alpha | ||||
|   //////////////////////////////////////////// | ||||
|   cosha =  (one + W*W + sk) / (abs(W)*2.0); | ||||
|  | ||||
|   // FIXME Need a Lattice acosh | ||||
|   { | ||||
|   autoView(cosha_v,cosha,CpuRead); | ||||
|   autoView(a_v,a,CpuWrite); | ||||
|   for(int idx=0;idx<_grid->lSites();idx++){ | ||||
|     Coordinate lcoor(Nd); | ||||
|     Tcomplex cc; | ||||
|     //    RealD sgn; | ||||
|     _grid->LocalIndexToLocalCoor(idx,lcoor); | ||||
|     peekLocalSite(cc,cosha_v,lcoor); | ||||
|     assert((double)real(cc)>=1.0); | ||||
|     assert(fabs((double)imag(cc))<=1.0e-15); | ||||
|     cc = ScalComplex(::acosh(real(cc)),0.0); | ||||
|     pokeLocalSite(cc,a_v,lcoor); | ||||
|   }} | ||||
|    | ||||
|   Wea = ( exp( a) * abs(W)  ); | ||||
|   Wema= ( exp(-a) * abs(W)  ); | ||||
|    | ||||
|   num   = num + ( one - Wema ) * mass * identity; | ||||
|   denom= ( Wea - one ) + mass*mass * (one - Wema);  | ||||
|   prop = num/denom; | ||||
| } | ||||
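For readers cross-checking the kernel, this is the expression the function above assembles (a direct transcription of the code, with k_mu = 2 pi n_mu / L_mu from LatticeCoordinate and a = acosh(cosh a) taken site by site in the CPU loop); it has the form of the usual free domain-wall momentum-space propagator:

    % Transcription of MomentumSpacePropagatorTest
    \begin{align}
      W(k)       &= 1 - M_5 + 2\sum_\mu \sin^2\!\frac{k_\mu}{2}, \\
      \cosh a(k) &= \frac{1 + W^2 + \sum_\mu \sin^2 k_\mu}{2\,|W|}, \\
      S(k)       &= \frac{-\,i\sum_\mu \gamma_\mu \sin k_\mu \;+\; m\bigl(1 - |W|e^{-a}\bigr)}
                        {\bigl(|W|e^{a} - 1\bigr) \;+\; m^2\bigl(1 - |W|e^{-a}\bigr)} .
    \end{align}

The two assertions inside the acosh loop encode the requirement that cosh a(k) be real and at least 1 before the scalar ::acosh is applied.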
|  | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
| @@ -307,6 +391,17 @@ int main (int argc, char ** argv) | ||||
|     RealD M5  =0.8; | ||||
|     DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5); | ||||
|  | ||||
|     /////////////////// Test code for (1-m)^2 /////////////// | ||||
|     LatticePropagatorD prop1(&GRID); | ||||
|     LatticePropagatorD prop2(&GRID); | ||||
|     LatticeComplexD ratio(&GRID); | ||||
|     MomentumSpacePropagatorTest(0.0,M5,prop1); | ||||
|     MomentumSpacePropagatorTest(0.3,M5,prop2); | ||||
|     ratio=localNorm2(prop2); | ||||
|     ratio=ratio/localNorm2(prop1); | ||||
|     std::cout << ratio; | ||||
|     /////////////////// Test code for (1-m)^2 factor /////////////// | ||||
|  | ||||
|     // Momentum space prop | ||||
|     std::cout << " Solving by FFT and Feynman rules" <<std::endl; | ||||
|     bool fiveD = false; //calculate 4d free propagator | ||||
|   | ||||
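The (1-m)^2 block added above compares the m = 0.3 and massless free propagators site by site, and `std::cout << ratio` dumps the entire lattice, which is verbose on large volumes. If only a spot value is wanted, a sketch along these lines should work in the scope of that test block (the site coordinate is an arbitrary illustrative choice; `peekSite`, `TComplexD` and `TensorRemove` are standard Grid helpers):

    // Spot-check the mass dependence at one momentum instead of printing the whole lattice.
    Coordinate site(Nd);
    for(int mu=0;mu<Nd;mu++) site[mu]=0;
    site[0] = 1;                      // lowest nonzero momentum in x (arbitrary choice)
    TComplexD cc;
    peekSite(cc, ratio, site);
    std::cout << GridLogMessage << " |S(m=0.3)|^2 / |S(m=0)|^2 at n=(1,0,0,0) : "
              << TensorRemove(cc) << std::endl;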
| @@ -565,8 +565,8 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl; | ||||
|   std::cout<<GridLogMessage << "**************************************************"<< std::endl; | ||||
|  | ||||
|   BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000);      | ||||
|   BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24); | ||||
|   BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000,false);      | ||||
|   BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24,false); | ||||
|   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||
|   ZeroGuesser<LatticeFermion> FineZeroGuesser; | ||||
|  | ||||
|   | ||||
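The final hunk passes an explicit third argument to both BiCGSTAB constructors. This trailing bool is presumably the same error-on-no-convergence switch used by Grid's ConjugateGradient (it defaults to true and asserts if the target residual is not reached), so `false` lets the fixed-iteration fine-grid smoother and the bounded coarse solve return quietly inside the multigrid cycle. A sketch under that assumed signature:

    // Assumed constructor shape: BiCGSTAB(RealD tol, Integer maxit, bool err_on_no_conv = true).
    // tol = 0.0 with 24 iterations can never "converge"; passing false suppresses the
    // failed-convergence assert so the solver simply acts as a 24-iteration smoother.
    BiCGSTAB<LatticeFermion> FineMgridBiCGSTAB(0.0, 24, false);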
										
											
(One further file's diff was suppressed by the compare view because it is too large.)
											
										
									
								
							