mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	Compare commits
	
		
			26 Commits
		
	
	
		
			ffd7301649
			...
			feature/hw
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 3064c9a6e2 | ||
|  | 729882827c | ||
|  | baa668d3ac | ||
|  | 3c82d16ed8 | ||
|  | 5c8c0c2d7c | ||
|  | e5a100846c | ||
|  | a74e2dc12e | ||
|  | 595f512a6e | ||
|  | a6499b22ff | ||
|  | b4e42a59c6 | ||
|  | 8c913e0edd | ||
|  | fd3f93d8d3 | ||
|  | e9543cdacd | ||
|  | 98f7b3d298 | ||
|  | b7b164ea24 | ||
|  | 77124d99d5 | ||
|  | e1327e7ea0 | ||
|  | 569f78c2cf | ||
|  | 488c79d5a1 | ||
|  | dc6b0f20b2 | ||
|  | c0badc3e16 | ||
|  | 58f6529b55 | ||
|  | e3f056dfbb | ||
|  | da0ffa7a79 | ||
|  | fcc7640b9c | ||
|  | 0cbe2859e0 | 
| @@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner, | |||||||
|   Lattice<dotp> fine_inner_msk(fine); |   Lattice<dotp> fine_inner_msk(fine); | ||||||
|  |  | ||||||
|   // Multiply could be fused with innerProduct |   // Multiply could be fused with innerProduct | ||||||
|   // Single block sum kernel could do both masks. |  | ||||||
|   fine_inner = localInnerProduct(fineX,fineY); |   fine_inner = localInnerProduct(fineX,fineY); | ||||||
|   mult(fine_inner_msk, fine_inner,FineMask); |   mult(fine_inner_msk, fine_inner,FineMask); | ||||||
|   blockSum(CoarseInner,fine_inner_msk); |   blockSum(CoarseInner,fine_inner_msk); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| class Geometry { | class Geometry { | ||||||
| public: | public: | ||||||
|   int npoint; |   int npoint; | ||||||
| @@ -80,8 +78,12 @@ public: | |||||||
|     } |     } | ||||||
|     directions   [2*_d]=0; |     directions   [2*_d]=0; | ||||||
|     displacements[2*_d]=0; |     displacements[2*_d]=0; | ||||||
|   } |  | ||||||
|  |  | ||||||
|  |     std::cout <<GridLogMessage << "Geometry "<<std::endl; | ||||||
|  |     for(int p=0;p<npoint;p++){ | ||||||
|  |       std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl; | ||||||
|  |     } | ||||||
|  |   } | ||||||
| }; | }; | ||||||
|    |    | ||||||
| template<class Fobj,class CComplex,int nbasis> | template<class Fobj,class CComplex,int nbasis> | ||||||
| @@ -102,8 +104,8 @@ public: | |||||||
|   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  |   Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) :  | ||||||
|     CoarseGrid(_CoarseGrid), |     CoarseGrid(_CoarseGrid), | ||||||
|     FineGrid(_FineGrid), |     FineGrid(_FineGrid), | ||||||
|     subspace(nbasis,_FineGrid), |     checkerboard(_checkerboard), | ||||||
|     checkerboard(_checkerboard) |     subspace(nbasis,_FineGrid) | ||||||
|   { |   { | ||||||
|   }; |   }; | ||||||
|    |    | ||||||
| @@ -285,6 +287,8 @@ public: | |||||||
|   /////////////////////// |   /////////////////////// | ||||||
|   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know |   GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return geom.directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return geom.displacements; }; | ||||||
|   void M (const CoarseVector &in, CoarseVector &out) |   void M (const CoarseVector &in, CoarseVector &out) | ||||||
|   { |   { | ||||||
|     conformable(_grid,in.Grid()); |     conformable(_grid,in.Grid()); | ||||||
| @@ -308,6 +312,9 @@ public: | |||||||
|  |  | ||||||
|     int osites=Grid()->oSites(); |     int osites=Grid()->oSites(); | ||||||
|  |  | ||||||
|  |     autoView(st,Stencil,AcceleratorRead); | ||||||
|  |     siteVector *CBp=Stencil.CommBuf(); | ||||||
|  |  | ||||||
|     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { |     accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { | ||||||
|       int ss = sss/nbasis; |       int ss = sss/nbasis; | ||||||
|       int b  = sss%nbasis; |       int b  = sss%nbasis; | ||||||
| @@ -318,12 +325,12 @@ public: | |||||||
|  |  | ||||||
|       for(int point=0;point<geom.npoint;point++){ |       for(int point=0;point<geom.npoint;point++){ | ||||||
|  |  | ||||||
| 	SE=Stencil.GetEntry(ptype,point,ss); | 	SE=st.GetEntry(ptype,point,ss); | ||||||
| 	   | 	   | ||||||
| 	if(SE->_is_local) {  | 	if(SE->_is_local) {  | ||||||
| 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | 	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); | ||||||
| 	} else { | 	} else { | ||||||
| 	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); | 	  nbr = coalescedRead(CBp[SE->_offset]); | ||||||
| 	} | 	} | ||||||
| 	acceleratorSynchronise(); | 	acceleratorSynchronise(); | ||||||
|  |  | ||||||
| @@ -332,7 +339,7 @@ public: | |||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|       coalescedWrite(out_v[ss](b),res); |       coalescedWrite(out_v[ss](b),res); | ||||||
|       }); |     }); | ||||||
|  |  | ||||||
|     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); |     for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose(); | ||||||
|   }; |   }; | ||||||
| @@ -409,38 +416,23 @@ public: | |||||||
|       MdirCalc(in,out[p],p); |       MdirCalc(in,out[p],p); | ||||||
|     } |     } | ||||||
|   }; |   }; | ||||||
|   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ |   void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp) | ||||||
|  |   { | ||||||
|     this->MdirComms(in); |     this->MdirComms(in); | ||||||
|  |  | ||||||
|     int ndim = in.Grid()->Nd(); |     int ndim = in.Grid()->Nd(); | ||||||
|  |  | ||||||
|     ////////////// |     int point=-1; | ||||||
|     // 4D action like wilson |     for(int p=0;p<geom.npoint;p++){ | ||||||
|     // 0+ => 0  |       if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p; | ||||||
|     // 0- => 1 |     } | ||||||
|     // 1+ => 2  |     assert(point!=-1);// Must find | ||||||
|     // 1- => 3 |  | ||||||
|     // etc.. |  | ||||||
|     ////////////// |  | ||||||
|     // 5D action like DWF |  | ||||||
|     // 1+ => 0  |  | ||||||
|     // 1- => 1 |  | ||||||
|     // 2+ => 2  |  | ||||||
|     // 2- => 3 |  | ||||||
|     // etc.. |  | ||||||
|     auto point = [dir, disp, ndim](){ |  | ||||||
|       if(dir == 0 and disp == 0) |  | ||||||
| 	return 8; |  | ||||||
|       else if ( ndim==4 ) {  |  | ||||||
| 	return (4 * dir + 1 - disp) / 2; |  | ||||||
|       } else {  |  | ||||||
| 	return (4 * (dir-1) + 1 - disp) / 2; |  | ||||||
|       } |  | ||||||
|     }(); |  | ||||||
|  |  | ||||||
|  |     std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp  <<std::endl; | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl; | ||||||
|  |     } | ||||||
|     MdirCalc(in,out,point); |     MdirCalc(in,out,point); | ||||||
|  |  | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   void Mdiag(const CoarseVector &in, CoarseVector &out) |   void Mdiag(const CoarseVector &in, CoarseVector &out) | ||||||
| @@ -456,10 +448,58 @@ public: | |||||||
|     geom(CoarseGrid._ndimension), |     geom(CoarseGrid._ndimension), | ||||||
|     hermitian(hermitian_), |     hermitian(hermitian_), | ||||||
|     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), |     Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), | ||||||
|       A(geom.npoint,&CoarseGrid) |     A(geom.npoint,&CoarseGrid) | ||||||
|   { |   { | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |   void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop) | ||||||
|  |   { | ||||||
|  |     typedef Lattice<Fobj> FineField; | ||||||
|  |     CoarseVector Cin(_grid); | ||||||
|  |     CoarseVector Cout(_grid); | ||||||
|  |     CoarseVector CFout(_grid); | ||||||
|  |  | ||||||
|  |     FineField Fin(FineGrid); | ||||||
|  |     FineField Fout(FineGrid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |     std::vector<int> seeds({1,2,3,4,5}); | ||||||
|  |     GridParallelRNG RNG(_grid);  RNG.SeedFixedIntegers(seeds); | ||||||
|  |     gaussian(RNG,Cin); | ||||||
|  |  | ||||||
|  |     _Aggregates.PromoteFromSubspace(Cin,Fin); | ||||||
|  |     _Aggregates.ProjectToSubspace(Cin,Fin); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<< " Testing M  "<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||||
|  |     // Coarse operator | ||||||
|  |     this->M(Cin,Cout); | ||||||
|  |     // Fine projected operator | ||||||
|  |     _Aggregates.PromoteFromSubspace(Cin,Fin); | ||||||
|  |     linop.Op(Fin,Fout); | ||||||
|  |     _Aggregates.ProjectToSubspace(CFout,Fout); | ||||||
|  |  | ||||||
|  |     CFout = CFout-Cout; | ||||||
|  |     RealD diff = norm2(CFout); | ||||||
|  |     std::cout << GridLogMessage<< " diff  "<<diff<<std::endl; | ||||||
|  |     assert(diff<1.0e-5); | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<< " Testing Mdag  "<<std::endl; | ||||||
|  |     std::cout << GridLogMessage<< "************  "<<std::endl; | ||||||
|  |     // Coarse operator | ||||||
|  |     Mdag(Cin,Cout); | ||||||
|  |     // Fine operator | ||||||
|  |     linop.AdjOp(Fin,Fout); | ||||||
|  |     _Aggregates.ProjectToSubspace(CFout,Fout); | ||||||
|  |  | ||||||
|  |     CFout = CFout-Cout; | ||||||
|  |     diff = norm2(CFout); | ||||||
|  |     std::cout << GridLogMessage<< " diff  "<<diff<<std::endl;  | ||||||
|  |     assert(diff<1.0e-5); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, |   void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop, | ||||||
| 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | 		       Aggregation<Fobj,CComplex,nbasis> & Subspace) | ||||||
|   { |   { | ||||||
| @@ -496,8 +536,19 @@ public: | |||||||
|  |  | ||||||
|     CoarseScalar InnerProd(Grid());  |     CoarseScalar InnerProd(Grid());  | ||||||
|  |  | ||||||
|  |     std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl; | ||||||
|     // Orthogonalise the subblocks over the basis |     // Orthogonalise the subblocks over the basis | ||||||
|     blockOrthogonalise(InnerProd,Subspace.subspace); |     blockOrthogonalise(InnerProd,Subspace.subspace); | ||||||
|  |     std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl; | ||||||
|  |  | ||||||
|  |     auto OpDirections    = linop.Directions(); | ||||||
|  |     auto OpDisplacements = linop.Displacements(); | ||||||
|  |  | ||||||
|  |     std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl; | ||||||
|  |     for(int p=0;p<OpDirections.size();p++) { | ||||||
|  |       assert(OpDirections[p]==geom.directions[p]); | ||||||
|  |       assert(OpDisplacements[p]==geom.displacements[p]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // Compute the matrix elements of linop between this orthonormal |     // Compute the matrix elements of linop between this orthonormal | ||||||
|     // set of vectors. |     // set of vectors. | ||||||
| @@ -533,13 +584,27 @@ public: | |||||||
|     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); |     evenmask = where(mod(bcb,2)==(Integer)0,one,zero); | ||||||
|     oddmask  = one-evenmask; |     oddmask  = one-evenmask; | ||||||
|  |  | ||||||
|  |     /* | ||||||
|  |     { | ||||||
|  |       phi=Subspace.subspace[0]; | ||||||
|  |       linop.OpDirAll(phi,Mphi_p); | ||||||
|  |       for(int p=0;p<geom.npoint-1;p++){ | ||||||
|  | 	int dir=geom.directions[p]; | ||||||
|  | 	int disp=geom.displacements[p]; | ||||||
|  | 	linop.OpDir(phi,Mphi,dir,disp); | ||||||
|  | 	Mphi=Mphi-Mphi_p[p]; | ||||||
|  | 	std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  | */ | ||||||
|     assert(self_stencil!=-1); |     assert(self_stencil!=-1); | ||||||
|  |     int lhermitian=hermitian; | ||||||
|  |  | ||||||
|     for(int i=0;i<nbasis;i++){ |     for(int i=0;i<nbasis;i++){ | ||||||
|  |  | ||||||
|       phi=Subspace.subspace[i]; |       phi=Subspace.subspace[i]; | ||||||
|  |  | ||||||
|       //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; |       std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl; | ||||||
|       linop.OpDirAll(phi,Mphi_p); |       linop.OpDirAll(phi,Mphi_p); | ||||||
|       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); |       linop.OpDiag  (phi,Mphi_p[geom.npoint-1]); | ||||||
|  |  | ||||||
| @@ -550,7 +615,7 @@ public: | |||||||
| 	int dir   = geom.directions[p]; | 	int dir   = geom.directions[p]; | ||||||
| 	int disp  = geom.displacements[p]; | 	int disp  = geom.displacements[p]; | ||||||
|  |  | ||||||
| 	if ( (disp==-1) || (!hermitian ) ) { | 	if ( (disp==-1) || (!lhermitian ) ) { | ||||||
|  |  | ||||||
| 	  //////////////////////////////////////////////////////////////////////// | 	  //////////////////////////////////////////////////////////////////////// | ||||||
| 	  // Pick out contributions coming from this cell and neighbour cell | 	  // Pick out contributions coming from this cell and neighbour cell | ||||||
| @@ -568,11 +633,23 @@ public: | |||||||
| 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); | 	    autoView( A_self  , A[self_stencil], AcceleratorWrite); | ||||||
|  |  | ||||||
| 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | 	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); | ||||||
|  | 	    if ( lhermitian && (disp==-1) ) { | ||||||
|  | 	      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>* | ||||||
|  | 		int dirp   = geom.directions[pp]; | ||||||
|  | 		int dispp  = geom.displacements[pp]; | ||||||
|  | 		if ( (dirp==dir) && (dispp==1) ){ | ||||||
|  | 		  auto sft = conjugate(Cshift(oZProj,dir,1)); | ||||||
|  | 		  autoView( sft_v    ,  sft  , AcceleratorWrite); | ||||||
|  | 		  autoView( A_pp     ,  A[pp], AcceleratorWrite); | ||||||
|  | 		  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); }); | ||||||
|  | 		} | ||||||
|  | 	      } | ||||||
|  | 	    } | ||||||
| 	  } | 	  } | ||||||
| 	} | 	} | ||||||
|       } |       } | ||||||
|  |  | ||||||
|  |       std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl; | ||||||
|       /////////////////////////////////////////// |       /////////////////////////////////////////// | ||||||
|       // Faster alternate self coupling.. use hermiticity to save 2x |       // Faster alternate self coupling.. use hermiticity to save 2x | ||||||
|       /////////////////////////////////////////// |       /////////////////////////////////////////// | ||||||
| @@ -604,31 +681,35 @@ public: | |||||||
|  |  | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     if(hermitian) { |  | ||||||
|       std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl; |     MemoryManager::PrintBytes(); | ||||||
|       ForceHermitian(); |  | ||||||
|  |     // Auto self test | ||||||
|  |     Test( Subspace,FineGrid,linop); | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
|  |     /////////////////////////// | ||||||
|  |     // test code worth preserving in if block | ||||||
|  |     /////////////////////////// | ||||||
|  |     std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl; | ||||||
|  |     for(int p=0;p<geom.npoint;p++){ | ||||||
|  |       std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl; | ||||||
|  |       std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl; | ||||||
|     } |     } | ||||||
|  |     std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl; | ||||||
|  |  | ||||||
|  |     phi=Subspace.subspace[0]; | ||||||
|  |     std::vector<int> bc(FineGrid->_ndimension,0); | ||||||
|  |     blockPick(Grid(),phi,tmp,bc);      // Pick out a block | ||||||
|  |     linop.Op(tmp,Mphi);                // Apply big dop | ||||||
|  |     blockProject(iProj,Mphi,Subspace.subspace); // project it and print it | ||||||
|  |     std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<< iProj <<std::endl; | ||||||
|  |     std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl; | ||||||
|  | #endif | ||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   void ForceHermitian(void) { |  | ||||||
|     CoarseMatrix Diff  (Grid()); |  | ||||||
|     for(int p=0;p<geom.npoint;p++){ |  | ||||||
|       int dir   = geom.directions[p]; |  | ||||||
|       int disp  = geom.displacements[p]; |  | ||||||
|       if(disp==-1) { |  | ||||||
| 	// Find the opposite link |  | ||||||
| 	for(int pp=0;pp<geom.npoint;pp++){ |  | ||||||
| 	  int dirp   = geom.directions[pp]; |  | ||||||
| 	  int dispp  = geom.displacements[pp]; |  | ||||||
| 	  if ( (dirp==dir) && (dispp==1) ){ |  | ||||||
| 	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp];  |  | ||||||
| 	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl; |  | ||||||
| 	    A[pp] = adj(Cshift(A[p],dir,1)); |  | ||||||
| 	  } |  | ||||||
| 	} |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -52,6 +52,9 @@ public: | |||||||
|   virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base |   virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base | ||||||
|   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; |   virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; | ||||||
|   virtual void HermOp(const Field &in, Field &out)=0; |   virtual void HermOp(const Field &in, Field &out)=0; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   =0; | ||||||
|  |   virtual std::vector<int> Displacements(void)=0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> { | |||||||
| public: | public: | ||||||
|   MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; |   MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|  |  | ||||||
|   // Support for coarsening to a multigrid |   // Support for coarsening to a multigrid | ||||||
|   void OpDiag (const Field &in, Field &out) { |   void OpDiag (const Field &in, Field &out) { | ||||||
|     _Mat.Mdiag(in,out); |     _Mat.Mdiag(in,out); | ||||||
| @@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> { | |||||||
|   Matrix &_Mat; |   Matrix &_Mat; | ||||||
|   RealD _shift; |   RealD _shift; | ||||||
| public: | public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|   ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){}; |   ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){}; | ||||||
|   // Support for coarsening to a multigrid |   // Support for coarsening to a multigrid | ||||||
|   void OpDiag (const Field &in, Field &out) { |   void OpDiag (const Field &in, Field &out) { | ||||||
| @@ -151,6 +159,8 @@ template<class Matrix,class Field> | |||||||
| class HermitianLinearOperator : public LinearOperatorBase<Field> { | class HermitianLinearOperator : public LinearOperatorBase<Field> { | ||||||
|   Matrix &_Mat; |   Matrix &_Mat; | ||||||
| public: | public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|   HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; |   HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||||
|   // Support for coarsening to a multigrid |   // Support for coarsening to a multigrid | ||||||
|   void OpDiag (const Field &in, Field &out) { |   void OpDiag (const Field &in, Field &out) { | ||||||
| @@ -182,6 +192,8 @@ template<class Matrix,class Field> | |||||||
| class NonHermitianLinearOperator : public LinearOperatorBase<Field> { | class NonHermitianLinearOperator : public LinearOperatorBase<Field> { | ||||||
|   Matrix &_Mat; |   Matrix &_Mat; | ||||||
| public: | public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|   NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; |   NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; | ||||||
|   // Support for coarsening to a multigrid |   // Support for coarsening to a multigrid | ||||||
|   void OpDiag (const Field &in, Field &out) { |   void OpDiag (const Field &in, Field &out) { | ||||||
| @@ -255,6 +267,8 @@ template<class Matrix,class Field> | |||||||
|   class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { |   class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> { | ||||||
|  public: |  public: | ||||||
|     Matrix &_Mat; |     Matrix &_Mat; | ||||||
|  |     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|     SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; |     SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; | ||||||
|     virtual  void Mpc      (const Field &in, Field &out) { |     virtual  void Mpc      (const Field &in, Field &out) { | ||||||
|       Field tmp(in.Grid()); |       Field tmp(in.Grid()); | ||||||
| @@ -281,6 +295,8 @@ template<class Matrix,class Field> | |||||||
|  protected: |  protected: | ||||||
|     Matrix &_Mat; |     Matrix &_Mat; | ||||||
|  public: |  public: | ||||||
|  |     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|     SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; |     SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; | ||||||
|      |      | ||||||
|     virtual void Mpc      (const Field &in, Field &out) { |     virtual void Mpc      (const Field &in, Field &out) { | ||||||
| @@ -307,6 +323,8 @@ template<class Matrix,class Field> | |||||||
|  protected: |  protected: | ||||||
|     Matrix &_Mat; |     Matrix &_Mat; | ||||||
|  public: |  public: | ||||||
|  |     virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |     virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|     SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; |     SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; | ||||||
|      |      | ||||||
|     virtual void Mpc      (const Field &in, Field &out) { |     virtual void Mpc      (const Field &in, Field &out) { | ||||||
| @@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator :  public NonHermitianSchurOperatorBase | |||||||
| { | { | ||||||
|  public: |  public: | ||||||
|   Matrix& _Mat; |   Matrix& _Mat; | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|  NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; |  NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; | ||||||
|   virtual void Mpc(const Field& in, Field& out) { |   virtual void Mpc(const Field& in, Field& out) { | ||||||
|     Field tmp(in.Grid()); |     Field tmp(in.Grid()); | ||||||
| @@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi | |||||||
|   Matrix &_Mat; |   Matrix &_Mat; | ||||||
|    |    | ||||||
|  public: |  public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|   NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; |   NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; | ||||||
|   virtual void Mpc(const Field& in, Field& out) { |   virtual void Mpc(const Field& in, Field& out) { | ||||||
|     Field tmp(in.Grid()); |     Field tmp(in.Grid()); | ||||||
| @@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi | |||||||
|   Matrix& _Mat; |   Matrix& _Mat; | ||||||
|    |    | ||||||
|  public: |  public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|  NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; |  NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; | ||||||
|  |  | ||||||
|   virtual void Mpc(const Field& in, Field& out) { |   virtual void Mpc(const Field& in, Field& out) { | ||||||
| @@ -475,6 +499,8 @@ class SchurStaggeredOperator :  public SchurOperatorBase<Field> { | |||||||
|   Field tmp; |   Field tmp; | ||||||
|   RealD mass; |   RealD mass; | ||||||
|  public: |  public: | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
|   SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())  |   SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid())  | ||||||
|   {  |   {  | ||||||
|     assert( _Mat.isTrivialEE() ); |     assert( _Mat.isTrivialEE() ); | ||||||
|   | |||||||
| @@ -48,6 +48,8 @@ public: | |||||||
|   virtual  void Mdiag    (const Field &in, Field &out)=0; |   virtual  void Mdiag    (const Field &in, Field &out)=0; | ||||||
|   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0; |   virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0; | ||||||
|   virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0; |   virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0; | ||||||
|  |   virtual std::vector<int> Directions(void)   =0; | ||||||
|  |   virtual std::vector<int> Displacements(void)=0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -73,6 +75,8 @@ public: | |||||||
|   virtual  void MooeeDag    (const Field &in, Field &out)=0; |   virtual  void MooeeDag    (const Field &in, Field &out)=0; | ||||||
|   virtual  void MooeeInvDag (const Field &in, Field &out)=0; |   virtual  void MooeeInvDag (const Field &in, Field &out)=0; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   =0; | ||||||
|  |   virtual std::vector<int> Displacements(void)=0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
|     /************************************************************************************* |    /************************************************************************************* | ||||||
|  |  | ||||||
|     Grid physics library, www.github.com/paboyle/Grid  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
| @@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | #ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | ||||||
| #define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | #define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG | ||||||
|  |  | ||||||
|  | NAMESPACE_BEGIN(Grid); | ||||||
|   /* |   /* | ||||||
|    * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv.  |    * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv.  | ||||||
|    * Script A = SolverMatrix  |    * Script A = SolverMatrix  | ||||||
| @@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
|    * Vout = x |    * Vout = x | ||||||
|    */ |    */ | ||||||
|  |  | ||||||
| // abstract base |  | ||||||
| template<class Field, class CoarseField> | template<class Field, class CoarseField, class Aggregates> | ||||||
| class TwoLevelFlexiblePcg : public LinearFunction<Field> | class TwoLevelFlexiblePcg : public LinearFunction<Field> | ||||||
| { | { | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|   int verbose; |   int verbose; | ||||||
|  |  | ||||||
|   RealD   Tolerance; |   RealD   Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|   const int mmax = 5; |   const int mmax = 4; | ||||||
|   GridBase *grid; |   GridBase *FineGrid; | ||||||
|   GridBase *coarsegrid; |   GridBase *CoarseGrid; | ||||||
|  |  | ||||||
|   LinearOperatorBase<Field>   *_Linop |   LinearOperatorBase<Field>   &_Linop; | ||||||
|   OperatorFunction<Field>     *_Smoother, |   LinearFunction<Field>     &_Smoother; | ||||||
|   LinearFunction<CoarseField> *_CoarseSolver; |   LinearFunction<CoarseField> &_CoarseSolver; | ||||||
|  |   Aggregates                  &_Aggregates; | ||||||
|   // Need somthing that knows how to get from Coarse to fine and back again |  | ||||||
|    |    | ||||||
|   // more most opertor functions |   // more most opertor functions | ||||||
|   TwoLevelFlexiblePcg(RealD tol, |   TwoLevelFlexiblePcg(RealD tol, | ||||||
| 		     Integer maxit, | 		      Integer maxit, | ||||||
| 		     LinearOperatorBase<Field> *Linop, | 		      LinearOperatorBase<Field> *Linop, | ||||||
| 		     LinearOperatorBase<Field> *SmootherLinop, | 		      LinearFunction<Field>   *Smoother, | ||||||
| 		     OperatorFunction<Field>   *Smoother, | 		      LinearFunction<CoarseField> *CoarseSolver, | ||||||
| 		     OperatorFunction<CoarseField>  CoarseLinop | 		      Aggregates *AggP | ||||||
| 		     ) :  | 		      ) :  | ||||||
|       Tolerance(tol),  |   Tolerance(tol),  | ||||||
|       MaxIterations(maxit), |     MaxIterations(maxit), | ||||||
|       _Linop(Linop), |     _Linop(*Linop), | ||||||
|       _PreconditionerLinop(PrecLinop), |     _Smoother(*Smoother), | ||||||
|       _Preconditioner(Preconditioner) |     _CoarseSolver(*CoarseSolver), | ||||||
|  |     _Aggregates(*AggP) | ||||||
|   {  |   {  | ||||||
|  |     CoarseGrid=_Aggregates.CoarseGrid; | ||||||
|  |     FineGrid=_Aggregates.FineGrid; | ||||||
|     verbose=0; |     verbose=0; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|   // The Pcg routine is common to all, but the various matrices differ from derived  |   // The Pcg routine is common to all, but the various matrices differ from derived  | ||||||
|   // implementation to derived implmentation |   // implementation to derived implmentation | ||||||
|   void operator() (const Field &src, Field &psi){ |  | ||||||
|   void operator() (const Field &src, Field &psi){ |   void operator() (const Field &src, Field &psi){ | ||||||
|  |  | ||||||
|     psi.Checkerboard() = src.Checkerboard(); |     psi.Checkerboard() = src.Checkerboard(); | ||||||
|     grid             = src.Grid(); |  | ||||||
|  |  | ||||||
|     RealD f; |  | ||||||
|     RealD rtzp,rtz,a,d,b; |     RealD rtzp,rtz,a,d,b; | ||||||
|     RealD rptzp; |     //    RealD rptzp; | ||||||
|     RealD tn; |     //    RealD tn; | ||||||
|     RealD guess = norm2(psi); |     RealD guess = norm2(psi); | ||||||
|     RealD ssq   = norm2(src); |     RealD ssq   = norm2(src); | ||||||
|     RealD rsq   = ssq*Tolerance*Tolerance; |     RealD rsq   = ssq*Tolerance*Tolerance; | ||||||
| @@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|     ///////////////////////////// |     ///////////////////////////// | ||||||
|     // Set up history vectors |     // Set up history vectors | ||||||
|     ///////////////////////////// |     ///////////////////////////// | ||||||
|     std::vector<Field> p  (mmax,grid); |     std::vector<Field> p  (mmax,FineGrid); | ||||||
|     std::vector<Field> mmp(mmax,grid); |     std::vector<Field> mmp(mmax,FineGrid); | ||||||
|     std::vector<RealD> pAp(mmax); |     std::vector<RealD> pAp(mmax); | ||||||
|  |  | ||||||
|     Field x  (grid); x = psi; |     Field x  (FineGrid); x = psi; | ||||||
|     Field z  (grid); |     Field z  (FineGrid); | ||||||
|     Field tmp(grid); |     Field tmp(FineGrid); | ||||||
|     Field r  (grid); |     Field r  (FineGrid); | ||||||
|     Field mu (grid); |     Field mu (FineGrid); | ||||||
|    |    | ||||||
|     ////////////////////////// |     ////////////////////////// | ||||||
|     // x0 = Vstart -- possibly modify guess |     // x0 = Vstart -- possibly modify guess | ||||||
| @@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|     Vstart(x,src); |     Vstart(x,src); | ||||||
|  |  | ||||||
|     // r0 = b -A x0 |     // r0 = b -A x0 | ||||||
|     HermOp(x,mmp); // Shouldn't this be something else? |     _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else? | ||||||
|     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 |     axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0 | ||||||
|  |  | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     // Compute z = M1 x |     // Compute z = M1 x | ||||||
|     ////////////////////////////////// |     ////////////////////////////////// | ||||||
|     M1(r,z,tmp,mp,SmootherMirs); |     M1(r,z); | ||||||
|     rtzp =real(innerProduct(r,z)); |     rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|     /////////////////////////////////////// |     /////////////////////////////////////// | ||||||
| @@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|       int peri_kp = (k+1) % mmax; |       int peri_kp = (k+1) % mmax; | ||||||
|  |  | ||||||
|       rtz=rtzp; |       rtz=rtzp; | ||||||
|       d= M3(p[peri_k],mp,mmp[peri_k],tmp); |       d= M3(p[peri_k],mmp[peri_k]); | ||||||
|       a = rtz/d; |       a = rtz/d; | ||||||
|      |      | ||||||
|       // Memorise this |       // Memorise this | ||||||
| @@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); |       RealD rn = axpy_norm(r,-a,mmp[peri_k],r); | ||||||
|  |  | ||||||
|       // Compute z = M x |       // Compute z = M x | ||||||
|       M1(r,z,tmp,mp); |       M1(r,z); | ||||||
|  |  | ||||||
|       rtzp =real(innerProduct(r,z)); |       rtzp =real(innerProduct(r,z)); | ||||||
|  |  | ||||||
|       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate |       M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate | ||||||
|  |  | ||||||
|       p[peri_kp]=p[peri_k]; |       p[peri_kp]=mu; | ||||||
|  |  | ||||||
|       // Standard search direction  p -> z + b p    ; b =  |       // Standard search direction  p -> z + b p    ; b =  | ||||||
|       b = (rtzp)/rtz; |       b = (rtzp)/rtz; | ||||||
| @@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|       // Stopping condition |       // Stopping condition | ||||||
|       if ( rn <= rsq ) {  |       if ( rn <= rsq ) {  | ||||||
|  |  | ||||||
| 	HermOp(x,mmp); // Shouldn't this be something else? | 	_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else? | ||||||
| 	axpy(tmp,-1.0,src,mmp[0]); | 	axpy(tmp,-1.0,src,mmp[0]); | ||||||
| 	 | 	 | ||||||
| 	RealD psinorm = sqrt(norm2(x)); | 	RealD psinorm = sqrt(norm2(x)); | ||||||
| @@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
| 	RealD true_residual = tmpnorm/srcnorm; | 	RealD true_residual = tmpnorm/srcnorm; | ||||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | 	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl; | ||||||
| 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | 	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl; | ||||||
| 	return k; |  | ||||||
|  | 	return; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|     // Non-convergence |     // Non-convergence | ||||||
| @@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|  |  | ||||||
|  public: |  public: | ||||||
|  |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp) { |   virtual void M1(Field & in, Field & out)  | ||||||
|  |   {// the smoother | ||||||
|   } |  | ||||||
|  |  | ||||||
|   virtual void M1(Field & in, Field & out) {// the smoother |  | ||||||
|  |  | ||||||
|     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] |     // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min] | ||||||
|     Field tmp(grid); |     Field tmp(FineGrid); | ||||||
|     Field Min(grid); |     Field Min(FineGrid); | ||||||
|  |  | ||||||
|     PcgM(in,Min); // Smoother call |     CoarseField PleftProj(CoarseGrid); | ||||||
|  |     CoarseField PleftMss_proj(CoarseGrid); | ||||||
|  |  | ||||||
|     HermOp(Min,out); |     _Smoother(in,Min); // Smoother call | ||||||
|  |  | ||||||
|  |     _Linop.HermOp(Min,out); | ||||||
|     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min |     axpy(tmp,-1.0,out,in);          // tmp  = in - A Min | ||||||
|  |  | ||||||
|     ProjectToSubspace(tmp,PleftProj);      |     _Aggregates.ProjectToSubspace(PleftProj,tmp);      | ||||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s |     _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s | ||||||
|     PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   |     _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]   | ||||||
|     axpy(out,1.0,Min,tmp); // Min+tmp |     axpy(out,1.0,Min,tmp); // Min+tmp | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void M2(const Field & in, Field & out) { |   virtual void M2(const Field & in, Field & out)  | ||||||
|  |   { | ||||||
|     out=in; |     out=in; | ||||||
|     // Must override for Def2 only |  | ||||||
|     //  case PcgDef2: |  | ||||||
|     //    Pright(in,out); |  | ||||||
|     //    break; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual RealD M3(const Field & p, Field & mmp){ |   virtual RealD M3(const Field & p, Field & mmp) | ||||||
|  |   { | ||||||
|     double d,dd; |     double d,dd; | ||||||
|     HermOpAndNorm(p,mmp,d,dd); |     _Linop.HermOpAndNorm(p,mmp,d,dd); | ||||||
|     return dd; |     return dd; | ||||||
|     // Must override for Def1 only |  | ||||||
|     //  case PcgDef1: |  | ||||||
|     //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no |  | ||||||
|     //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes |  | ||||||
|     //    Pleft(mp,mmp); |  | ||||||
|     //    d=real(linop_d->inner(p,mmp)); |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void VstartDef2(Field & xconst Field & src){ |   virtual void Vstart(Field & x,const Field & src) | ||||||
|  |   { | ||||||
|     //case PcgDef2: |     //case PcgDef2: | ||||||
|     //case PcgAdef2:  |     //case PcgAdef2:  | ||||||
|     //case PcgAdef2f: |     //case PcgAdef2f: | ||||||
| @@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field> | |||||||
|     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  |     //                   = src_s - (A guess)_s - src_s  + (A guess)_s  | ||||||
|     //                   = 0  |     //                   = 0  | ||||||
|     /////////////////////////////////// |     /////////////////////////////////// | ||||||
|     Field r(grid); |     Field r(FineGrid); | ||||||
|     Field mmp(grid); |     Field mmp(FineGrid); | ||||||
|  |  | ||||||
|  |     CoarseField PleftProj(CoarseGrid); | ||||||
|  |     CoarseField PleftMss_proj(CoarseGrid); | ||||||
|      |      | ||||||
|     HermOp(x,mmp); |     _Linop.HermOp(x,mmp); | ||||||
|     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x |     axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x | ||||||
|     ProjectToSubspace(r,PleftProj);      |     _Aggregates.ProjectToSubspace(PleftProj,r);      | ||||||
|     ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s |     _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s | ||||||
|     PromoteFromSubspace(PleftMss_proj,mmp);   |     _Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);   | ||||||
|     x=x+mmp; |     x=x+mmp; | ||||||
|  |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   virtual void Vstart(Field & x,const Field & src){ |  | ||||||
|     return; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
|   // Only Def1 has non-trivial Vout. Override in Def1 |   // Only Def1 has non-trivial Vout. Override in Def1 | ||||||
|   ///////////////////////////////////////////////////////////////////// |   ///////////////////////////////////////////////////////////////////// | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src){ |   virtual void   Vout  (Field & in, Field & out,Field & src){ | ||||||
|     out = in; |     out = in; | ||||||
|     //case PcgDef1: |  | ||||||
|     //    //Qb + PT x |  | ||||||
|     //    ProjectToSubspace(src,PleftProj);      |  | ||||||
|     //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s |  | ||||||
|     //    PromoteFromSubspace(PleftMss_proj,tmp);   |  | ||||||
|     //     |  | ||||||
|     //    Pright(in,out); |  | ||||||
|     //     |  | ||||||
|     //    linop_d->axpy(out,tmp,out,1.0); |  | ||||||
|     //    break; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   // Pright and Pleft are common to all implementations |   // Pright and Pleft are common to all implementations | ||||||
|   //////////////////////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   virtual void Pright(Field & in,Field & out){ |   virtual void Pright(Field & in,Field & out) | ||||||
|  |   { | ||||||
|     // P_R  = [ 1              0 ]  |     // P_R  = [ 1              0 ]  | ||||||
|     //        [ -Mss^-1 Msb    0 ]  |     //        [ -Mss^-1 Msb    0 ]  | ||||||
|     Field in_sbar(grid); |     Field in_sbar(FineGrid); | ||||||
|  |  | ||||||
|     ProjectToSubspace(in,PleftProj);      |     CoarseField PleftProj(CoarseGrid); | ||||||
|     PromoteFromSubspace(PleftProj,out);   |     CoarseField PleftMss_proj(CoarseGrid); | ||||||
|  |  | ||||||
|  |     _Aggregates.ProjectToSubspace(PleftProj,in);      | ||||||
|  |     _Aggregates.PromoteFromSubspace(PleftProj,out);   | ||||||
|     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  |     axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s  | ||||||
|  |  | ||||||
|     HermOp(in_sbar,out); |     _Linop.HermOp(in_sbar,out); | ||||||
|     ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project) |     _Aggregates.ProjectToSubspace(PleftProj,out);           // Mssbar in_sbar  (project) | ||||||
|  |  | ||||||
|     ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  |     _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar  | ||||||
|     PromoteFromSubspace(PleftMss_proj,out);     //  |     _Aggregates.PromoteFromSubspace(PleftMss_proj,out);     //  | ||||||
|  |  | ||||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar |     axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar | ||||||
|   } |   } | ||||||
|   virtual void Pleft (Field & in,Field & out){ |   virtual void Pleft (Field & in,Field & out) | ||||||
|  |   { | ||||||
|     // P_L  = [ 1  -Mbs Mss^-1]  |     // P_L  = [ 1  -Mbs Mss^-1]  | ||||||
|     //        [ 0   0         ]  |     //        [ 0   0         ]  | ||||||
|     Field in_sbar(grid); |     Field in_sbar(FineGrid); | ||||||
|     Field    tmp2(grid); |     Field    tmp2(FineGrid); | ||||||
|     Field    Mtmp(grid); |     Field    Mtmp(FineGrid); | ||||||
|  |  | ||||||
|     ProjectToSubspace(in,PleftProj);      |     CoarseField PleftProj(CoarseGrid); | ||||||
|     PromoteFromSubspace(PleftProj,out);   |     CoarseField PleftMss_proj(CoarseGrid); | ||||||
|  |  | ||||||
|  |     _Aggregates.ProjectToSubspace(PleftProj,in);      | ||||||
|  |     _Aggregates.PromoteFromSubspace(PleftProj,out);   | ||||||
|     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s |     axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s | ||||||
|  |  | ||||||
|     ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s |     _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s | ||||||
|     PromoteFromSubspace(PleftMss_proj,out); |     _Aggregates.PromoteFromSubspace(PleftMss_proj,out); | ||||||
|  |  | ||||||
|     HermOp(out,Mtmp); |     _Linop.HermOp(out,Mtmp); | ||||||
|  |  | ||||||
|     ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1} |     _Aggregates.ProjectToSubspace(PleftProj,Mtmp);      // Msbar s Mss^{-1} | ||||||
|     PromoteFromSubspace(PleftProj,tmp2); |     _Aggregates.PromoteFromSubspace(PleftProj,tmp2); | ||||||
|  |  | ||||||
|     axpy(out,-1.0,tmp2,Mtmp); |     axpy(out,-1.0,tmp2,Mtmp); | ||||||
|     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s |     axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s | ||||||
|   } |   } | ||||||
| } | }; | ||||||
|  | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp){ |  | ||||||
|  |  | ||||||
|   }  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   virtual void M2(Field & in, Field & out){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){ |  | ||||||
|  |  | ||||||
|   } |  | ||||||
| } |  | ||||||
| /* |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
|   virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<class Field> |  | ||||||
| class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> { |  | ||||||
|  public: |  | ||||||
|   virtual void M(Field & in,Field & out,Field & tmp);  |  | ||||||
|   virtual void M1(Field & in, Field & out,Field & tmp,Field & mp); |  | ||||||
|   virtual void M2(Field & in, Field & out); |  | ||||||
|   virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp); |  | ||||||
|   virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp); |  | ||||||
| } |  | ||||||
| */ |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -60,6 +60,8 @@ public: | |||||||
|   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; |   DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {}; | ||||||
|  |  | ||||||
|   virtual void operator()(const Field &src,Field &guess) { |   virtual void operator()(const Field &src,Field &guess) { | ||||||
|  |     RealD t=-usecond(); | ||||||
|  |      | ||||||
|     guess = Zero(); |     guess = Zero(); | ||||||
|     assert(evec.size()==eval.size()); |     assert(evec.size()==eval.size()); | ||||||
|     auto N = evec.size(); |     auto N = evec.size(); | ||||||
| @@ -68,6 +70,8 @@ public: | |||||||
|       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); |       axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess); | ||||||
|     } |     } | ||||||
|     guess.Checkerboard() = src.Checkerboard(); |     guess.Checkerboard() = src.Checkerboard(); | ||||||
|  |     t+=usecond(); | ||||||
|  |     std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl; | ||||||
|   } |   } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -59,7 +59,7 @@ public: | |||||||
|     GridBase *grid = src.Grid(); |     GridBase *grid = src.Grid(); | ||||||
|     Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid); |     Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid); | ||||||
|        |        | ||||||
|     psi=zero; |     psi=Zero(); | ||||||
|     r  = src; |     r  = src; | ||||||
|     Preconditioner(r,p); |     Preconditioner(r,p); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -53,7 +53,11 @@ public: | |||||||
|   {  |   {  | ||||||
|     size_type bytes = __n*sizeof(_Tp); |     size_type bytes = __n*sizeof(_Tp); | ||||||
|     profilerAllocate(bytes); |     profilerAllocate(bytes); | ||||||
|  | #ifdef GRID_UVM | ||||||
|  |     _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); | ||||||
|  | #else  | ||||||
|     _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); |     _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes); | ||||||
|  | #endif | ||||||
|     assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); |     assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); | ||||||
|     return ptr; |     return ptr; | ||||||
|   } |   } | ||||||
| @@ -62,7 +66,11 @@ public: | |||||||
|   {  |   {  | ||||||
|     size_type bytes = __n * sizeof(_Tp); |     size_type bytes = __n * sizeof(_Tp); | ||||||
|     profilerFree(bytes); |     profilerFree(bytes); | ||||||
|  | #ifdef GRID_UVM | ||||||
|  |     MemoryManager::SharedFree((void *)__p,bytes); | ||||||
|  | #else | ||||||
|     MemoryManager::CpuFree((void *)__p,bytes); |     MemoryManager::CpuFree((void *)__p,bytes); | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop |   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop | ||||||
|   | |||||||
| @@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid); | |||||||
| #define AccSmall (3) | #define AccSmall (3) | ||||||
| #define Shared   (4) | #define Shared   (4) | ||||||
| #define SharedSmall (5) | #define SharedSmall (5) | ||||||
|  | uint64_t total_cache; | ||||||
| uint64_t total_shared; | uint64_t total_shared; | ||||||
| uint64_t total_device; | uint64_t total_device; | ||||||
| uint64_t total_host;; | uint64_t total_host;; | ||||||
| void MemoryManager::PrintBytes(void) | void MemoryManager::PrintBytes(void) | ||||||
| { | { | ||||||
|  |   std::cout << " MemoryManager : "<<total_cache <<" cache       bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; |   std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; |   std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl; | ||||||
|   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; |   std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl; | ||||||
| @@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes) | |||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocDevice(bytes); |     ptr = (void *) acceleratorAllocDevice(bytes); | ||||||
|     total_device+=bytes; |     total_device+=bytes; | ||||||
|  |   } else { | ||||||
|  |     //    std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||||
|   } |   } | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| @@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes) | |||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|     total_shared+=bytes; |     total_shared+=bytes; | ||||||
|     //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; |     //    std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||||
|     //    PrintBytes(); |     //    PrintBytes(); | ||||||
|  |   } else { | ||||||
|  |     //    std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||||
|   } |   } | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| @@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes) | |||||||
|   if ( ptr == (void *) NULL ) { |   if ( ptr == (void *) NULL ) { | ||||||
|     ptr = (void *) acceleratorAllocShared(bytes); |     ptr = (void *) acceleratorAllocShared(bytes); | ||||||
|     total_host+=bytes; |     total_host+=bytes; | ||||||
|  |     //    std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl; | ||||||
|  |   } else { | ||||||
|  |     //    std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl; | ||||||
|   } |   } | ||||||
|   return ptr; |   return ptr; | ||||||
| } | } | ||||||
| @@ -120,7 +129,7 @@ void MemoryManager::Init(void) | |||||||
|   str= getenv("GRID_ALLOC_NCACHE_LARGE"); |   str= getenv("GRID_ALLOC_NCACHE_LARGE"); | ||||||
|   if ( str ) { |   if ( str ) { | ||||||
|     Nc = atoi(str); |     Nc = atoi(str); | ||||||
|     if ( (Nc>=0) && (Nc < NallocCacheMax)) { |     if ( (Nc>=0) && (Nc <= NallocCacheMax)) { | ||||||
|       Ncache[Cpu]=Nc; |       Ncache[Cpu]=Nc; | ||||||
|       Ncache[Acc]=Nc; |       Ncache[Acc]=Nc; | ||||||
|       Ncache[Shared]=Nc; |       Ncache[Shared]=Nc; | ||||||
| @@ -130,7 +139,7 @@ void MemoryManager::Init(void) | |||||||
|   str= getenv("GRID_ALLOC_NCACHE_SMALL"); |   str= getenv("GRID_ALLOC_NCACHE_SMALL"); | ||||||
|   if ( str ) { |   if ( str ) { | ||||||
|     Nc = atoi(str); |     Nc = atoi(str); | ||||||
|     if ( (Nc>=0) && (Nc < NallocCacheMax)) { |     if ( (Nc>=0) && (Nc <= NallocCacheMax)) { | ||||||
|       Ncache[CpuSmall]=Nc; |       Ncache[CpuSmall]=Nc; | ||||||
|       Ncache[AccSmall]=Nc; |       Ncache[AccSmall]=Nc; | ||||||
|       Ncache[SharedSmall]=Nc; |       Ncache[SharedSmall]=Nc; | ||||||
| @@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|  |  | ||||||
|   if ( entries[v].valid ) { |   if ( entries[v].valid ) { | ||||||
|     ret = entries[v].address; |     ret = entries[v].address; | ||||||
|  |     total_cache-=entries[v].bytes; | ||||||
|     entries[v].valid = 0; |     entries[v].valid = 0; | ||||||
|     entries[v].address = NULL; |     entries[v].address = NULL; | ||||||
|     entries[v].bytes = 0; |     entries[v].bytes = 0; | ||||||
| @@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries | |||||||
|   entries[v].address=ptr; |   entries[v].address=ptr; | ||||||
|   entries[v].bytes  =bytes; |   entries[v].bytes  =bytes; | ||||||
|   entries[v].valid  =1; |   entries[v].valid  =1; | ||||||
|  |   total_cache+=entries[v].bytes; | ||||||
|  |  | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
| @@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach | |||||||
|   for(int e=0;e<ncache;e++){ |   for(int e=0;e<ncache;e++){ | ||||||
|     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { |     if ( entries[e].valid && ( entries[e].bytes == bytes ) ) { | ||||||
|       entries[e].valid = 0; |       entries[e].valid = 0; | ||||||
|  |       total_cache-=bytes; | ||||||
|       return entries[e].address; |       return entries[e].address; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -93,8 +93,8 @@ private: | |||||||
|   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; |   static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; | ||||||
|   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; |   static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; | ||||||
|  |  | ||||||
|   static void PrintBytes(void); |  | ||||||
|  public: |  public: | ||||||
|  |   static void PrintBytes(void); | ||||||
|   static void Init(void); |   static void Init(void); | ||||||
|   static void InitMessage(void); |   static void InitMessage(void); | ||||||
|   static void *AcceleratorAllocate(size_t bytes); |   static void *AcceleratorAllocate(size_t bytes); | ||||||
|   | |||||||
| @@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/lattice/Lattice_local.h> | #include <Grid/lattice/Lattice_local.h> | ||||||
| #include <Grid/lattice/Lattice_reduction.h> | #include <Grid/lattice/Lattice_reduction.h> | ||||||
| #include <Grid/lattice/Lattice_peekpoke.h> | #include <Grid/lattice/Lattice_peekpoke.h> | ||||||
| //#include <Grid/lattice/Lattice_reality.h> | #include <Grid/lattice/Lattice_reality.h> | ||||||
| #include <Grid/lattice/Lattice_real_imag.h> | #include <Grid/lattice/Lattice_real_imag.h> | ||||||
| #include <Grid/lattice/Lattice_comparison_utils.h> | #include <Grid/lattice/Lattice_comparison_utils.h> | ||||||
| #include <Grid/lattice/Lattice_comparison.h> | #include <Grid/lattice/Lattice_comparison.h> | ||||||
|   | |||||||
| @@ -52,6 +52,7 @@ public: | |||||||
| // This will be safe to call from accelerator_for and is trivially copy constructible | // This will be safe to call from accelerator_for and is trivially copy constructible | ||||||
| // The copy constructor for this will need to be used by device lambda functions | // The copy constructor for this will need to be used by device lambda functions | ||||||
| ///////////////////////////////////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////////////////////////////////// | ||||||
|  | #undef LATTICE_BOUNDS_CHECK | ||||||
| template<class vobj>  | template<class vobj>  | ||||||
| class LatticeView : public LatticeAccelerator<vobj> | class LatticeView : public LatticeAccelerator<vobj> | ||||||
| { | { | ||||||
| @@ -61,14 +62,36 @@ public: | |||||||
|   void * cpu_ptr; |   void * cpu_ptr; | ||||||
| #ifdef GRID_SIMT | #ifdef GRID_SIMT | ||||||
|   accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {  |   accelerator_inline const typename vobj::scalar_object operator()(size_t i) const {  | ||||||
|  | #ifdef LATTICE_BOUNDS_CHECK | ||||||
|  |     assert(i<this->_odata_size); | ||||||
|  |     assert(i>=0); | ||||||
|  | #endif | ||||||
|     return coalescedRead(this->_odata[i]);  |     return coalescedRead(this->_odata[i]);  | ||||||
|   } |   } | ||||||
| #else  | #else  | ||||||
|   accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } |   accelerator_inline const vobj & operator()(size_t i) const { | ||||||
|  | #ifdef LATTICE_BOUNDS_CHECK | ||||||
|  |     assert(i<this->_odata_size); | ||||||
|  |     assert(i>=0); | ||||||
|  | #endif | ||||||
|  |     return this->_odata[i]; | ||||||
|  |   } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; |   accelerator_inline const vobj & operator[](size_t i) const {  | ||||||
|   accelerator_inline vobj       & operator[](size_t i)       { return this->_odata[i]; }; | #ifdef LATTICE_BOUNDS_CHECK | ||||||
|  |     assert(i<this->_odata_size); | ||||||
|  |     assert(i>=0); | ||||||
|  | #endif | ||||||
|  |     return this->_odata[i];  | ||||||
|  |   }; | ||||||
|  |   accelerator_inline vobj       & operator[](size_t i)       {  | ||||||
|  | #ifdef LATTICE_BOUNDS_CHECK | ||||||
|  |     assert(i<this->_odata_size); | ||||||
|  |     assert(i>=0); | ||||||
|  | #endif | ||||||
|  |     return this->_odata[i];  | ||||||
|  |   }; | ||||||
|  |  | ||||||
|   accelerator_inline uint64_t begin(void) const { return 0;}; |   accelerator_inline uint64_t begin(void) const { return 0;}; | ||||||
|   accelerator_inline uint64_t end(void)   const { return this->_odata_size; }; |   accelerator_inline uint64_t end(void)   const { return this->_odata_size; }; | ||||||
|   | |||||||
| @@ -77,9 +77,16 @@ const int SpinorIndex = 2; | |||||||
| template<typename T> struct isSpinor { | template<typename T> struct isSpinor { | ||||||
|   static constexpr bool value = (SpinorIndex==T::TensorLevel); |   static constexpr bool value = (SpinorIndex==T::TensorLevel); | ||||||
| }; | }; | ||||||
|  | const int CoarseIndex = 4; | ||||||
|  | template<typename T> struct isCoarsened { | ||||||
|  |   static constexpr bool value = (CoarseIndex<=T::TensorLevel); | ||||||
|  | }; | ||||||
| template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ; | template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ; | ||||||
| template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ; | template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ; | ||||||
|  |  | ||||||
|  | template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ; | ||||||
|  | template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ; | ||||||
|  |  | ||||||
| // ChrisK very keen to add extra space for Gparity doubling. | // ChrisK very keen to add extra space for Gparity doubling. | ||||||
| // | // | ||||||
| // Also add domain wall index, in a way where Wilson operator  | // Also add domain wall index, in a way where Wilson operator  | ||||||
|   | |||||||
| @@ -89,7 +89,8 @@ public: | |||||||
|   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's |   virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's | ||||||
|   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac |   virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||||
|   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac |   virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac | ||||||
|  |   virtual std::vector<int> Directions(void)   =0; | ||||||
|  |   virtual std::vector<int> Displacements(void)=0; | ||||||
|  |  | ||||||
|   virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; |   virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);}; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -44,6 +44,9 @@ public: | |||||||
|   INHERIT_IMPL_TYPES(Impl); |   INHERIT_IMPL_TYPES(Impl); | ||||||
|   typedef StaggeredKernels<Impl> Kernels; |   typedef StaggeredKernels<Impl> Kernels; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||||
|  |  | ||||||
|   FermionField _tmp; |   FermionField _tmp; | ||||||
|   FermionField &tmp(void) { return _tmp; } |   FermionField &tmp(void) { return _tmp; } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -49,6 +49,9 @@ public: | |||||||
|   INHERIT_IMPL_TYPES(Impl); |   INHERIT_IMPL_TYPES(Impl); | ||||||
|   typedef StaggeredKernels<Impl> Kernels; |   typedef StaggeredKernels<Impl> Kernels; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||||
|  |  | ||||||
|   FermionField _tmp; |   FermionField _tmp; | ||||||
|   FermionField &tmp(void) { return _tmp; } |   FermionField &tmp(void) { return _tmp; } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -47,6 +47,9 @@ public: | |||||||
|   FermionField _tmp; |   FermionField _tmp; | ||||||
|   FermionField &tmp(void) { return _tmp; } |   FermionField &tmp(void) { return _tmp; } | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||||
|  |  | ||||||
|   //////////////////////////////////////// |   //////////////////////////////////////// | ||||||
|   // Performance monitoring |   // Performance monitoring | ||||||
|   //////////////////////////////////////// |   //////////////////////////////////////// | ||||||
|   | |||||||
| @@ -63,6 +63,9 @@ public: | |||||||
|   INHERIT_IMPL_TYPES(Impl); |   INHERIT_IMPL_TYPES(Impl); | ||||||
|   typedef WilsonKernels<Impl> Kernels; |   typedef WilsonKernels<Impl> Kernels; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||||
|  |  | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   // Implement the abstract base |   // Implement the abstract base | ||||||
|   /////////////////////////////////////////////////////////////// |   /////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -72,6 +72,9 @@ public: | |||||||
|   typedef WilsonKernels<Impl> Kernels; |   typedef WilsonKernels<Impl> Kernels; | ||||||
|   PmuStat stat; |   PmuStat stat; | ||||||
|  |  | ||||||
|  |   virtual std::vector<int> Directions(void)   { return this->directions; }; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return this->displacements;}; | ||||||
|  |  | ||||||
|   FermionField _tmp; |   FermionField _tmp; | ||||||
|   FermionField &tmp(void) { return _tmp; } |   FermionField &tmp(void) { return _tmp; } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -79,6 +79,8 @@ public: | |||||||
|     _Mat.M(in,tmp); |     _Mat.M(in,tmp); | ||||||
|     G5R5(out,tmp); |     G5R5(out,tmp); | ||||||
|   } |   } | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -127,6 +129,8 @@ public: | |||||||
|     _Mat.M(in,tmp); |     _Mat.M(in,tmp); | ||||||
|     out=g5*tmp; |     out=g5*tmp; | ||||||
|   } |   } | ||||||
|  |   virtual std::vector<int> Directions(void)   { return _Mat.Directions();}; | ||||||
|  |   virtual std::vector<int> Displacements(void){ return _Mat.Displacements();}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|   | |||||||
| @@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu) | |||||||
|   pickCheckerboard(Even, CloverTermEven, CloverTerm); |   pickCheckerboard(Even, CloverTermEven, CloverTerm); | ||||||
|   pickCheckerboard(Odd, CloverTermOdd, CloverTerm); |   pickCheckerboard(Odd, CloverTermOdd, CloverTerm); | ||||||
|  |  | ||||||
|   pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm))); |   pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); | ||||||
|   pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm))); |   pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); | ||||||
|  |  | ||||||
|   pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); |   pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); | ||||||
|   pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); |   pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); | ||||||
|  |  | ||||||
|   pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv))); |   pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); | ||||||
|   pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv))); |   pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); | ||||||
| } | } | ||||||
|  |  | ||||||
| template <class Impl> | template <class Impl> | ||||||
|   | |||||||
| @@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   hspin(0)=fspin(0)-fspin(2); |   hspin(0)=fspin(0)-fspin(2); | ||||||
|   hspin(1)=fspin(1)-fspin(3); |   hspin(1)=fspin(1)-fspin(3); | ||||||
| } | } | ||||||
| @@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
|  *  0 0 -1  0 |  *  0 0 -1  0 | ||||||
|  *  0 0  0 -1 |  *  0 0  0 -1 | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   hspin(0)=fspin(0); |   hspin(0)=fspin(0); | ||||||
|   hspin(1)=fspin(1); |   hspin(1)=fspin(1); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   hspin(0)=fspin(2); |   hspin(0)=fspin(2); | ||||||
|   hspin(1)=fspin(3); |   hspin(1)=fspin(3); | ||||||
| } | } | ||||||
|    |    | ||||||
| //  template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) |  | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   rfspin(0)=fspin(0); |   rfspin(0)=fspin(0); | ||||||
|   rfspin(1)=fspin(1); |   rfspin(1)=fspin(1); | ||||||
|   rfspin(2)=Zero(); |   rfspin(2)=Zero(); | ||||||
|   rfspin(3)=Zero(); |   rfspin(3)=Zero(); | ||||||
| } | } | ||||||
| //  template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) |  | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   rfspin(0)=Zero(); |   rfspin(0)=Zero(); | ||||||
|   rfspin(1)=Zero(); |   rfspin(1)=Zero(); | ||||||
|   rfspin(2)=fspin(2); |   rfspin(2)=fspin(2); | ||||||
|   rfspin(3)=fspin(3); |   rfspin(3)=fspin(3); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin) | ||||||
|  | { | ||||||
|  |   const int hN = N>>1; | ||||||
|  |   for(int s=0;s<hN;s++){ | ||||||
|  |     rfspin(s)=fspin(s); | ||||||
|  |     rfspin(s+hN)=Zero(); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin) | ||||||
|  | { | ||||||
|  |   const int hN = N>>1; | ||||||
|  |   for(int s=0;s<hN;s++){ | ||||||
|  |     rfspin(s)=Zero(); | ||||||
|  |     rfspin(s+hN)=fspin(s+hN); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| // Reconstruction routines to move back again to four spin | // Reconstruction routines to move back again to four spin | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
|  */ |  */ | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=timesMinusI(hspin(1)); |   fspin(2)=timesMinusI(hspin(1)); | ||||||
| @@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=timesI(hspin(1)); |   fspin(2)=timesI(hspin(1)); | ||||||
| @@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)-=timesI(hspin(1)); |   fspin(2)-=timesI(hspin(1)); | ||||||
| @@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)+=timesI(hspin(1)); |   fspin(2)+=timesI(hspin(1)); | ||||||
| @@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
|  |  | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)= hspin(1); |   fspin(2)= hspin(1); | ||||||
| @@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=-hspin(1); |   fspin(2)=-hspin(1); | ||||||
| @@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)+=hspin(1); |   fspin(2)+=hspin(1); | ||||||
| @@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)-=hspin(1); |   fspin(2)-=hspin(1); | ||||||
| @@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
|  */ |  */ | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=timesMinusI(hspin(0)); |   fspin(2)=timesMinusI(hspin(0)); | ||||||
| @@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=     timesI(hspin(0)); |   fspin(2)=     timesI(hspin(0)); | ||||||
| @@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)-=timesI(hspin(0)); |   fspin(2)-=timesI(hspin(0)); | ||||||
| @@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)+=timesI(hspin(0)); |   fspin(2)+=timesI(hspin(0)); | ||||||
| @@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
|  */ |  */ | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=hspin(0); |   fspin(2)=hspin(0); | ||||||
| @@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0); |   fspin(0)=hspin(0); | ||||||
|   fspin(1)=hspin(1); |   fspin(1)=hspin(1); | ||||||
|   fspin(2)=-hspin(0); |   fspin(2)=-hspin(0); | ||||||
| @@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)+=hspin(0); |   fspin(2)+=hspin(0); | ||||||
| @@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0); |   fspin(0)+=hspin(0); | ||||||
|   fspin(1)+=hspin(1); |   fspin(1)+=hspin(1); | ||||||
|   fspin(2)-=hspin(0); |   fspin(2)-=hspin(0); | ||||||
| @@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
|  */ |  */ | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul |   fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul | ||||||
|   fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though |   fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though | ||||||
|   fspin(2)=Zero(); |   fspin(2)=Zero(); | ||||||
| @@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)=Zero(); |   fspin(0)=Zero(); | ||||||
|   fspin(1)=Zero(); |   fspin(1)=Zero(); | ||||||
|   fspin(2)=hspin(0)+hspin(0); |   fspin(2)=hspin(0)+hspin(0); | ||||||
| @@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s | |||||||
| } | } | ||||||
| template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; |  | ||||||
|   fspin(0)+=hspin(0)+hspin(0); |   fspin(0)+=hspin(0)+hspin(0); | ||||||
|   fspin(1)+=hspin(1)+hspin(1); |   fspin(1)+=hspin(1)+hspin(1); | ||||||
| } | } | ||||||
| @@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a | |||||||
| ////////// | ////////// | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjXp(hspin._internal[i],fspin._internal[i]); |     spProjXp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| @@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM | |||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //////// | //////// | ||||||
| // Xm | // Xm | ||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjXm(hspin._internal,fspin._internal); |   spProjXm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjXm(hspin._internal[i],fspin._internal[i]); |     spProjXm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjXm(hspin._internal[i][j],fspin._internal[i][j]); |       spProjXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconXm(hspin._internal,fspin._internal); |   spReconXm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconXm(hspin._internal[i],fspin._internal[i]); |     spReconXm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconXm(hspin._internal[i][j],fspin._internal[i][j]); |       spReconXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconXm(hspin._internal,fspin._internal); |   accumReconXm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconXm(hspin._internal[i],fspin._internal[i]); |     accumReconXm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconXm(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconXm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| //////// | //////// | ||||||
| // Yp | // Yp | ||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjYp(hspin._internal,fspin._internal); |   spProjYp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjYp(hspin._internal[i],fspin._internal[i]); |     spProjYp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjYp(hspin._internal[i][j],fspin._internal[i][j]); |       spProjYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconYp(hspin._internal,fspin._internal); |   spReconYp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconYp(hspin._internal[i],fspin._internal[i]); |     spReconYp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconYp(hspin._internal[i][j],fspin._internal[i][j]); |       spReconYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconYp(hspin._internal,fspin._internal); |   accumReconYp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconYp(hspin._internal[i],fspin._internal[i]); |     accumReconYp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconYp(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconYp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| //////// | //////// | ||||||
| // Ym | // Ym | ||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjYm(hspin._internal,fspin._internal); |   spProjYm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjYm(hspin._internal[i],fspin._internal[i]); |     spProjYm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjYm(hspin._internal[i][j],fspin._internal[i][j]); |       spProjYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconYm(hspin._internal,fspin._internal); |   spReconYm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconYm(hspin._internal[i],fspin._internal[i]); |     spReconYm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconYm(hspin._internal[i][j],fspin._internal[i][j]); |       spReconYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconYm(hspin._internal,fspin._internal); |   accumReconYm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconYm(hspin._internal[i],fspin._internal[i]); |     accumReconYm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconYm(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconYm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM | |||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjZp(hspin._internal,fspin._internal); |   spProjZp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjZp(hspin._internal[i],fspin._internal[i]); |     spProjZp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjZp(hspin._internal[i][j],fspin._internal[i][j]); |       spProjZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconZp(hspin._internal,fspin._internal); |   spReconZp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconZp(hspin._internal[i],fspin._internal[i]); |     spReconZp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconZp(hspin._internal[i][j],fspin._internal[i][j]); |       spReconZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconZp(hspin._internal,fspin._internal); |   accumReconZp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconZp(hspin._internal[i],fspin._internal[i]); |     accumReconZp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconZp(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconZp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM | |||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjZm(hspin._internal,fspin._internal); |   spProjZm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjZm(hspin._internal[i],fspin._internal[i]); |     spProjZm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjZm(hspin._internal[i][j],fspin._internal[i][j]); |       spProjZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconZm(hspin._internal,fspin._internal); |   spReconZm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconZm(hspin._internal[i],fspin._internal[i]); |     spReconZm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconZm(hspin._internal[i][j],fspin._internal[i][j]); |       spReconZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconZm(hspin._internal,fspin._internal); |   accumReconZm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconZm(hspin._internal[i],fspin._internal[i]); |     accumReconZm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconZm(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconZm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM | |||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjTp(hspin._internal,fspin._internal); |   spProjTp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjTp(hspin._internal[i],fspin._internal[i]); |     spProjTp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjTp(hspin._internal[i][j],fspin._internal[i][j]); |       spProjTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconTp(hspin._internal,fspin._internal); |   spReconTp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconTp(hspin._internal[i],fspin._internal[i]); |     spReconTp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconTp(hspin._internal[i][j],fspin._internal[i][j]); |       spReconTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconTp(hspin._internal,fspin._internal); |   accumReconTp(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconTp(hspin._internal[i],fspin._internal[i]); |     accumReconTp(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconTp(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconTp(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| //////// | //////// | ||||||
| // Tm | // Tm | ||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProjTm(hspin._internal,fspin._internal); |   spProjTm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProjTm(hspin._internal[i],fspin._internal[i]); |     spProjTm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProjTm(hspin._internal[i][j],fspin._internal[i][j]); |       spProjTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spReconTm(hspin._internal,fspin._internal); |   spReconTm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spReconTm(hspin._internal[i],fspin._internal[i]); |     spReconTm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spReconTm(hspin._internal[i][j],fspin._internal[i][j]); |       spReconTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumReconTm(hspin._internal,fspin._internal); |   accumReconTm(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumReconTm(hspin._internal[i],fspin._internal[i]); |     accumReconTm(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumReconTm(hspin._internal[i][j],fspin._internal[i][j]); |       accumReconTm(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |     }} | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| //////// | //////// | ||||||
| // 5p | // 5p | ||||||
| //////// | //////// | ||||||
| template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProj5p(hspin._internal,fspin._internal); |   spProj5p(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProj5p(hspin._internal[i],fspin._internal[i]); |     spProj5p(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); |       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spRecon5p(hspin._internal,fspin._internal); |   spRecon5p(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spRecon5p(hspin._internal[i],fspin._internal[i]); |     spRecon5p(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spRecon5p(hspin._internal[i][j],fspin._internal[i][j]); |       spRecon5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumRecon5p(hspin._internal,fspin._internal); |   accumRecon5p(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumRecon5p(hspin._internal[i],fspin._internal[i]); |     accumRecon5p(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]); |       accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM | |||||||
| } | } | ||||||
|  |  | ||||||
| // four spinor projectors for chiral proj | // four spinor projectors for chiral proj | ||||||
| //  template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProj5p(hspin._internal,fspin._internal); |   spProj5p(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| //  template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin) | template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProj5p(hspin._internal[i],fspin._internal[i]); |     spProj5p(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| //  template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin) | template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); |       spProj5p(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> & | |||||||
| // 5m | // 5m | ||||||
| //////// | //////// | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   spProj5m(hspin._internal,fspin._internal); |   spProj5m(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProj5m(hspin._internal[i],fspin._internal[i]); |     spProj5m(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
| @@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri | |||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spRecon5m(hspin._internal,fspin._internal); |   spRecon5m(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spRecon5m(hspin._internal[i],fspin._internal[i]); |     spRecon5m(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spRecon5m(hspin._internal[i][j],fspin._internal[i][j]); |       spRecon5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|     }} |   }} | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   accumRecon5m(hspin._internal,fspin._internal); |   accumRecon5m(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     accumRecon5m(hspin._internal[i],fspin._internal[i]); |     accumRecon5m(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]); |       accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
| @@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM | |||||||
|  |  | ||||||
|  |  | ||||||
| // four spinor projectors for chiral proj | // four spinor projectors for chiral proj | ||||||
| //  template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) | ||||||
| template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp; |  | ||||||
|   spProj5m(hspin._internal,fspin._internal); |   spProj5m(hspin._internal,fspin._internal); | ||||||
| } | } | ||||||
| //  template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin) | template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) | ||||||
| template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++) { |   for(int i=0;i<N;i++) { | ||||||
|     spProj5m(hspin._internal[i],fspin._internal[i]); |     spProj5m(hspin._internal[i],fspin._internal[i]); | ||||||
|   } |   } | ||||||
| } | } | ||||||
| //  template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin) | template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) | ||||||
| template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin) |  | ||||||
| { | { | ||||||
|   //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp; |  | ||||||
|   for(int i=0;i<N;i++){  |   for(int i=0;i<N;i++){  | ||||||
|     for(int j=0;j<N;j++){ |     for(int j=0;j<N;j++){ | ||||||
|       spProj5m(hspin._internal[i][j],fspin._internal[i][j]); |       spProj5m(hspin._internal[i][j],fspin._internal[i][j]); | ||||||
|   | |||||||
| @@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co | |||||||
|   accelerator_for(sss,nloop,vobj::Nsimd(),{ |   accelerator_for(sss,nloop,vobj::Nsimd(),{ | ||||||
|     uint64_t ss = sss*Ls; |     uint64_t ss = sss*Ls; | ||||||
|     decltype(coalescedRead(y_v[ss+sp])) tmp; |     decltype(coalescedRead(y_v[ss+sp])) tmp; | ||||||
|     spProj5m(tmp,y_v(ss+sp)); |     spProj5m(tmp,y_v(ss+sp));  | ||||||
|     tmp = a*x_v(ss+s)+b*tmp; |    tmp = a*x_v(ss+s)+b*tmp; | ||||||
|     coalescedWrite(z_v[ss+s],tmp); |     coalescedWrite(z_v[ss+s],tmp); | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
| @@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x) | |||||||
|   z.Checkerboard() = x.Checkerboard(); |   z.Checkerboard() = x.Checkerboard(); | ||||||
|   conformable(x,z); |   conformable(x,z); | ||||||
|   int Ls = grid->_rdimensions[0]; |   int Ls = grid->_rdimensions[0]; | ||||||
|   Gamma G5(Gamma::Algebra::Gamma5); |  | ||||||
|   autoView( x_v, x, AcceleratorRead); |   autoView( x_v, x, AcceleratorRead); | ||||||
|   autoView( z_v, z, AcceleratorWrite); |   autoView( z_v, z, AcceleratorWrite); | ||||||
|   uint64_t nloop = grid->oSites()/Ls; |   uint64_t nloop = grid->oSites()/Ls; | ||||||
| @@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x) | |||||||
|     uint64_t ss = sss*Ls; |     uint64_t ss = sss*Ls; | ||||||
|     for(int s=0;s<Ls;s++){ |     for(int s=0;s<Ls;s++){ | ||||||
|       int sp = Ls-1-s; |       int sp = Ls-1-s; | ||||||
|       coalescedWrite(z_v[ss+sp],G5*x_v(ss+s)); |       auto tmp = x_v(ss+s); | ||||||
|  |       decltype(tmp) tmp_p; | ||||||
|  |       decltype(tmp) tmp_m; | ||||||
|  |       spProj5p(tmp_p,tmp); | ||||||
|  |       spProj5m(tmp_m,tmp); | ||||||
|  |       // Use of spProj5m, 5p captures the coarse space too | ||||||
|  |       coalescedWrite(z_v[ss+sp],tmp_p - tmp_m); | ||||||
|     } |     } | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
| @@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x) | |||||||
|   z.Checkerboard() = x.Checkerboard(); |   z.Checkerboard() = x.Checkerboard(); | ||||||
|   conformable(x, z); |   conformable(x, z); | ||||||
|  |  | ||||||
|   Gamma G5(Gamma::Algebra::Gamma5); |   autoView( x_v, x, AcceleratorRead); | ||||||
|   z = G5 * x; |   autoView( z_v, z, AcceleratorWrite); | ||||||
|  |   uint64_t nloop = grid->oSites(); | ||||||
|  |   accelerator_for(ss,nloop,vobj::Nsimd(),{ | ||||||
|  |     auto tmp = x_v(ss); | ||||||
|  |     decltype(tmp) tmp_p; | ||||||
|  |     decltype(tmp) tmp_m; | ||||||
|  |     spProj5p(tmp_p,tmp); | ||||||
|  |     spProj5m(tmp_m,tmp); | ||||||
|  |     coalescedWrite(z_v[ss],tmp_p - tmp_m); | ||||||
|  |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
| template<class CComplex, int nbasis> | template<class CComplex, int nbasis> | ||||||
| void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x) | void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x) | ||||||
| { | { | ||||||
| @@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, | |||||||
|     } |     } | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  | */ | ||||||
|  |  | ||||||
| NAMESPACE_END(Grid); | NAMESPACE_END(Grid); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes) | |||||||
|     ptr = (void *) NULL; |     ptr = (void *) NULL; | ||||||
|     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); |     printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||||
|   } |   } | ||||||
|  |   //  size_t free,total; | ||||||
|  |   //  cudaMemGetInfo(&free,&total); | ||||||
|  |   //  std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl; | ||||||
|   return ptr; |   return ptr; | ||||||
| }; | }; | ||||||
| inline void *acceleratorAllocDevice(size_t bytes) | inline void *acceleratorAllocDevice(size_t bytes) | ||||||
| @@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes) | |||||||
|     ptr = (void *) NULL; |     ptr = (void *) NULL; | ||||||
|     printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err)); |     printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err)); | ||||||
|   } |   } | ||||||
|  |   //  size_t free,total; | ||||||
|  |   //  cudaMemGetInfo(&free,&total); | ||||||
|  |   //  std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl; | ||||||
|   return ptr; |   return ptr; | ||||||
| }; | }; | ||||||
| inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; | inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; | ||||||
|   | |||||||
| @@ -191,7 +191,7 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ |   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ | ||||||
| @@ -216,15 +216,16 @@ int main (int argc, char ** argv) | |||||||
| 	 | 	 | ||||||
| 	 | 	 | ||||||
| 	    double bytes=3*vol*Nc*Nc*sizeof(Complex); | 	    double bytes=3*vol*Nc*Nc*sizeof(Complex); | ||||||
|  | 	    double ncbytes=5*vol*Nc*Nc*sizeof(Complex); | ||||||
| 	    double flops=Nc*Nc*(6+8+8)*vol; | 	    double flops=Nc*Nc*(6+8+8)*vol; | ||||||
| 	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl; | 	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl; | ||||||
|       } |       } | ||||||
|   } |   } | ||||||
| #if 1 | #if 1 | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl; |   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; |   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; |   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl; | ||||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; |   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||||
|  |  | ||||||
|   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ |   for(int lat=LMIN;lat<=LMAX;lat+=LADD){ | ||||||
| @@ -258,10 +259,11 @@ int main (int argc, char ** argv) | |||||||
| 	tmult  = tmult /Nloop; | 	tmult  = tmult /Nloop; | ||||||
| 	 | 	 | ||||||
| 	double bytes=3*vol*Nc*Nc*sizeof(Complex); | 	double bytes=3*vol*Nc*Nc*sizeof(Complex); | ||||||
|  | 	double ncbytes=5*vol*Nc*Nc*sizeof(Complex); | ||||||
| 	double flops=Nc*Nc*(6+8+8)*vol; | 	double flops=Nc*Nc*(6+8+8)*vol; | ||||||
| 	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl; | 	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl; | ||||||
| 	time = time * 1000; // convert to NS for GB/s | 	time = time * 1000; // convert to NS for GB/s | ||||||
| 	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl; | 	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/Grid.h> | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
|  ; |  | ||||||
|  | void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop) | ||||||
|  | { | ||||||
|  |   // what type LatticeComplex  | ||||||
|  |   GridBase *_grid = prop.Grid(); | ||||||
|  |    | ||||||
|  |   typedef LatticeFermion FermionField; | ||||||
|  |   typedef LatticePropagator PropagatorField; | ||||||
|  |   typedef typename FermionField::vector_type vector_type; | ||||||
|  |   typedef typename FermionField::scalar_type ScalComplex; | ||||||
|  |   typedef iSinglet<ScalComplex> Tcomplex; | ||||||
|  |   typedef Lattice<iSinglet<vector_type> > LatComplex; | ||||||
|  |    | ||||||
|  |   Gamma::Algebra Gmu [] = { | ||||||
|  |     Gamma::Algebra::GammaX, | ||||||
|  |     Gamma::Algebra::GammaY, | ||||||
|  |     Gamma::Algebra::GammaZ, | ||||||
|  |     Gamma::Algebra::GammaT | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   Coordinate latt_size   = _grid->_fdimensions; | ||||||
|  |    | ||||||
|  |   PropagatorField   num  (_grid); num  = Zero(); | ||||||
|  |  | ||||||
|  |   LatComplex    sk(_grid);  sk = Zero(); | ||||||
|  |   LatComplex    sk2(_grid); sk2= Zero(); | ||||||
|  |   LatComplex    W(_grid); W= Zero(); | ||||||
|  |   LatComplex    a(_grid); a= Zero(); | ||||||
|  |   LatComplex    one  (_grid); one = ScalComplex(1.0,0.0); | ||||||
|  |   LatComplex denom(_grid); denom= Zero(); | ||||||
|  |   LatComplex cosha(_grid);  | ||||||
|  |   LatComplex kmu(_grid);  | ||||||
|  |   LatComplex Wea(_grid);  | ||||||
|  |   LatComplex Wema(_grid);  | ||||||
|  |  | ||||||
|  |   ScalComplex ci(0.0,1.0); | ||||||
|  |   SpinColourMatrixD identity = ComplexD(1.0); | ||||||
|  |  | ||||||
|  |   for(int mu=0;mu<Nd;mu++) { | ||||||
|  |      | ||||||
|  |     LatticeCoordinate(kmu,mu); | ||||||
|  |      | ||||||
|  |     RealD TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||||
|  |      | ||||||
|  |     kmu = TwoPiL * kmu; | ||||||
|  |     //    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions | ||||||
|  |      | ||||||
|  |     sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5); | ||||||
|  |     sk  = sk  +     sin(kmu)    *sin(kmu);  | ||||||
|  |      | ||||||
|  |     num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity); | ||||||
|  |      | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   W = one - M5 + sk2; | ||||||
|  |  | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   // Cosh alpha -> alpha | ||||||
|  |   //////////////////////////////////////////// | ||||||
|  |   cosha =  (one + W*W + sk) / (abs(W)*2.0); | ||||||
|  |  | ||||||
|  |   // FIXME Need a Lattice acosh | ||||||
|  |   { | ||||||
|  |   autoView(cosha_v,cosha,CpuRead); | ||||||
|  |   autoView(a_v,a,CpuWrite); | ||||||
|  |   for(int idx=0;idx<_grid->lSites();idx++){ | ||||||
|  |     Coordinate lcoor(Nd); | ||||||
|  |     Tcomplex cc; | ||||||
|  |     //    RealD sgn; | ||||||
|  |     _grid->LocalIndexToLocalCoor(idx,lcoor); | ||||||
|  |     peekLocalSite(cc,cosha_v,lcoor); | ||||||
|  |     assert((double)real(cc)>=1.0); | ||||||
|  |     assert(fabs((double)imag(cc))<=1.0e-15); | ||||||
|  |     cc = ScalComplex(::acosh(real(cc)),0.0); | ||||||
|  |     pokeLocalSite(cc,a_v,lcoor); | ||||||
|  |   }} | ||||||
|  |    | ||||||
|  |   Wea = ( exp( a) * abs(W)  ); | ||||||
|  |   Wema= ( exp(-a) * abs(W)  ); | ||||||
|  |    | ||||||
|  |   num   = num + ( one - Wema ) * mass * identity; | ||||||
|  |   denom= ( Wea - one ) + mass*mass * (one - Wema);  | ||||||
|  |   prop = num/denom; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| int main (int argc, char ** argv) | int main (int argc, char ** argv) | ||||||
| { | { | ||||||
| @@ -307,6 +391,17 @@ int main (int argc, char ** argv) | |||||||
|     RealD M5  =0.8; |     RealD M5  =0.8; | ||||||
|     DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5); |     DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5); | ||||||
|  |  | ||||||
|  |     /////////////////// Test code for (1-m)^2 /////////////// | ||||||
|  |     LatticePropagatorD prop1(&GRID); | ||||||
|  |     LatticePropagatorD prop2(&GRID); | ||||||
|  |     LatticeComplexD ratio(&GRID); | ||||||
|  |     MomentumSpacePropagatorTest(0.0,M5,prop1); | ||||||
|  |     MomentumSpacePropagatorTest(0.3,M5,prop2); | ||||||
|  |     ratio=localNorm2(prop2); | ||||||
|  |     ratio=ratio/localNorm2(prop1); | ||||||
|  |     std::cout << ratio; | ||||||
|  |     /////////////////// Test code for (1-m)^2 factor /////////////// | ||||||
|  |  | ||||||
|     // Momentum space prop |     // Momentum space prop | ||||||
|     std::cout << " Solving by FFT and Feynman rules" <<std::endl; |     std::cout << " Solving by FFT and Feynman rules" <<std::endl; | ||||||
|     bool fiveD = false; //calculate 4d free propagator |     bool fiveD = false; //calculate 4d free propagator | ||||||
|   | |||||||
| @@ -565,8 +565,8 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl; |   std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl; | ||||||
|   std::cout<<GridLogMessage << "**************************************************"<< std::endl; |   std::cout<<GridLogMessage << "**************************************************"<< std::endl; | ||||||
|  |  | ||||||
|   BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000);      |   BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000,false);      | ||||||
|   BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24); |   BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24,false); | ||||||
|   ZeroGuesser<CoarseVector> CoarseZeroGuesser; |   ZeroGuesser<CoarseVector> CoarseZeroGuesser; | ||||||
|   ZeroGuesser<LatticeFermion> FineZeroGuesser; |   ZeroGuesser<LatticeFermion> FineZeroGuesser; | ||||||
|  |  | ||||||
| @@ -590,5 +590,5 @@ int main (int argc, char ** argv) | |||||||
|   std::cout<<GridLogMessage << "Done "<< std::endl; |   std::cout<<GridLogMessage << "Done "<< std::endl; | ||||||
|   std::cout<<GridLogMessage << "**************************************************"<< std::endl; |   std::cout<<GridLogMessage << "**************************************************"<< std::endl; | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
|    |  | ||||||
| } | } | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user