mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Compare commits
	
		
			26 Commits
		
	
	
		
			8f84bbed1b
			...
			feature/hw
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					3064c9a6e2 | ||
| 
						 | 
					729882827c | ||
| 
						 | 
					baa668d3ac | ||
| 
						 | 
					3c82d16ed8 | ||
| 
						 | 
					5c8c0c2d7c | ||
| 
						 | 
					e5a100846c | ||
| 
						 | 
					a74e2dc12e | ||
| 
						 | 
					595f512a6e | ||
| 
						 | 
					a6499b22ff | ||
| 
						 | 
					b4e42a59c6 | ||
| 
						 | 
					8c913e0edd | ||
| 
						 | 
					fd3f93d8d3 | ||
| 
						 | 
					e9543cdacd | ||
| 
						 | 
					98f7b3d298 | ||
| 
						 | 
					b7b164ea24 | ||
| 
						 | 
					77124d99d5 | ||
| 
						 | 
					e1327e7ea0 | ||
| 
						 | 
					569f78c2cf | ||
| 
						 | 
					488c79d5a1 | ||
| 
						 | 
					dc6b0f20b2 | ||
| 
						 | 
					c0badc3e16 | ||
| 
						 | 
					58f6529b55 | ||
| 
						 | 
					e3f056dfbb | ||
| 
						 | 
					da0ffa7a79 | ||
| 
						 | 
					fcc7640b9c | ||
| 
						 | 
					0cbe2859e0 | 
@@ -49,13 +49,11 @@ inline void blockMaskedInnerProduct(Lattice<CComplex> &CoarseInner,
 | 
				
			|||||||
  Lattice<dotp> fine_inner_msk(fine);
 | 
					  Lattice<dotp> fine_inner_msk(fine);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Multiply could be fused with innerProduct
 | 
					  // Multiply could be fused with innerProduct
 | 
				
			||||||
  // Single block sum kernel could do both masks.
 | 
					 | 
				
			||||||
  fine_inner = localInnerProduct(fineX,fineY);
 | 
					  fine_inner = localInnerProduct(fineX,fineY);
 | 
				
			||||||
  mult(fine_inner_msk, fine_inner,FineMask);
 | 
					  mult(fine_inner_msk, fine_inner,FineMask);
 | 
				
			||||||
  blockSum(CoarseInner,fine_inner_msk);
 | 
					  blockSum(CoarseInner,fine_inner_msk);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
class Geometry {
 | 
					class Geometry {
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  int npoint;
 | 
					  int npoint;
 | 
				
			||||||
@@ -80,8 +78,12 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    directions   [2*_d]=0;
 | 
					    directions   [2*_d]=0;
 | 
				
			||||||
    displacements[2*_d]=0;
 | 
					    displacements[2*_d]=0;
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout <<GridLogMessage << "Geometry "<<std::endl;
 | 
				
			||||||
 | 
					    for(int p=0;p<npoint;p++){
 | 
				
			||||||
 | 
					      std::cout <<GridLogMessage << "point " <<p<<" dir "<<directions[p]<<" delta " <<displacements[p]<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
template<class Fobj,class CComplex,int nbasis>
 | 
					template<class Fobj,class CComplex,int nbasis>
 | 
				
			||||||
@@ -102,8 +104,8 @@ public:
 | 
				
			|||||||
  Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : 
 | 
					  Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : 
 | 
				
			||||||
    CoarseGrid(_CoarseGrid),
 | 
					    CoarseGrid(_CoarseGrid),
 | 
				
			||||||
    FineGrid(_FineGrid),
 | 
					    FineGrid(_FineGrid),
 | 
				
			||||||
    subspace(nbasis,_FineGrid),
 | 
					    checkerboard(_checkerboard),
 | 
				
			||||||
    checkerboard(_checkerboard)
 | 
					    subspace(nbasis,_FineGrid)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -285,6 +287,8 @@ public:
 | 
				
			|||||||
  ///////////////////////
 | 
					  ///////////////////////
 | 
				
			||||||
  GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know
 | 
					  GridBase * Grid(void)         { return _grid; };   // this is all the linalg routines need to know
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return geom.directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return geom.displacements; };
 | 
				
			||||||
  void M (const CoarseVector &in, CoarseVector &out)
 | 
					  void M (const CoarseVector &in, CoarseVector &out)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
    conformable(_grid,in.Grid());
 | 
					    conformable(_grid,in.Grid());
 | 
				
			||||||
@@ -308,6 +312,9 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    int osites=Grid()->oSites();
 | 
					    int osites=Grid()->oSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    autoView(st,Stencil,AcceleratorRead);
 | 
				
			||||||
 | 
					    siteVector *CBp=Stencil.CommBuf();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
					    accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, {
 | 
				
			||||||
      int ss = sss/nbasis;
 | 
					      int ss = sss/nbasis;
 | 
				
			||||||
      int b  = sss%nbasis;
 | 
					      int b  = sss%nbasis;
 | 
				
			||||||
@@ -318,12 +325,12 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      for(int point=0;point<geom.npoint;point++){
 | 
					      for(int point=0;point<geom.npoint;point++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	SE=Stencil.GetEntry(ptype,point,ss);
 | 
						SE=st.GetEntry(ptype,point,ss);
 | 
				
			||||||
	  
 | 
						  
 | 
				
			||||||
	if(SE->_is_local) { 
 | 
						if(SE->_is_local) { 
 | 
				
			||||||
	  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
 | 
						  nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute);
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
	  nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]);
 | 
						  nbr = coalescedRead(CBp[SE->_offset]);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	acceleratorSynchronise();
 | 
						acceleratorSynchronise();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -332,7 +339,7 @@ public:
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      coalescedWrite(out_v[ss](b),res);
 | 
					      coalescedWrite(out_v[ss](b),res);
 | 
				
			||||||
      });
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
 | 
					    for(int p=0;p<geom.npoint;p++) AcceleratorViewContainer[p].ViewClose();
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
@@ -409,38 +416,23 @@ public:
 | 
				
			|||||||
      MdirCalc(in,out[p],p);
 | 
					      MdirCalc(in,out[p],p);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){
 | 
					  void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    this->MdirComms(in);
 | 
					    this->MdirComms(in);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int ndim = in.Grid()->Nd();
 | 
					    int ndim = in.Grid()->Nd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //////////////
 | 
					    int point=-1;
 | 
				
			||||||
    // 4D action like wilson
 | 
					    for(int p=0;p<geom.npoint;p++){
 | 
				
			||||||
    // 0+ => 0 
 | 
					      if( (dir==geom.directions[p])&&(disp==geom.displacements[p])) point=p;
 | 
				
			||||||
    // 0- => 1
 | 
					    }
 | 
				
			||||||
    // 1+ => 2 
 | 
					    assert(point!=-1);// Must find
 | 
				
			||||||
    // 1- => 3
 | 
					 | 
				
			||||||
    // etc..
 | 
					 | 
				
			||||||
    //////////////
 | 
					 | 
				
			||||||
    // 5D action like DWF
 | 
					 | 
				
			||||||
    // 1+ => 0 
 | 
					 | 
				
			||||||
    // 1- => 1
 | 
					 | 
				
			||||||
    // 2+ => 2 
 | 
					 | 
				
			||||||
    // 2- => 3
 | 
					 | 
				
			||||||
    // etc..
 | 
					 | 
				
			||||||
    auto point = [dir, disp, ndim](){
 | 
					 | 
				
			||||||
      if(dir == 0 and disp == 0)
 | 
					 | 
				
			||||||
	return 8;
 | 
					 | 
				
			||||||
      else if ( ndim==4 ) { 
 | 
					 | 
				
			||||||
	return (4 * dir + 1 - disp) / 2;
 | 
					 | 
				
			||||||
      } else { 
 | 
					 | 
				
			||||||
	return (4 * (dir-1) + 1 - disp) / 2;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout <<GridLogMessage << "Mdir point "<<point<<" dir "<<dir<<" disp "<<disp  <<std::endl;
 | 
				
			||||||
 | 
					    for(int p=0;p<geom.npoint;p++){
 | 
				
			||||||
 | 
					      std::cout <<GridLogMessage << "point " <<p<<" dir "<<geom.directions[p]<<" delta " <<geom.displacements[p]<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
    MdirCalc(in,out,point);
 | 
					    MdirCalc(in,out,point);
 | 
				
			||||||
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void Mdiag(const CoarseVector &in, CoarseVector &out)
 | 
					  void Mdiag(const CoarseVector &in, CoarseVector &out)
 | 
				
			||||||
@@ -456,10 +448,58 @@ public:
 | 
				
			|||||||
    geom(CoarseGrid._ndimension),
 | 
					    geom(CoarseGrid._ndimension),
 | 
				
			||||||
    hermitian(hermitian_),
 | 
					    hermitian(hermitian_),
 | 
				
			||||||
    Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
 | 
					    Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0),
 | 
				
			||||||
      A(geom.npoint,&CoarseGrid)
 | 
					    A(geom.npoint,&CoarseGrid)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void Test(Aggregation<Fobj,CComplex,nbasis> &_Aggregates,GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    typedef Lattice<Fobj> FineField;
 | 
				
			||||||
 | 
					    CoarseVector Cin(_grid);
 | 
				
			||||||
 | 
					    CoarseVector Cout(_grid);
 | 
				
			||||||
 | 
					    CoarseVector CFout(_grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    FineField Fin(FineGrid);
 | 
				
			||||||
 | 
					    FineField Fout(FineGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<int> seeds({1,2,3,4,5});
 | 
				
			||||||
 | 
					    GridParallelRNG RNG(_grid);  RNG.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					    gaussian(RNG,Cin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _Aggregates.PromoteFromSubspace(Cin,Fin);
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace(Cin,Fin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< " Testing M  "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
				
			||||||
 | 
					    // Coarse operator
 | 
				
			||||||
 | 
					    this->M(Cin,Cout);
 | 
				
			||||||
 | 
					    // Fine projected operator
 | 
				
			||||||
 | 
					    _Aggregates.PromoteFromSubspace(Cin,Fin);
 | 
				
			||||||
 | 
					    linop.Op(Fin,Fout);
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace(CFout,Fout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    CFout = CFout-Cout;
 | 
				
			||||||
 | 
					    RealD diff = norm2(CFout);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< " diff  "<<diff<<std::endl;
 | 
				
			||||||
 | 
					    assert(diff<1.0e-5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< " Testing Mdag  "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "************  "<<std::endl;
 | 
				
			||||||
 | 
					    // Coarse operator
 | 
				
			||||||
 | 
					    Mdag(Cin,Cout);
 | 
				
			||||||
 | 
					    // Fine operator
 | 
				
			||||||
 | 
					    linop.AdjOp(Fin,Fout);
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace(CFout,Fout);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    CFout = CFout-Cout;
 | 
				
			||||||
 | 
					    diff = norm2(CFout);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< " diff  "<<diff<<std::endl; 
 | 
				
			||||||
 | 
					    assert(diff<1.0e-5);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
					  void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase<Lattice<Fobj> > &linop,
 | 
				
			||||||
		       Aggregation<Fobj,CComplex,nbasis> & Subspace)
 | 
							       Aggregation<Fobj,CComplex,nbasis> & Subspace)
 | 
				
			||||||
  {
 | 
					  {
 | 
				
			||||||
@@ -496,8 +536,19 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    CoarseScalar InnerProd(Grid()); 
 | 
					    CoarseScalar InnerProd(Grid()); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "CoarsenMatrix Orthog " << std::endl;
 | 
				
			||||||
    // Orthogonalise the subblocks over the basis
 | 
					    // Orthogonalise the subblocks over the basis
 | 
				
			||||||
    blockOrthogonalise(InnerProd,Subspace.subspace);
 | 
					    blockOrthogonalise(InnerProd,Subspace.subspace);
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage<< "CoarsenMatrix Orthog done " << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto OpDirections    = linop.Directions();
 | 
				
			||||||
 | 
					    auto OpDisplacements = linop.Displacements();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::cout<<" Coarsening an operator with "<< OpDirections.size()<<" terms "<<std::endl;
 | 
				
			||||||
 | 
					    for(int p=0;p<OpDirections.size();p++) {
 | 
				
			||||||
 | 
					      assert(OpDirections[p]==geom.directions[p]);
 | 
				
			||||||
 | 
					      assert(OpDisplacements[p]==geom.displacements[p]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Compute the matrix elements of linop between this orthonormal
 | 
					    // Compute the matrix elements of linop between this orthonormal
 | 
				
			||||||
    // set of vectors.
 | 
					    // set of vectors.
 | 
				
			||||||
@@ -533,13 +584,27 @@ public:
 | 
				
			|||||||
    evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
 | 
					    evenmask = where(mod(bcb,2)==(Integer)0,one,zero);
 | 
				
			||||||
    oddmask  = one-evenmask;
 | 
					    oddmask  = one-evenmask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      phi=Subspace.subspace[0];
 | 
				
			||||||
 | 
					      linop.OpDirAll(phi,Mphi_p);
 | 
				
			||||||
 | 
					      for(int p=0;p<geom.npoint-1;p++){
 | 
				
			||||||
 | 
						int dir=geom.directions[p];
 | 
				
			||||||
 | 
						int disp=geom.displacements[p];
 | 
				
			||||||
 | 
						linop.OpDir(phi,Mphi,dir,disp);
 | 
				
			||||||
 | 
						Mphi=Mphi-Mphi_p[p];
 | 
				
			||||||
 | 
						std::cout << GridLogMessage <<" Direction mapping check " <<norm2(Mphi)<<std::endl;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
    assert(self_stencil!=-1);
 | 
					    assert(self_stencil!=-1);
 | 
				
			||||||
 | 
					    int lhermitian=hermitian;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int i=0;i<nbasis;i++){
 | 
					    for(int i=0;i<nbasis;i++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      phi=Subspace.subspace[i];
 | 
					      phi=Subspace.subspace[i];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      //      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
					      std::cout << GridLogMessage<< "CoarsenMatrix vector "<<i << std::endl;
 | 
				
			||||||
      linop.OpDirAll(phi,Mphi_p);
 | 
					      linop.OpDirAll(phi,Mphi_p);
 | 
				
			||||||
      linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);
 | 
					      linop.OpDiag  (phi,Mphi_p[geom.npoint-1]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -550,7 +615,7 @@ public:
 | 
				
			|||||||
	int dir   = geom.directions[p];
 | 
						int dir   = geom.directions[p];
 | 
				
			||||||
	int disp  = geom.displacements[p];
 | 
						int disp  = geom.displacements[p];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if ( (disp==-1) || (!hermitian ) ) {
 | 
						if ( (disp==-1) || (!lhermitian ) ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	  ////////////////////////////////////////////////////////////////////////
 | 
						  ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
	  // Pick out contributions coming from this cell and neighbour cell
 | 
						  // Pick out contributions coming from this cell and neighbour cell
 | 
				
			||||||
@@ -568,11 +633,23 @@ public:
 | 
				
			|||||||
	    autoView( A_self  , A[self_stencil], AcceleratorWrite);
 | 
						    autoView( A_self  , A[self_stencil], AcceleratorWrite);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
 | 
						    accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });
 | 
				
			||||||
 | 
						    if ( lhermitian && (disp==-1) ) {
 | 
				
			||||||
 | 
						      for(int pp=0;pp<geom.npoint;pp++){// Find the opposite link and set <j|A|i> = <i|A|j>*
 | 
				
			||||||
 | 
							int dirp   = geom.directions[pp];
 | 
				
			||||||
 | 
							int dispp  = geom.displacements[pp];
 | 
				
			||||||
 | 
							if ( (dirp==dir) && (dispp==1) ){
 | 
				
			||||||
 | 
							  auto sft = conjugate(Cshift(oZProj,dir,1));
 | 
				
			||||||
 | 
							  autoView( sft_v    ,  sft  , AcceleratorWrite);
 | 
				
			||||||
 | 
							  autoView( A_pp     ,  A[pp], AcceleratorWrite);
 | 
				
			||||||
 | 
							  accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_pp[ss](i,j),sft_v(ss)); });
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						      }
 | 
				
			||||||
 | 
						    }
 | 
				
			||||||
	  }
 | 
						  }
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      std::cout << GridLogMessage<< "CoarsenMatrix Diag "<<std::endl;
 | 
				
			||||||
      ///////////////////////////////////////////
 | 
					      ///////////////////////////////////////////
 | 
				
			||||||
      // Faster alternate self coupling.. use hermiticity to save 2x
 | 
					      // Faster alternate self coupling.. use hermiticity to save 2x
 | 
				
			||||||
      ///////////////////////////////////////////
 | 
					      ///////////////////////////////////////////
 | 
				
			||||||
@@ -604,31 +681,35 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if(hermitian) {
 | 
					
 | 
				
			||||||
      std::cout << GridLogMessage << " ForceHermitian, new code "<<std::endl;
 | 
					    MemoryManager::PrintBytes();
 | 
				
			||||||
      ForceHermitian();
 | 
					
 | 
				
			||||||
 | 
					    // Auto self test
 | 
				
			||||||
 | 
					    Test( Subspace,FineGrid,linop);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if 0
 | 
				
			||||||
 | 
					    ///////////////////////////
 | 
				
			||||||
 | 
					    // test code worth preserving in if block
 | 
				
			||||||
 | 
					    ///////////////////////////
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<< " Computed matrix elements "<< self_stencil <<std::endl;
 | 
				
			||||||
 | 
					    for(int p=0;p<geom.npoint;p++){
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<< "A["<<p<<"]" << std::endl;
 | 
				
			||||||
 | 
					      std::cout<<GridLogMessage<< "\n"<<A[p] << std::endl;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<< " picking by block0 "<< self_stencil <<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    phi=Subspace.subspace[0];
 | 
				
			||||||
 | 
					    std::vector<int> bc(FineGrid->_ndimension,0);
 | 
				
			||||||
 | 
					    blockPick(Grid(),phi,tmp,bc);      // Pick out a block
 | 
				
			||||||
 | 
					    linop.Op(tmp,Mphi);                // Apply big dop
 | 
				
			||||||
 | 
					    blockProject(iProj,Mphi,Subspace.subspace); // project it and print it
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<< " Computed matrix elements from block zero only "<<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<< iProj <<std::endl;
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"Computed Coarse Operator"<<std::endl;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void ForceHermitian(void) {
 | 
					 | 
				
			||||||
    CoarseMatrix Diff  (Grid());
 | 
					 | 
				
			||||||
    for(int p=0;p<geom.npoint;p++){
 | 
					 | 
				
			||||||
      int dir   = geom.directions[p];
 | 
					 | 
				
			||||||
      int disp  = geom.displacements[p];
 | 
					 | 
				
			||||||
      if(disp==-1) {
 | 
					 | 
				
			||||||
	// Find the opposite link
 | 
					 | 
				
			||||||
	for(int pp=0;pp<geom.npoint;pp++){
 | 
					 | 
				
			||||||
	  int dirp   = geom.directions[pp];
 | 
					 | 
				
			||||||
	  int dispp  = geom.displacements[pp];
 | 
					 | 
				
			||||||
	  if ( (dirp==dir) && (dispp==1) ){
 | 
					 | 
				
			||||||
	    //	    Diff = adj(Cshift(A[p],dir,1)) - A[pp]; 
 | 
					 | 
				
			||||||
	    //	    std::cout << GridLogMessage<<" Replacing stencil leg "<<pp<<" with leg "<<p<< " diff "<<norm2(Diff) <<std::endl;
 | 
					 | 
				
			||||||
	    A[pp] = adj(Cshift(A[p],dir,1));
 | 
					 | 
				
			||||||
	  }
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,6 +52,9 @@ public:
 | 
				
			|||||||
  virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
					  virtual void AdjOp  (const Field &in, Field &out) = 0; // Abstract base
 | 
				
			||||||
  virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
 | 
					  virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0;
 | 
				
			||||||
  virtual void HermOp(const Field &in, Field &out)=0;
 | 
					  virtual void HermOp(const Field &in, Field &out)=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   =0;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void)=0;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -76,6 +79,9 @@ class MdagMLinearOperator : public LinearOperatorBase<Field> {
 | 
				
			|||||||
public:
 | 
					public:
 | 
				
			||||||
  MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
					  MdagMLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Support for coarsening to a multigrid
 | 
					  // Support for coarsening to a multigrid
 | 
				
			||||||
  void OpDiag (const Field &in, Field &out) {
 | 
					  void OpDiag (const Field &in, Field &out) {
 | 
				
			||||||
    _Mat.Mdiag(in,out);
 | 
					    _Mat.Mdiag(in,out);
 | 
				
			||||||
@@ -111,6 +117,8 @@ class ShiftedMdagMLinearOperator : public LinearOperatorBase<Field> {
 | 
				
			|||||||
  Matrix &_Mat;
 | 
					  Matrix &_Mat;
 | 
				
			||||||
  RealD _shift;
 | 
					  RealD _shift;
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
  ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
 | 
					  ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){};
 | 
				
			||||||
  // Support for coarsening to a multigrid
 | 
					  // Support for coarsening to a multigrid
 | 
				
			||||||
  void OpDiag (const Field &in, Field &out) {
 | 
					  void OpDiag (const Field &in, Field &out) {
 | 
				
			||||||
@@ -151,6 +159,8 @@ template<class Matrix,class Field>
 | 
				
			|||||||
class HermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
					class HermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
				
			||||||
  Matrix &_Mat;
 | 
					  Matrix &_Mat;
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
  HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
					  HermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
  // Support for coarsening to a multigrid
 | 
					  // Support for coarsening to a multigrid
 | 
				
			||||||
  void OpDiag (const Field &in, Field &out) {
 | 
					  void OpDiag (const Field &in, Field &out) {
 | 
				
			||||||
@@ -182,6 +192,8 @@ template<class Matrix,class Field>
 | 
				
			|||||||
class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
					class NonHermitianLinearOperator : public LinearOperatorBase<Field> {
 | 
				
			||||||
  Matrix &_Mat;
 | 
					  Matrix &_Mat;
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
  NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
					  NonHermitianLinearOperator(Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
  // Support for coarsening to a multigrid
 | 
					  // Support for coarsening to a multigrid
 | 
				
			||||||
  void OpDiag (const Field &in, Field &out) {
 | 
					  void OpDiag (const Field &in, Field &out) {
 | 
				
			||||||
@@ -255,6 +267,8 @@ template<class Matrix,class Field>
 | 
				
			|||||||
  class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
 | 
					  class SchurDiagMooeeOperator :  public SchurOperatorBase<Field> {
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
    Matrix &_Mat;
 | 
					    Matrix &_Mat;
 | 
				
			||||||
 | 
					    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
    SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
 | 
					    SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
    virtual  void Mpc      (const Field &in, Field &out) {
 | 
					    virtual  void Mpc      (const Field &in, Field &out) {
 | 
				
			||||||
      Field tmp(in.Grid());
 | 
					      Field tmp(in.Grid());
 | 
				
			||||||
@@ -281,6 +295,8 @@ template<class Matrix,class Field>
 | 
				
			|||||||
 protected:
 | 
					 protected:
 | 
				
			||||||
    Matrix &_Mat;
 | 
					    Matrix &_Mat;
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
    SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
 | 
					    SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    virtual void Mpc      (const Field &in, Field &out) {
 | 
					    virtual void Mpc      (const Field &in, Field &out) {
 | 
				
			||||||
@@ -307,6 +323,8 @@ template<class Matrix,class Field>
 | 
				
			|||||||
 protected:
 | 
					 protected:
 | 
				
			||||||
    Matrix &_Mat;
 | 
					    Matrix &_Mat;
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					    virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					    virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
    SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
 | 
					    SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){};
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    virtual void Mpc      (const Field &in, Field &out) {
 | 
					    virtual void Mpc      (const Field &in, Field &out) {
 | 
				
			||||||
@@ -372,6 +390,8 @@ class NonHermitianSchurDiagMooeeOperator :  public NonHermitianSchurOperatorBase
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
  Matrix& _Mat;
 | 
					  Matrix& _Mat;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
 NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
 | 
					 NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){};
 | 
				
			||||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
					  virtual void Mpc(const Field& in, Field& out) {
 | 
				
			||||||
    Field tmp(in.Grid());
 | 
					    Field tmp(in.Grid());
 | 
				
			||||||
@@ -405,6 +425,8 @@ class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase<Fi
 | 
				
			|||||||
  Matrix &_Mat;
 | 
					  Matrix &_Mat;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
  NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
 | 
					  NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){};
 | 
				
			||||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
					  virtual void Mpc(const Field& in, Field& out) {
 | 
				
			||||||
    Field tmp(in.Grid());
 | 
					    Field tmp(in.Grid());
 | 
				
			||||||
@@ -435,6 +457,8 @@ class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase<Fi
 | 
				
			|||||||
  Matrix& _Mat;
 | 
					  Matrix& _Mat;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
 NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
 | 
					 NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void Mpc(const Field& in, Field& out) {
 | 
					  virtual void Mpc(const Field& in, Field& out) {
 | 
				
			||||||
@@ -475,6 +499,8 @@ class SchurStaggeredOperator :  public SchurOperatorBase<Field> {
 | 
				
			|||||||
  Field tmp;
 | 
					  Field tmp;
 | 
				
			||||||
  RealD mass;
 | 
					  RealD mass;
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
  SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) 
 | 
					  SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) 
 | 
				
			||||||
  { 
 | 
					  { 
 | 
				
			||||||
    assert( _Mat.isTrivialEE() );
 | 
					    assert( _Mat.isTrivialEE() );
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -48,6 +48,8 @@ public:
 | 
				
			|||||||
  virtual  void Mdiag    (const Field &in, Field &out)=0;
 | 
					  virtual  void Mdiag    (const Field &in, Field &out)=0;
 | 
				
			||||||
  virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
 | 
					  virtual  void Mdir     (const Field &in, Field &out,int dir, int disp)=0;
 | 
				
			||||||
  virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
 | 
					  virtual  void MdirAll  (const Field &in, std::vector<Field> &out)=0;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   =0;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void)=0;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -73,6 +75,8 @@ public:
 | 
				
			|||||||
  virtual  void MooeeDag    (const Field &in, Field &out)=0;
 | 
					  virtual  void MooeeDag    (const Field &in, Field &out)=0;
 | 
				
			||||||
  virtual  void MooeeInvDag (const Field &in, Field &out)=0;
 | 
					  virtual  void MooeeInvDag (const Field &in, Field &out)=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   =0;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void)=0;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
    /*************************************************************************************
 | 
					   /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Grid physics library, www.github.com/paboyle/Grid 
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
					#ifndef GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
				
			||||||
#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
					#define GRID_ALGORITHMS_ITERATIVE_GENERIC_PCG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
  /*
 | 
					  /*
 | 
				
			||||||
   * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv. 
 | 
					   * Compared to Tang-2009:  P=Pleft. P^T = PRight Q=MssInv. 
 | 
				
			||||||
   * Script A = SolverMatrix 
 | 
					   * Script A = SolverMatrix 
 | 
				
			||||||
@@ -50,53 +51,54 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
   * Vout = x
 | 
					   * Vout = x
 | 
				
			||||||
   */
 | 
					   */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// abstract base
 | 
					
 | 
				
			||||||
template<class Field, class CoarseField>
 | 
					template<class Field, class CoarseField, class Aggregates>
 | 
				
			||||||
class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
					class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int verbose;
 | 
					  int verbose;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  RealD   Tolerance;
 | 
					  RealD   Tolerance;
 | 
				
			||||||
  Integer MaxIterations;
 | 
					  Integer MaxIterations;
 | 
				
			||||||
  const int mmax = 5;
 | 
					  const int mmax = 4;
 | 
				
			||||||
  GridBase *grid;
 | 
					  GridBase *FineGrid;
 | 
				
			||||||
  GridBase *coarsegrid;
 | 
					  GridBase *CoarseGrid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LinearOperatorBase<Field>   *_Linop
 | 
					  LinearOperatorBase<Field>   &_Linop;
 | 
				
			||||||
  OperatorFunction<Field>     *_Smoother,
 | 
					  LinearFunction<Field>     &_Smoother;
 | 
				
			||||||
  LinearFunction<CoarseField> *_CoarseSolver;
 | 
					  LinearFunction<CoarseField> &_CoarseSolver;
 | 
				
			||||||
 | 
					  Aggregates                  &_Aggregates;
 | 
				
			||||||
  // Need somthing that knows how to get from Coarse to fine and back again
 | 
					 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // more most opertor functions
 | 
					  // more most opertor functions
 | 
				
			||||||
  TwoLevelFlexiblePcg(RealD tol,
 | 
					  TwoLevelFlexiblePcg(RealD tol,
 | 
				
			||||||
		     Integer maxit,
 | 
							      Integer maxit,
 | 
				
			||||||
		     LinearOperatorBase<Field> *Linop,
 | 
							      LinearOperatorBase<Field> *Linop,
 | 
				
			||||||
		     LinearOperatorBase<Field> *SmootherLinop,
 | 
							      LinearFunction<Field>   *Smoother,
 | 
				
			||||||
		     OperatorFunction<Field>   *Smoother,
 | 
							      LinearFunction<CoarseField> *CoarseSolver,
 | 
				
			||||||
		     OperatorFunction<CoarseField>  CoarseLinop
 | 
							      Aggregates *AggP
 | 
				
			||||||
		     ) : 
 | 
							      ) : 
 | 
				
			||||||
      Tolerance(tol), 
 | 
					  Tolerance(tol), 
 | 
				
			||||||
      MaxIterations(maxit),
 | 
					    MaxIterations(maxit),
 | 
				
			||||||
      _Linop(Linop),
 | 
					    _Linop(*Linop),
 | 
				
			||||||
      _PreconditionerLinop(PrecLinop),
 | 
					    _Smoother(*Smoother),
 | 
				
			||||||
      _Preconditioner(Preconditioner)
 | 
					    _CoarseSolver(*CoarseSolver),
 | 
				
			||||||
 | 
					    _Aggregates(*AggP)
 | 
				
			||||||
  { 
 | 
					  { 
 | 
				
			||||||
 | 
					    CoarseGrid=_Aggregates.CoarseGrid;
 | 
				
			||||||
 | 
					    FineGrid=_Aggregates.FineGrid;
 | 
				
			||||||
    verbose=0;
 | 
					    verbose=0;
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // The Pcg routine is common to all, but the various matrices differ from derived 
 | 
					  // The Pcg routine is common to all, but the various matrices differ from derived 
 | 
				
			||||||
  // implementation to derived implmentation
 | 
					  // implementation to derived implmentation
 | 
				
			||||||
  void operator() (const Field &src, Field &psi){
 | 
					 | 
				
			||||||
  void operator() (const Field &src, Field &psi){
 | 
					  void operator() (const Field &src, Field &psi){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    psi.Checkerboard() = src.Checkerboard();
 | 
					    psi.Checkerboard() = src.Checkerboard();
 | 
				
			||||||
    grid             = src.Grid();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RealD f;
 | 
					 | 
				
			||||||
    RealD rtzp,rtz,a,d,b;
 | 
					    RealD rtzp,rtz,a,d,b;
 | 
				
			||||||
    RealD rptzp;
 | 
					    //    RealD rptzp;
 | 
				
			||||||
    RealD tn;
 | 
					    //    RealD tn;
 | 
				
			||||||
    RealD guess = norm2(psi);
 | 
					    RealD guess = norm2(psi);
 | 
				
			||||||
    RealD ssq   = norm2(src);
 | 
					    RealD ssq   = norm2(src);
 | 
				
			||||||
    RealD rsq   = ssq*Tolerance*Tolerance;
 | 
					    RealD rsq   = ssq*Tolerance*Tolerance;
 | 
				
			||||||
@@ -104,15 +106,15 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
    /////////////////////////////
 | 
					    /////////////////////////////
 | 
				
			||||||
    // Set up history vectors
 | 
					    // Set up history vectors
 | 
				
			||||||
    /////////////////////////////
 | 
					    /////////////////////////////
 | 
				
			||||||
    std::vector<Field> p  (mmax,grid);
 | 
					    std::vector<Field> p  (mmax,FineGrid);
 | 
				
			||||||
    std::vector<Field> mmp(mmax,grid);
 | 
					    std::vector<Field> mmp(mmax,FineGrid);
 | 
				
			||||||
    std::vector<RealD> pAp(mmax);
 | 
					    std::vector<RealD> pAp(mmax);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Field x  (grid); x = psi;
 | 
					    Field x  (FineGrid); x = psi;
 | 
				
			||||||
    Field z  (grid);
 | 
					    Field z  (FineGrid);
 | 
				
			||||||
    Field tmp(grid);
 | 
					    Field tmp(FineGrid);
 | 
				
			||||||
    Field r  (grid);
 | 
					    Field r  (FineGrid);
 | 
				
			||||||
    Field mu (grid);
 | 
					    Field mu (FineGrid);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
    //////////////////////////
 | 
					    //////////////////////////
 | 
				
			||||||
    // x0 = Vstart -- possibly modify guess
 | 
					    // x0 = Vstart -- possibly modify guess
 | 
				
			||||||
@@ -121,13 +123,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
    Vstart(x,src);
 | 
					    Vstart(x,src);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // r0 = b -A x0
 | 
					    // r0 = b -A x0
 | 
				
			||||||
    HermOp(x,mmp); // Shouldn't this be something else?
 | 
					    _Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
 | 
				
			||||||
    axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0
 | 
					    axpy (r, -1.0,mmp[0], src);    // Recomputes r=src-Ax0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //////////////////////////////////
 | 
					    //////////////////////////////////
 | 
				
			||||||
    // Compute z = M1 x
 | 
					    // Compute z = M1 x
 | 
				
			||||||
    //////////////////////////////////
 | 
					    //////////////////////////////////
 | 
				
			||||||
    M1(r,z,tmp,mp,SmootherMirs);
 | 
					    M1(r,z);
 | 
				
			||||||
    rtzp =real(innerProduct(r,z));
 | 
					    rtzp =real(innerProduct(r,z));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ///////////////////////////////////////
 | 
					    ///////////////////////////////////////
 | 
				
			||||||
@@ -143,7 +145,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
      int peri_kp = (k+1) % mmax;
 | 
					      int peri_kp = (k+1) % mmax;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      rtz=rtzp;
 | 
					      rtz=rtzp;
 | 
				
			||||||
      d= M3(p[peri_k],mp,mmp[peri_k],tmp);
 | 
					      d= M3(p[peri_k],mmp[peri_k]);
 | 
				
			||||||
      a = rtz/d;
 | 
					      a = rtz/d;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
      // Memorise this
 | 
					      // Memorise this
 | 
				
			||||||
@@ -153,13 +155,13 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
      RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
 | 
					      RealD rn = axpy_norm(r,-a,mmp[peri_k],r);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Compute z = M x
 | 
					      // Compute z = M x
 | 
				
			||||||
      M1(r,z,tmp,mp);
 | 
					      M1(r,z);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      rtzp =real(innerProduct(r,z));
 | 
					      rtzp =real(innerProduct(r,z));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
 | 
					      M2(z,mu); // ADEF-2 this is identity. Axpy possible to eliminate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      p[peri_kp]=p[peri_k];
 | 
					      p[peri_kp]=mu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Standard search direction  p -> z + b p    ; b = 
 | 
					      // Standard search direction  p -> z + b p    ; b = 
 | 
				
			||||||
      b = (rtzp)/rtz;
 | 
					      b = (rtzp)/rtz;
 | 
				
			||||||
@@ -181,7 +183,7 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
      // Stopping condition
 | 
					      // Stopping condition
 | 
				
			||||||
      if ( rn <= rsq ) { 
 | 
					      if ( rn <= rsq ) { 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	HermOp(x,mmp); // Shouldn't this be something else?
 | 
						_Linop.HermOp(x,mmp[0]); // Shouldn't this be something else?
 | 
				
			||||||
	axpy(tmp,-1.0,src,mmp[0]);
 | 
						axpy(tmp,-1.0,src,mmp[0]);
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	RealD psinorm = sqrt(norm2(x));
 | 
						RealD psinorm = sqrt(norm2(x));
 | 
				
			||||||
@@ -190,7 +192,8 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
	RealD true_residual = tmpnorm/srcnorm;
 | 
						RealD true_residual = tmpnorm/srcnorm;
 | 
				
			||||||
	std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl;
 | 
						std::cout<<GridLogMessage<<"TwoLevelfPcg:   true residual is "<<true_residual<<std::endl;
 | 
				
			||||||
	std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
 | 
						std::cout<<GridLogMessage<<"TwoLevelfPcg: target residual was"<<Tolerance<<std::endl;
 | 
				
			||||||
	return k;
 | 
					
 | 
				
			||||||
 | 
						return;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    // Non-convergence
 | 
					    // Non-convergence
 | 
				
			||||||
@@ -199,48 +202,40 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp) {
 | 
					  virtual void M1(Field & in, Field & out) 
 | 
				
			||||||
 | 
					  {// the smoother
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out) {// the smoother
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
 | 
					    // [PTM+Q] in = [1 - Q A] M in + Q in = Min + Q [ in -A Min]
 | 
				
			||||||
    Field tmp(grid);
 | 
					    Field tmp(FineGrid);
 | 
				
			||||||
    Field Min(grid);
 | 
					    Field Min(FineGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    PcgM(in,Min); // Smoother call
 | 
					    CoarseField PleftProj(CoarseGrid);
 | 
				
			||||||
 | 
					    CoarseField PleftMss_proj(CoarseGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    HermOp(Min,out);
 | 
					    _Smoother(in,Min); // Smoother call
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _Linop.HermOp(Min,out);
 | 
				
			||||||
    axpy(tmp,-1.0,out,in);          // tmp  = in - A Min
 | 
					    axpy(tmp,-1.0,out,in);          // tmp  = in - A Min
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ProjectToSubspace(tmp,PleftProj);     
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,tmp);     
 | 
				
			||||||
    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
 | 
					    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} [in - A Min]_s
 | 
				
			||||||
    PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]  
 | 
					    _Aggregates.PromoteFromSubspace(PleftMss_proj,tmp);// tmp = Q[in - A Min]  
 | 
				
			||||||
    axpy(out,1.0,Min,tmp); // Min+tmp
 | 
					    axpy(out,1.0,Min,tmp); // Min+tmp
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void M2(const Field & in, Field & out) {
 | 
					  virtual void M2(const Field & in, Field & out) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    out=in;
 | 
					    out=in;
 | 
				
			||||||
    // Must override for Def2 only
 | 
					 | 
				
			||||||
    //  case PcgDef2:
 | 
					 | 
				
			||||||
    //    Pright(in,out);
 | 
					 | 
				
			||||||
    //    break;
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual RealD M3(const Field & p, Field & mmp){
 | 
					  virtual RealD M3(const Field & p, Field & mmp)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    double d,dd;
 | 
					    double d,dd;
 | 
				
			||||||
    HermOpAndNorm(p,mmp,d,dd);
 | 
					    _Linop.HermOpAndNorm(p,mmp,d,dd);
 | 
				
			||||||
    return dd;
 | 
					    return dd;
 | 
				
			||||||
    // Must override for Def1 only
 | 
					 | 
				
			||||||
    //  case PcgDef1:
 | 
					 | 
				
			||||||
    //    d=linop_d->Mprec(p,mmp,tmp,0,1);// Dag no
 | 
					 | 
				
			||||||
    //      linop_d->Mprec(mmp,mp,tmp,1);// Dag yes
 | 
					 | 
				
			||||||
    //    Pleft(mp,mmp);
 | 
					 | 
				
			||||||
    //    d=real(linop_d->inner(p,mmp));
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void VstartDef2(Field & xconst Field & src){
 | 
					  virtual void Vstart(Field & x,const Field & src)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    //case PcgDef2:
 | 
					    //case PcgDef2:
 | 
				
			||||||
    //case PcgAdef2: 
 | 
					    //case PcgAdef2: 
 | 
				
			||||||
    //case PcgAdef2f:
 | 
					    //case PcgAdef2f:
 | 
				
			||||||
@@ -256,142 +251,79 @@ class TwoLevelFlexiblePcg : public LinearFunction<Field>
 | 
				
			|||||||
    //                   = src_s - (A guess)_s - src_s  + (A guess)_s 
 | 
					    //                   = src_s - (A guess)_s - src_s  + (A guess)_s 
 | 
				
			||||||
    //                   = 0 
 | 
					    //                   = 0 
 | 
				
			||||||
    ///////////////////////////////////
 | 
					    ///////////////////////////////////
 | 
				
			||||||
    Field r(grid);
 | 
					    Field r(FineGrid);
 | 
				
			||||||
    Field mmp(grid);
 | 
					    Field mmp(FineGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    CoarseField PleftProj(CoarseGrid);
 | 
				
			||||||
 | 
					    CoarseField PleftMss_proj(CoarseGrid);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    HermOp(x,mmp);
 | 
					    _Linop.HermOp(x,mmp);
 | 
				
			||||||
    axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x
 | 
					    axpy (r, -1.0, mmp, src);        // r_{-1} = src - A x
 | 
				
			||||||
    ProjectToSubspace(r,PleftProj);     
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,r);     
 | 
				
			||||||
    ApplyInverseCG(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
					    _CoarseSolver(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
				
			||||||
    PromoteFromSubspace(PleftMss_proj,mmp);  
 | 
					    _Aggregates.PromoteFromSubspace(PleftMss_proj,mmp);  
 | 
				
			||||||
    x=x+mmp;
 | 
					    x=x+mmp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void Vstart(Field & x,const Field & src){
 | 
					 | 
				
			||||||
    return;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  /////////////////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Only Def1 has non-trivial Vout. Override in Def1
 | 
					  // Only Def1 has non-trivial Vout. Override in Def1
 | 
				
			||||||
  /////////////////////////////////////////////////////////////////////
 | 
					  /////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  virtual void   Vout  (Field & in, Field & out,Field & src){
 | 
					  virtual void   Vout  (Field & in, Field & out,Field & src){
 | 
				
			||||||
    out = in;
 | 
					    out = in;
 | 
				
			||||||
    //case PcgDef1:
 | 
					 | 
				
			||||||
    //    //Qb + PT x
 | 
					 | 
				
			||||||
    //    ProjectToSubspace(src,PleftProj);     
 | 
					 | 
				
			||||||
    //    ApplyInverse(PleftProj,PleftMss_proj); // Ass^{-1} r_s
 | 
					 | 
				
			||||||
    //    PromoteFromSubspace(PleftMss_proj,tmp);  
 | 
					 | 
				
			||||||
    //    
 | 
					 | 
				
			||||||
    //    Pright(in,out);
 | 
					 | 
				
			||||||
    //    
 | 
					 | 
				
			||||||
    //    linop_d->axpy(out,tmp,out,1.0);
 | 
					 | 
				
			||||||
    //    break;
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Pright and Pleft are common to all implementations
 | 
					  // Pright and Pleft are common to all implementations
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  virtual void Pright(Field & in,Field & out){
 | 
					  virtual void Pright(Field & in,Field & out)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    // P_R  = [ 1              0 ] 
 | 
					    // P_R  = [ 1              0 ] 
 | 
				
			||||||
    //        [ -Mss^-1 Msb    0 ] 
 | 
					    //        [ -Mss^-1 Msb    0 ] 
 | 
				
			||||||
    Field in_sbar(grid);
 | 
					    Field in_sbar(FineGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ProjectToSubspace(in,PleftProj);     
 | 
					    CoarseField PleftProj(CoarseGrid);
 | 
				
			||||||
    PromoteFromSubspace(PleftProj,out);  
 | 
					    CoarseField PleftMss_proj(CoarseGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,in);     
 | 
				
			||||||
 | 
					    _Aggregates.PromoteFromSubspace(PleftProj,out);  
 | 
				
			||||||
    axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s 
 | 
					    axpy(in_sbar,-1.0,out,in);       // in_sbar = in - in_s 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    HermOp(in_sbar,out);
 | 
					    _Linop.HermOp(in_sbar,out);
 | 
				
			||||||
    ProjectToSubspace(out,PleftProj);           // Mssbar in_sbar  (project)
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,out);           // Mssbar in_sbar  (project)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ApplyInverse     (PleftProj,PleftMss_proj); // Mss^{-1} Mssbar 
 | 
					    _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} Mssbar 
 | 
				
			||||||
    PromoteFromSubspace(PleftMss_proj,out);     // 
 | 
					    _Aggregates.PromoteFromSubspace(PleftMss_proj,out);     // 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar
 | 
					    axpy(out,-1.0,out,in_sbar);     // in_sbar - Mss^{-1} Mssbar in_sbar
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  virtual void Pleft (Field & in,Field & out){
 | 
					  virtual void Pleft (Field & in,Field & out)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
    // P_L  = [ 1  -Mbs Mss^-1] 
 | 
					    // P_L  = [ 1  -Mbs Mss^-1] 
 | 
				
			||||||
    //        [ 0   0         ] 
 | 
					    //        [ 0   0         ] 
 | 
				
			||||||
    Field in_sbar(grid);
 | 
					    Field in_sbar(FineGrid);
 | 
				
			||||||
    Field    tmp2(grid);
 | 
					    Field    tmp2(FineGrid);
 | 
				
			||||||
    Field    Mtmp(grid);
 | 
					    Field    Mtmp(FineGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ProjectToSubspace(in,PleftProj);     
 | 
					    CoarseField PleftProj(CoarseGrid);
 | 
				
			||||||
    PromoteFromSubspace(PleftProj,out);  
 | 
					    CoarseField PleftMss_proj(CoarseGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,in);     
 | 
				
			||||||
 | 
					    _Aggregates.PromoteFromSubspace(PleftProj,out);  
 | 
				
			||||||
    axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s
 | 
					    axpy(in_sbar,-1.0,out,in);      // in_sbar = in - in_s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ApplyInverse(PleftProj,PleftMss_proj); // Mss^{-1} in_s
 | 
					    _CoarseSolver(PleftProj,PleftMss_proj); // Mss^{-1} in_s
 | 
				
			||||||
    PromoteFromSubspace(PleftMss_proj,out);
 | 
					    _Aggregates.PromoteFromSubspace(PleftMss_proj,out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    HermOp(out,Mtmp);
 | 
					    _Linop.HermOp(out,Mtmp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ProjectToSubspace(Mtmp,PleftProj);      // Msbar s Mss^{-1}
 | 
					    _Aggregates.ProjectToSubspace(PleftProj,Mtmp);      // Msbar s Mss^{-1}
 | 
				
			||||||
    PromoteFromSubspace(PleftProj,tmp2);
 | 
					    _Aggregates.PromoteFromSubspace(PleftProj,tmp2);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    axpy(out,-1.0,tmp2,Mtmp);
 | 
					    axpy(out,-1.0,tmp2,Mtmp);
 | 
				
			||||||
    axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s
 | 
					    axpy(out,-1.0,out,in_sbar);     // in_sbar - Msbars Mss^{-1} in_s
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					};
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Field>
 | 
					 | 
				
			||||||
class TwoLevelFlexiblePcgADef2 : public TwoLevelFlexiblePcg<Field> {
 | 
					 | 
				
			||||||
 public:
 | 
					 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  } 
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  virtual void M2(Field & in, Field & out){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp){
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
template<class Field>
 | 
					 | 
				
			||||||
class TwoLevelFlexiblePcgAD : public TwoLevelFlexiblePcg<Field> {
 | 
					 | 
				
			||||||
 public:
 | 
					 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
					 | 
				
			||||||
  virtual void M2(Field & in, Field & out);
 | 
					 | 
				
			||||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class Field>
 | 
					 | 
				
			||||||
class TwoLevelFlexiblePcgDef1 : public TwoLevelFlexiblePcg<Field> {
 | 
					 | 
				
			||||||
 public:
 | 
					 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
					 | 
				
			||||||
  virtual void M2(Field & in, Field & out);
 | 
					 | 
				
			||||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
  virtual void   Vout  (Field & in, Field & out,Field & src,Field & tmp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class Field>
 | 
					 | 
				
			||||||
class TwoLevelFlexiblePcgDef2 : public TwoLevelFlexiblePcg<Field> {
 | 
					 | 
				
			||||||
 public:
 | 
					 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
					 | 
				
			||||||
  virtual void M2(Field & in, Field & out);
 | 
					 | 
				
			||||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class Field>
 | 
					 | 
				
			||||||
class TwoLevelFlexiblePcgV11: public TwoLevelFlexiblePcg<Field> {
 | 
					 | 
				
			||||||
 public:
 | 
					 | 
				
			||||||
  virtual void M(Field & in,Field & out,Field & tmp); 
 | 
					 | 
				
			||||||
  virtual void M1(Field & in, Field & out,Field & tmp,Field & mp);
 | 
					 | 
				
			||||||
  virtual void M2(Field & in, Field & out);
 | 
					 | 
				
			||||||
  virtual RealD M3(Field & p, Field & mp,Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
  virtual void Vstart(Field & in, Field & src, Field & r, Field & mp, Field & mmp, Field & tmp);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -60,6 +60,8 @@ public:
 | 
				
			|||||||
  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
 | 
					  DeflatedGuesser(const std::vector<Field> & _evec,const std::vector<RealD> & _eval) : evec(_evec), eval(_eval) {};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void operator()(const Field &src,Field &guess) {
 | 
					  virtual void operator()(const Field &src,Field &guess) {
 | 
				
			||||||
 | 
					    RealD t=-usecond();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    guess = Zero();
 | 
					    guess = Zero();
 | 
				
			||||||
    assert(evec.size()==eval.size());
 | 
					    assert(evec.size()==eval.size());
 | 
				
			||||||
    auto N = evec.size();
 | 
					    auto N = evec.size();
 | 
				
			||||||
@@ -68,6 +70,8 @@ public:
 | 
				
			|||||||
      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
 | 
					      axpy(guess,TensorRemove(innerProduct(tmp,src)) / eval[i],tmp,guess);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    guess.Checkerboard() = src.Checkerboard();
 | 
					    guess.Checkerboard() = src.Checkerboard();
 | 
				
			||||||
 | 
					    t+=usecond();
 | 
				
			||||||
 | 
					    std::cout<<GridLogMessage<<"\t\t\t" << "Deflated guess took "<< t/1000.0<< "ms" <<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -59,7 +59,7 @@ public:
 | 
				
			|||||||
    GridBase *grid = src.Grid();
 | 
					    GridBase *grid = src.Grid();
 | 
				
			||||||
    Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid);
 | 
					    Field r(grid),  p(grid), Ap(grid), Ar(grid), z(grid);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    psi=zero;
 | 
					    psi=Zero();
 | 
				
			||||||
    r  = src;
 | 
					    r  = src;
 | 
				
			||||||
    Preconditioner(r,p);
 | 
					    Preconditioner(r,p);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,7 +53,11 @@ public:
 | 
				
			|||||||
  { 
 | 
					  { 
 | 
				
			||||||
    size_type bytes = __n*sizeof(_Tp);
 | 
					    size_type bytes = __n*sizeof(_Tp);
 | 
				
			||||||
    profilerAllocate(bytes);
 | 
					    profilerAllocate(bytes);
 | 
				
			||||||
 | 
					#ifdef GRID_UVM
 | 
				
			||||||
 | 
					    _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes);
 | 
				
			||||||
 | 
					#else 
 | 
				
			||||||
    _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
 | 
					    _Tp *ptr = (_Tp*) MemoryManager::CpuAllocate(bytes);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
 | 
					    assert( ( (_Tp*)ptr != (_Tp *)NULL ) );
 | 
				
			||||||
    return ptr;
 | 
					    return ptr;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -62,7 +66,11 @@ public:
 | 
				
			|||||||
  { 
 | 
					  { 
 | 
				
			||||||
    size_type bytes = __n * sizeof(_Tp);
 | 
					    size_type bytes = __n * sizeof(_Tp);
 | 
				
			||||||
    profilerFree(bytes);
 | 
					    profilerFree(bytes);
 | 
				
			||||||
 | 
					#ifdef GRID_UVM
 | 
				
			||||||
 | 
					    MemoryManager::SharedFree((void *)__p,bytes);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
    MemoryManager::CpuFree((void *)__p,bytes);
 | 
					    MemoryManager::CpuFree((void *)__p,bytes);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
 | 
					  // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,11 +9,13 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
#define AccSmall (3)
 | 
					#define AccSmall (3)
 | 
				
			||||||
#define Shared   (4)
 | 
					#define Shared   (4)
 | 
				
			||||||
#define SharedSmall (5)
 | 
					#define SharedSmall (5)
 | 
				
			||||||
 | 
					uint64_t total_cache;
 | 
				
			||||||
uint64_t total_shared;
 | 
					uint64_t total_shared;
 | 
				
			||||||
uint64_t total_device;
 | 
					uint64_t total_device;
 | 
				
			||||||
uint64_t total_host;;
 | 
					uint64_t total_host;;
 | 
				
			||||||
void MemoryManager::PrintBytes(void)
 | 
					void MemoryManager::PrintBytes(void)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  std::cout << " MemoryManager : "<<total_cache <<" cache       bytes "<<std::endl;
 | 
				
			||||||
  std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl;
 | 
					  std::cout << " MemoryManager : "<<total_shared<<" shared      bytes "<<std::endl;
 | 
				
			||||||
  std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
 | 
					  std::cout << " MemoryManager : "<<total_device<<" accelerator bytes "<<std::endl;
 | 
				
			||||||
  std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl;
 | 
					  std::cout << " MemoryManager : "<<total_host  <<" cpu         bytes "<<std::endl;
 | 
				
			||||||
@@ -35,6 +37,8 @@ void *MemoryManager::AcceleratorAllocate(size_t bytes)
 | 
				
			|||||||
  if ( ptr == (void *) NULL ) {
 | 
					  if ( ptr == (void *) NULL ) {
 | 
				
			||||||
    ptr = (void *) acceleratorAllocDevice(bytes);
 | 
					    ptr = (void *) acceleratorAllocDevice(bytes);
 | 
				
			||||||
    total_device+=bytes;
 | 
					    total_device+=bytes;
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    //    std::cout <<"AcceleratorAllocate: cache hit Device pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return ptr;
 | 
					  return ptr;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -53,8 +57,10 @@ void *MemoryManager::SharedAllocate(size_t bytes)
 | 
				
			|||||||
  if ( ptr == (void *) NULL ) {
 | 
					  if ( ptr == (void *) NULL ) {
 | 
				
			||||||
    ptr = (void *) acceleratorAllocShared(bytes);
 | 
					    ptr = (void *) acceleratorAllocShared(bytes);
 | 
				
			||||||
    total_shared+=bytes;
 | 
					    total_shared+=bytes;
 | 
				
			||||||
    //    std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
					    //    std::cout <<"SharedAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
				
			||||||
    //    PrintBytes();
 | 
					    //    PrintBytes();
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    //    std::cout <<"SharedAllocate: cache hit Shared pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return ptr;
 | 
					  return ptr;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -74,6 +80,9 @@ void *MemoryManager::CpuAllocate(size_t bytes)
 | 
				
			|||||||
  if ( ptr == (void *) NULL ) {
 | 
					  if ( ptr == (void *) NULL ) {
 | 
				
			||||||
    ptr = (void *) acceleratorAllocShared(bytes);
 | 
					    ptr = (void *) acceleratorAllocShared(bytes);
 | 
				
			||||||
    total_host+=bytes;
 | 
					    total_host+=bytes;
 | 
				
			||||||
 | 
					    //    std::cout <<"CpuAllocate: allocated Cpu pointer "<<std::hex<<ptr<<std::dec<<std::endl;
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    //    std::cout <<"CpufAllocate: cache hit Cpu pointer "<<std::hex<<ptr<<std::dec<<" "<<bytes<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return ptr;
 | 
					  return ptr;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -120,7 +129,7 @@ void MemoryManager::Init(void)
 | 
				
			|||||||
  str= getenv("GRID_ALLOC_NCACHE_LARGE");
 | 
					  str= getenv("GRID_ALLOC_NCACHE_LARGE");
 | 
				
			||||||
  if ( str ) {
 | 
					  if ( str ) {
 | 
				
			||||||
    Nc = atoi(str);
 | 
					    Nc = atoi(str);
 | 
				
			||||||
    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
 | 
					    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
 | 
				
			||||||
      Ncache[Cpu]=Nc;
 | 
					      Ncache[Cpu]=Nc;
 | 
				
			||||||
      Ncache[Acc]=Nc;
 | 
					      Ncache[Acc]=Nc;
 | 
				
			||||||
      Ncache[Shared]=Nc;
 | 
					      Ncache[Shared]=Nc;
 | 
				
			||||||
@@ -130,7 +139,7 @@ void MemoryManager::Init(void)
 | 
				
			|||||||
  str= getenv("GRID_ALLOC_NCACHE_SMALL");
 | 
					  str= getenv("GRID_ALLOC_NCACHE_SMALL");
 | 
				
			||||||
  if ( str ) {
 | 
					  if ( str ) {
 | 
				
			||||||
    Nc = atoi(str);
 | 
					    Nc = atoi(str);
 | 
				
			||||||
    if ( (Nc>=0) && (Nc < NallocCacheMax)) {
 | 
					    if ( (Nc>=0) && (Nc <= NallocCacheMax)) {
 | 
				
			||||||
      Ncache[CpuSmall]=Nc;
 | 
					      Ncache[CpuSmall]=Nc;
 | 
				
			||||||
      Ncache[AccSmall]=Nc;
 | 
					      Ncache[AccSmall]=Nc;
 | 
				
			||||||
      Ncache[SharedSmall]=Nc;
 | 
					      Ncache[SharedSmall]=Nc;
 | 
				
			||||||
@@ -211,6 +220,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  if ( entries[v].valid ) {
 | 
					  if ( entries[v].valid ) {
 | 
				
			||||||
    ret = entries[v].address;
 | 
					    ret = entries[v].address;
 | 
				
			||||||
 | 
					    total_cache-=entries[v].bytes;
 | 
				
			||||||
    entries[v].valid = 0;
 | 
					    entries[v].valid = 0;
 | 
				
			||||||
    entries[v].address = NULL;
 | 
					    entries[v].address = NULL;
 | 
				
			||||||
    entries[v].bytes = 0;
 | 
					    entries[v].bytes = 0;
 | 
				
			||||||
@@ -219,6 +229,7 @@ void *MemoryManager::Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries
 | 
				
			|||||||
  entries[v].address=ptr;
 | 
					  entries[v].address=ptr;
 | 
				
			||||||
  entries[v].bytes  =bytes;
 | 
					  entries[v].bytes  =bytes;
 | 
				
			||||||
  entries[v].valid  =1;
 | 
					  entries[v].valid  =1;
 | 
				
			||||||
 | 
					  total_cache+=entries[v].bytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return ret;
 | 
					  return ret;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -243,6 +254,7 @@ void *MemoryManager::Lookup(size_t bytes,AllocationCacheEntry *entries,int ncach
 | 
				
			|||||||
  for(int e=0;e<ncache;e++){
 | 
					  for(int e=0;e<ncache;e++){
 | 
				
			||||||
    if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
 | 
					    if ( entries[e].valid && ( entries[e].bytes == bytes ) ) {
 | 
				
			||||||
      entries[e].valid = 0;
 | 
					      entries[e].valid = 0;
 | 
				
			||||||
 | 
					      total_cache-=bytes;
 | 
				
			||||||
      return entries[e].address;
 | 
					      return entries[e].address;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -93,8 +93,8 @@ private:
 | 
				
			|||||||
  static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
 | 
					  static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ;
 | 
				
			||||||
  static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
 | 
					  static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  static void PrintBytes(void);
 | 
					 | 
				
			||||||
 public:
 | 
					 public:
 | 
				
			||||||
 | 
					  static void PrintBytes(void);
 | 
				
			||||||
  static void Init(void);
 | 
					  static void Init(void);
 | 
				
			||||||
  static void InitMessage(void);
 | 
					  static void InitMessage(void);
 | 
				
			||||||
  static void *AcceleratorAllocate(size_t bytes);
 | 
					  static void *AcceleratorAllocate(size_t bytes);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/lattice/Lattice_local.h>
 | 
					#include <Grid/lattice/Lattice_local.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_reduction.h>
 | 
					#include <Grid/lattice/Lattice_reduction.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_peekpoke.h>
 | 
					#include <Grid/lattice/Lattice_peekpoke.h>
 | 
				
			||||||
//#include <Grid/lattice/Lattice_reality.h>
 | 
					#include <Grid/lattice/Lattice_reality.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_real_imag.h>
 | 
					#include <Grid/lattice/Lattice_real_imag.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_comparison_utils.h>
 | 
					#include <Grid/lattice/Lattice_comparison_utils.h>
 | 
				
			||||||
#include <Grid/lattice/Lattice_comparison.h>
 | 
					#include <Grid/lattice/Lattice_comparison.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,6 +52,7 @@ public:
 | 
				
			|||||||
// This will be safe to call from accelerator_for and is trivially copy constructible
 | 
					// This will be safe to call from accelerator_for and is trivially copy constructible
 | 
				
			||||||
// The copy constructor for this will need to be used by device lambda functions
 | 
					// The copy constructor for this will need to be used by device lambda functions
 | 
				
			||||||
/////////////////////////////////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#undef LATTICE_BOUNDS_CHECK
 | 
				
			||||||
template<class vobj> 
 | 
					template<class vobj> 
 | 
				
			||||||
class LatticeView : public LatticeAccelerator<vobj>
 | 
					class LatticeView : public LatticeAccelerator<vobj>
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -61,14 +62,36 @@ public:
 | 
				
			|||||||
  void * cpu_ptr;
 | 
					  void * cpu_ptr;
 | 
				
			||||||
#ifdef GRID_SIMT
 | 
					#ifdef GRID_SIMT
 | 
				
			||||||
  accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { 
 | 
					  accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { 
 | 
				
			||||||
 | 
					#ifdef LATTICE_BOUNDS_CHECK
 | 
				
			||||||
 | 
					    assert(i<this->_odata_size);
 | 
				
			||||||
 | 
					    assert(i>=0);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    return coalescedRead(this->_odata[i]); 
 | 
					    return coalescedRead(this->_odata[i]); 
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#else 
 | 
					#else 
 | 
				
			||||||
  accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; }
 | 
					  accelerator_inline const vobj & operator()(size_t i) const {
 | 
				
			||||||
 | 
					#ifdef LATTICE_BOUNDS_CHECK
 | 
				
			||||||
 | 
					    assert(i<this->_odata_size);
 | 
				
			||||||
 | 
					    assert(i>=0);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    return this->_odata[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; };
 | 
					  accelerator_inline const vobj & operator[](size_t i) const { 
 | 
				
			||||||
  accelerator_inline vobj       & operator[](size_t i)       { return this->_odata[i]; };
 | 
					#ifdef LATTICE_BOUNDS_CHECK
 | 
				
			||||||
 | 
					    assert(i<this->_odata_size);
 | 
				
			||||||
 | 
					    assert(i>=0);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    return this->_odata[i]; 
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  accelerator_inline vobj       & operator[](size_t i)       { 
 | 
				
			||||||
 | 
					#ifdef LATTICE_BOUNDS_CHECK
 | 
				
			||||||
 | 
					    assert(i<this->_odata_size);
 | 
				
			||||||
 | 
					    assert(i>=0);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					    return this->_odata[i]; 
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  accelerator_inline uint64_t begin(void) const { return 0;};
 | 
					  accelerator_inline uint64_t begin(void) const { return 0;};
 | 
				
			||||||
  accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
 | 
					  accelerator_inline uint64_t end(void)   const { return this->_odata_size; };
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -77,9 +77,16 @@ const int SpinorIndex = 2;
 | 
				
			|||||||
template<typename T> struct isSpinor {
 | 
					template<typename T> struct isSpinor {
 | 
				
			||||||
  static constexpr bool value = (SpinorIndex==T::TensorLevel);
 | 
					  static constexpr bool value = (SpinorIndex==T::TensorLevel);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					const int CoarseIndex = 4;
 | 
				
			||||||
 | 
					template<typename T> struct isCoarsened {
 | 
				
			||||||
 | 
					  static constexpr bool value = (CoarseIndex<=T::TensorLevel);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
 | 
					template <typename T> using IfSpinor    = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
 | 
				
			||||||
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
 | 
					template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T> using IfCoarsened    = Invoke<std::enable_if< isCoarsened<T>::value,int> > ;
 | 
				
			||||||
 | 
					template <typename T> using IfNotCoarsened = Invoke<std::enable_if<!isCoarsened<T>::value,int> > ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ChrisK very keen to add extra space for Gparity doubling.
 | 
					// ChrisK very keen to add extra space for Gparity doubling.
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
// Also add domain wall index, in a way where Wilson operator 
 | 
					// Also add domain wall index, in a way where Wilson operator 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -89,7 +89,8 @@ public:
 | 
				
			|||||||
  virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's
 | 
					  virtual void  Mdiag  (const FermionField &in, FermionField &out) { Mooee(in,out);};   // Same as Mooee applied to both CB's
 | 
				
			||||||
  virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
					  virtual void  Mdir   (const FermionField &in, FermionField &out,int dir,int disp)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
				
			||||||
  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
					  virtual void  MdirAll(const FermionField &in, std::vector<FermionField> &out)=0;   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   =0;
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void)=0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
 | 
					  virtual void  MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -44,6 +44,9 @@ public:
 | 
				
			|||||||
  INHERIT_IMPL_TYPES(Impl);
 | 
					  INHERIT_IMPL_TYPES(Impl);
 | 
				
			||||||
  typedef StaggeredKernels<Impl> Kernels;
 | 
					  typedef StaggeredKernels<Impl> Kernels;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FermionField _tmp;
 | 
					  FermionField _tmp;
 | 
				
			||||||
  FermionField &tmp(void) { return _tmp; }
 | 
					  FermionField &tmp(void) { return _tmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -49,6 +49,9 @@ public:
 | 
				
			|||||||
  INHERIT_IMPL_TYPES(Impl);
 | 
					  INHERIT_IMPL_TYPES(Impl);
 | 
				
			||||||
  typedef StaggeredKernels<Impl> Kernels;
 | 
					  typedef StaggeredKernels<Impl> Kernels;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FermionField _tmp;
 | 
					  FermionField _tmp;
 | 
				
			||||||
  FermionField &tmp(void) { return _tmp; }
 | 
					  FermionField &tmp(void) { return _tmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,6 +47,9 @@ public:
 | 
				
			|||||||
  FermionField _tmp;
 | 
					  FermionField _tmp;
 | 
				
			||||||
  FermionField &tmp(void) { return _tmp; }
 | 
					  FermionField &tmp(void) { return _tmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
  // Performance monitoring
 | 
					  // Performance monitoring
 | 
				
			||||||
  ////////////////////////////////////////
 | 
					  ////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -63,6 +63,9 @@ public:
 | 
				
			|||||||
  INHERIT_IMPL_TYPES(Impl);
 | 
					  INHERIT_IMPL_TYPES(Impl);
 | 
				
			||||||
  typedef WilsonKernels<Impl> Kernels;
 | 
					  typedef WilsonKernels<Impl> Kernels;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Implement the abstract base
 | 
					  // Implement the abstract base
 | 
				
			||||||
  ///////////////////////////////////////////////////////////////
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -72,6 +72,9 @@ public:
 | 
				
			|||||||
  typedef WilsonKernels<Impl> Kernels;
 | 
					  typedef WilsonKernels<Impl> Kernels;
 | 
				
			||||||
  PmuStat stat;
 | 
					  PmuStat stat;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return this->directions; };
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return this->displacements;};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FermionField _tmp;
 | 
					  FermionField _tmp;
 | 
				
			||||||
  FermionField &tmp(void) { return _tmp; }
 | 
					  FermionField &tmp(void) { return _tmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -79,6 +79,8 @@ public:
 | 
				
			|||||||
    _Mat.M(in,tmp);
 | 
					    _Mat.M(in,tmp);
 | 
				
			||||||
    G5R5(out,tmp);
 | 
					    G5R5(out,tmp);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -127,6 +129,8 @@ public:
 | 
				
			|||||||
    _Mat.M(in,tmp);
 | 
					    _Mat.M(in,tmp);
 | 
				
			||||||
    out=g5*tmp;
 | 
					    out=g5*tmp;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  virtual std::vector<int> Directions(void)   { return _Mat.Directions();};
 | 
				
			||||||
 | 
					  virtual std::vector<int> Displacements(void){ return _Mat.Displacements();};
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -133,14 +133,14 @@ void WilsonCloverFermion<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
				
			|||||||
  pickCheckerboard(Even, CloverTermEven, CloverTerm);
 | 
					  pickCheckerboard(Even, CloverTermEven, CloverTerm);
 | 
				
			||||||
  pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
 | 
					  pickCheckerboard(Odd, CloverTermOdd, CloverTerm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm)));
 | 
					  pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm));
 | 
				
			||||||
  pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm)));
 | 
					  pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
 | 
					  pickCheckerboard(Even, CloverTermInvEven, CloverTermInv);
 | 
				
			||||||
  pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
 | 
					  pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv)));
 | 
					  pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv));
 | 
				
			||||||
  pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv)));
 | 
					  pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -128,7 +128,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProjTm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  hspin(0)=fspin(0)-fspin(2);
 | 
					  hspin(0)=fspin(0)-fspin(2);
 | 
				
			||||||
  hspin(1)=fspin(1)-fspin(3);
 | 
					  hspin(1)=fspin(1)-fspin(3);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -138,40 +137,50 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
 *  0 0 -1  0
 | 
					 *  0 0 -1  0
 | 
				
			||||||
 *  0 0  0 -1
 | 
					 *  0 0  0 -1
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  hspin(0)=fspin(0);
 | 
					  hspin(0)=fspin(0);
 | 
				
			||||||
  hspin(1)=fspin(1);
 | 
					  hspin(1)=fspin(1);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Nhs> &hspin,const iVector<vtype,Ns> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  hspin(0)=fspin(2);
 | 
					  hspin(0)=fspin(2);
 | 
				
			||||||
  hspin(1)=fspin(3);
 | 
					  hspin(1)=fspin(3);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
//  template<class vtype> accelerator_inline void fspProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
					 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5p (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  rfspin(0)=fspin(0);
 | 
					  rfspin(0)=fspin(0);
 | 
				
			||||||
  rfspin(1)=fspin(1);
 | 
					  rfspin(1)=fspin(1);
 | 
				
			||||||
  rfspin(2)=Zero();
 | 
					  rfspin(2)=Zero();
 | 
				
			||||||
  rfspin(3)=Zero();
 | 
					  rfspin(3)=Zero();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//  template<class vtype> accelerator_inline void fspProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
					 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spProj5m (iVector<vtype,Ns> &rfspin,const iVector<vtype,Ns> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  rfspin(0)=Zero();
 | 
					  rfspin(0)=Zero();
 | 
				
			||||||
  rfspin(1)=Zero();
 | 
					  rfspin(1)=Zero();
 | 
				
			||||||
  rfspin(2)=fspin(2);
 | 
					  rfspin(2)=fspin(2);
 | 
				
			||||||
  rfspin(3)=fspin(3);
 | 
					  rfspin(3)=fspin(3);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  const int hN = N>>1;
 | 
				
			||||||
 | 
					  for(int s=0;s<hN;s++){
 | 
				
			||||||
 | 
					    rfspin(s)=fspin(s);
 | 
				
			||||||
 | 
					    rfspin(s+hN)=Zero();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,int N,IfCoarsened<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &rfspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  const int hN = N>>1;
 | 
				
			||||||
 | 
					  for(int s=0;s<hN;s++){
 | 
				
			||||||
 | 
					    rfspin(s)=Zero();
 | 
				
			||||||
 | 
					    rfspin(s+hN)=fspin(s+hN);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Reconstruction routines to move back again to four spin
 | 
					// Reconstruction routines to move back again to four spin
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -183,7 +192,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=timesMinusI(hspin(1));
 | 
					  fspin(2)=timesMinusI(hspin(1));
 | 
				
			||||||
@@ -191,7 +199,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=timesI(hspin(1));
 | 
					  fspin(2)=timesI(hspin(1));
 | 
				
			||||||
@@ -199,7 +206,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)-=timesI(hspin(1));
 | 
					  fspin(2)-=timesI(hspin(1));
 | 
				
			||||||
@@ -207,7 +213,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconXm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)+=timesI(hspin(1));
 | 
					  fspin(2)+=timesI(hspin(1));
 | 
				
			||||||
@@ -221,7 +226,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)= hspin(1);
 | 
					  fspin(2)= hspin(1);
 | 
				
			||||||
@@ -229,7 +233,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=-hspin(1);
 | 
					  fspin(2)=-hspin(1);
 | 
				
			||||||
@@ -237,7 +240,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)+=hspin(1);
 | 
					  fspin(2)+=hspin(1);
 | 
				
			||||||
@@ -245,7 +247,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconYm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)-=hspin(1);
 | 
					  fspin(2)-=hspin(1);
 | 
				
			||||||
@@ -260,7 +261,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=timesMinusI(hspin(0));
 | 
					  fspin(2)=timesMinusI(hspin(0));
 | 
				
			||||||
@@ -268,7 +268,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=     timesI(hspin(0));
 | 
					  fspin(2)=     timesI(hspin(0));
 | 
				
			||||||
@@ -276,7 +275,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)-=timesI(hspin(0));
 | 
					  fspin(2)-=timesI(hspin(0));
 | 
				
			||||||
@@ -284,7 +282,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconZm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)+=timesI(hspin(0));
 | 
					  fspin(2)+=timesI(hspin(0));
 | 
				
			||||||
@@ -298,7 +295,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=hspin(0);
 | 
					  fspin(2)=hspin(0);
 | 
				
			||||||
@@ -306,7 +302,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0);
 | 
					  fspin(0)=hspin(0);
 | 
				
			||||||
  fspin(1)=hspin(1);
 | 
					  fspin(1)=hspin(1);
 | 
				
			||||||
  fspin(2)=-hspin(0);
 | 
					  fspin(2)=-hspin(0);
 | 
				
			||||||
@@ -314,7 +309,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTp (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)+=hspin(0);
 | 
					  fspin(2)+=hspin(0);
 | 
				
			||||||
@@ -322,7 +316,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumReconTm (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0);
 | 
					  fspin(0)+=hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1);
 | 
					  fspin(1)+=hspin(1);
 | 
				
			||||||
  fspin(2)-=hspin(0);
 | 
					  fspin(2)-=hspin(0);
 | 
				
			||||||
@@ -336,7 +329,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
 | 
					  fspin(0)=hspin(0)+hspin(0); // add is lower latency than mul
 | 
				
			||||||
  fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
 | 
					  fspin(1)=hspin(1)+hspin(1); // probably no measurable diffence though
 | 
				
			||||||
  fspin(2)=Zero();
 | 
					  fspin(2)=Zero();
 | 
				
			||||||
@@ -344,7 +336,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void spRecon5m (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)=Zero();
 | 
					  fspin(0)=Zero();
 | 
				
			||||||
  fspin(1)=Zero();
 | 
					  fspin(1)=Zero();
 | 
				
			||||||
  fspin(2)=hspin(0)+hspin(0);
 | 
					  fspin(2)=hspin(0)+hspin(0);
 | 
				
			||||||
@@ -352,7 +343,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void s
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
					template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void accumRecon5p (iVector<vtype,Ns> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					 | 
				
			||||||
  fspin(0)+=hspin(0)+hspin(0);
 | 
					  fspin(0)+=hspin(0)+hspin(0);
 | 
				
			||||||
  fspin(1)+=hspin(1)+hspin(1);
 | 
					  fspin(1)+=hspin(1)+hspin(1);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -372,7 +362,6 @@ template<class vtype,IfSpinor<iVector<vtype,Ns> > = 0> accelerator_inline void a
 | 
				
			|||||||
//////////
 | 
					//////////
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjXp(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjXp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@@ -426,26 +415,21 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconXp (iM
 | 
				
			|||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
// Xm
 | 
					// Xm
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjXm(hspin._internal,fspin._internal);
 | 
					  spProjXm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjXm(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjXm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -455,19 +439,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjXm (iMatri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconXm(hspin._internal,fspin._internal);
 | 
					  spReconXm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconXm(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconXm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -476,45 +457,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconXm (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconXm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconXm(hspin._internal,fspin._internal);
 | 
					  accumReconXm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconXm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconXm(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconXm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconXm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconXm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
// Yp
 | 
					// Yp
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjYp(hspin._internal,fspin._internal);
 | 
					  spProjYp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjYp(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjYp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -524,19 +497,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjYp (iMatri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconYp(hspin._internal,fspin._internal);
 | 
					  spReconYp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconYp(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconYp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -545,66 +515,55 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYp (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconYp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconYp(hspin._internal,fspin._internal);
 | 
					  accumReconYp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconYp(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconYp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconYp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconYp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
// Ym
 | 
					// Ym
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjYm(hspin._internal,fspin._internal);
 | 
					  spProjYm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjYm(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjYm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconYm(hspin._internal,fspin._internal);
 | 
					  spReconYm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,const iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconYm(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconYm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -613,19 +572,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconYm (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconYm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconYm(hspin._internal,fspin._internal);
 | 
					  accumReconYm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconYm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconYm(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconYm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconYm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -638,66 +594,57 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconYm (iM
 | 
				
			|||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjZp(hspin._internal,fspin._internal);
 | 
					  spProjZp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjZp(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjZp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconZp(hspin._internal,fspin._internal);
 | 
					  spReconZp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconZp(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconZp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconZp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconZp(hspin._internal,fspin._internal);
 | 
					  accumReconZp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconZp(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconZp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconZp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -706,62 +653,53 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZp (iM
 | 
				
			|||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjZm(hspin._internal,fspin._internal);
 | 
					  spProjZm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjZm(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjZm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconZm(hspin._internal,fspin._internal);
 | 
					  spReconZm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconZm(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconZm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconZm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconZm(hspin._internal,fspin._internal);
 | 
					  accumReconZm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconZm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconZm(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconZm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconZm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -774,41 +712,35 @@ template<class rtype,class vtype,int N> accelerator_inline void accumReconZm (iM
 | 
				
			|||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjTp(hspin._internal,fspin._internal);
 | 
					  spProjTp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjTp(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjTp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconTp (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconTp(hspin._internal,fspin._internal);
 | 
					  spReconTp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTp (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconTp(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconTp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -817,44 +749,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTp (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconTp (iScalar<rtype> &hspin, iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconTp(hspin._internal,fspin._internal);
 | 
					  accumReconTp(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTp (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconTp(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconTp(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconTp (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconTp(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
// Tm
 | 
					// Tm
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spProjTm (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProjTm(hspin._internal,fspin._internal);
 | 
					  spProjTm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProjTm (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProjTm(hspin._internal[i],fspin._internal[i]);
 | 
					    spProjTm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProjTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -864,19 +789,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProjTm (iMatri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spReconTm(hspin._internal,fspin._internal);
 | 
					  spReconTm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spReconTm(hspin._internal[i],fspin._internal[i]);
 | 
					    spReconTm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -885,44 +807,37 @@ template<class rtype,class vtype,int N> accelerator_inline void spReconTm (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumReconTm (iScalar<rtype> &hspin, const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumReconTm(hspin._internal,fspin._internal);
 | 
					  accumReconTm(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumReconTm (iVector<rtype,N> &hspin, const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumReconTm(hspin._internal[i],fspin._internal[i]);
 | 
					    accumReconTm(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumReconTm (iMatrix<rtype,N> &hspin, const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumReconTm(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					    }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
// 5p
 | 
					// 5p
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProj5p(hspin._internal,fspin._internal);
 | 
					  spProj5p(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
					    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -931,19 +846,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5p (iMatri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spRecon5p(hspin._internal,fspin._internal);
 | 
					  spRecon5p(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
					    spRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -952,19 +864,16 @@ template<class rtype,class vtype,int N> accelerator_inline void spRecon5p (iMatr
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumRecon5p (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumRecon5p(hspin._internal,fspin._internal);
 | 
					  accumRecon5p(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5p (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
					    accumRecon5p(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumRecon5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -972,24 +881,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5p (iM
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// four spinor projectors for chiral proj
 | 
					// four spinor projectors for chiral proj
 | 
				
			||||||
//  template<class vtype> accelerator_inline void fspProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
template<class vtype> accelerator_inline void spProj5p (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProj5p(hspin._internal,fspin._internal);
 | 
					  spProj5p(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//  template<class vtype,int N> accelerator_inline void fspProj5p (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
 | 
					template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5p (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
					    spProj5p(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//  template<class vtype,int N> accelerator_inline void fspProj5p (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
 | 
					template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProj5p(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -1001,17 +904,17 @@ template<class vtype,int N> accelerator_inline void spProj5p (iMatrix<vtype,N> &
 | 
				
			|||||||
// 5m
 | 
					// 5m
 | 
				
			||||||
////////
 | 
					////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  spProj5m(hspin._internal,fspin._internal);
 | 
					  spProj5m(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<rtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
					    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
@@ -1021,40 +924,34 @@ template<class rtype,class vtype,int N> accelerator_inline void spProj5m (iMatri
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void spRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spRecon5m(hspin._internal,fspin._internal);
 | 
					  spRecon5m(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
					    spRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void spRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
    }}
 | 
					  }}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class rtype,class vtype> accelerator_inline void accumRecon5m (iScalar<rtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  accumRecon5m(hspin._internal,fspin._internal);
 | 
					  accumRecon5m(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void accumRecon5m (iVector<rtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    accumRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
					    accumRecon5m(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iMatrix<rtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      accumRecon5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
@@ -1063,24 +960,18 @@ template<class rtype,class vtype,int N> accelerator_inline void accumRecon5m (iM
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// four spinor projectors for chiral proj
 | 
					// four spinor projectors for chiral proj
 | 
				
			||||||
//  template<class vtype> accelerator_inline void fspProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
					template<class vtype,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
				
			||||||
template<class vtype> accelerator_inline void spProj5m (iScalar<vtype> &hspin,const iScalar<vtype> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,SpinorIndex>::notvalue,iScalar<vtype> >::type *temp;
 | 
					 | 
				
			||||||
  spProj5m(hspin._internal,fspin._internal);
 | 
					  spProj5m(hspin._internal,fspin._internal);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//  template<class vtype,int N> accelerator_inline void fspProj5m (iVector<vtype,N> &hspin,iVector<vtype,N> &fspin)
 | 
					template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
				
			||||||
template<class vtype,int N,IfNotSpinor<iVector<vtype,N> > = 0> accelerator_inline void spProj5m (iVector<vtype,N> &hspin,const iVector<vtype,N> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,SpinorIndex>::notvalue,iVector<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++) {
 | 
					  for(int i=0;i<N;i++) {
 | 
				
			||||||
    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
					    spProj5m(hspin._internal[i],fspin._internal[i]);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
//  template<class vtype,int N> accelerator_inline void fspProj5m (iMatrix<vtype,N> &hspin,iMatrix<vtype,N> &fspin)
 | 
					template<class vtype,int N,IfNotCoarsened<iScalar<vtype> > = 0> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
				
			||||||
template<class vtype,int N> accelerator_inline void spProj5m (iMatrix<vtype,N> &hspin,const iMatrix<vtype,N> &fspin)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,SpinorIndex>::notvalue,iMatrix<vtype,N> >::type *temp;
 | 
					 | 
				
			||||||
  for(int i=0;i<N;i++){ 
 | 
					  for(int i=0;i<N;i++){ 
 | 
				
			||||||
    for(int j=0;j<N;j++){
 | 
					    for(int j=0;j<N;j++){
 | 
				
			||||||
      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
					      spProj5m(hspin._internal[i][j],fspin._internal[i][j]);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -154,8 +154,8 @@ void axpby_ssp_pminus(Lattice<vobj> &z,Coeff a,const Lattice<vobj> &x,Coeff b,co
 | 
				
			|||||||
  accelerator_for(sss,nloop,vobj::Nsimd(),{
 | 
					  accelerator_for(sss,nloop,vobj::Nsimd(),{
 | 
				
			||||||
    uint64_t ss = sss*Ls;
 | 
					    uint64_t ss = sss*Ls;
 | 
				
			||||||
    decltype(coalescedRead(y_v[ss+sp])) tmp;
 | 
					    decltype(coalescedRead(y_v[ss+sp])) tmp;
 | 
				
			||||||
    spProj5m(tmp,y_v(ss+sp));
 | 
					    spProj5m(tmp,y_v(ss+sp)); 
 | 
				
			||||||
    tmp = a*x_v(ss+s)+b*tmp;
 | 
					   tmp = a*x_v(ss+s)+b*tmp;
 | 
				
			||||||
    coalescedWrite(z_v[ss+s],tmp);
 | 
					    coalescedWrite(z_v[ss+s],tmp);
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -188,7 +188,6 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
 | 
				
			|||||||
  z.Checkerboard() = x.Checkerboard();
 | 
					  z.Checkerboard() = x.Checkerboard();
 | 
				
			||||||
  conformable(x,z);
 | 
					  conformable(x,z);
 | 
				
			||||||
  int Ls = grid->_rdimensions[0];
 | 
					  int Ls = grid->_rdimensions[0];
 | 
				
			||||||
  Gamma G5(Gamma::Algebra::Gamma5);
 | 
					 | 
				
			||||||
  autoView( x_v, x, AcceleratorRead);
 | 
					  autoView( x_v, x, AcceleratorRead);
 | 
				
			||||||
  autoView( z_v, z, AcceleratorWrite);
 | 
					  autoView( z_v, z, AcceleratorWrite);
 | 
				
			||||||
  uint64_t nloop = grid->oSites()/Ls;
 | 
					  uint64_t nloop = grid->oSites()/Ls;
 | 
				
			||||||
@@ -196,7 +195,13 @@ void G5R5(Lattice<vobj> &z,const Lattice<vobj> &x)
 | 
				
			|||||||
    uint64_t ss = sss*Ls;
 | 
					    uint64_t ss = sss*Ls;
 | 
				
			||||||
    for(int s=0;s<Ls;s++){
 | 
					    for(int s=0;s<Ls;s++){
 | 
				
			||||||
      int sp = Ls-1-s;
 | 
					      int sp = Ls-1-s;
 | 
				
			||||||
      coalescedWrite(z_v[ss+sp],G5*x_v(ss+s));
 | 
					      auto tmp = x_v(ss+s);
 | 
				
			||||||
 | 
					      decltype(tmp) tmp_p;
 | 
				
			||||||
 | 
					      decltype(tmp) tmp_m;
 | 
				
			||||||
 | 
					      spProj5p(tmp_p,tmp);
 | 
				
			||||||
 | 
					      spProj5m(tmp_m,tmp);
 | 
				
			||||||
 | 
					      // Use of spProj5m, 5p captures the coarse space too
 | 
				
			||||||
 | 
					      coalescedWrite(z_v[ss+sp],tmp_p - tmp_m);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -208,10 +213,20 @@ void G5C(Lattice<vobj> &z, const Lattice<vobj> &x)
 | 
				
			|||||||
  z.Checkerboard() = x.Checkerboard();
 | 
					  z.Checkerboard() = x.Checkerboard();
 | 
				
			||||||
  conformable(x, z);
 | 
					  conformable(x, z);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Gamma G5(Gamma::Algebra::Gamma5);
 | 
					  autoView( x_v, x, AcceleratorRead);
 | 
				
			||||||
  z = G5 * x;
 | 
					  autoView( z_v, z, AcceleratorWrite);
 | 
				
			||||||
 | 
					  uint64_t nloop = grid->oSites();
 | 
				
			||||||
 | 
					  accelerator_for(ss,nloop,vobj::Nsimd(),{
 | 
				
			||||||
 | 
					    auto tmp = x_v(ss);
 | 
				
			||||||
 | 
					    decltype(tmp) tmp_p;
 | 
				
			||||||
 | 
					    decltype(tmp) tmp_m;
 | 
				
			||||||
 | 
					    spProj5p(tmp_p,tmp);
 | 
				
			||||||
 | 
					    spProj5m(tmp_m,tmp);
 | 
				
			||||||
 | 
					    coalescedWrite(z_v[ss],tmp_p - tmp_m);
 | 
				
			||||||
 | 
					  });
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
template<class CComplex, int nbasis>
 | 
					template<class CComplex, int nbasis>
 | 
				
			||||||
void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
 | 
					void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex, nbasis>> &x)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -234,6 +249,7 @@ void G5C(Lattice<iVector<CComplex, nbasis>> &z, const Lattice<iVector<CComplex,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -151,6 +151,9 @@ inline void *acceleratorAllocShared(size_t bytes)
 | 
				
			|||||||
    ptr = (void *) NULL;
 | 
					    ptr = (void *) NULL;
 | 
				
			||||||
    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
					    printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  //  size_t free,total;
 | 
				
			||||||
 | 
					  //  cudaMemGetInfo(&free,&total);
 | 
				
			||||||
 | 
					  //  std::cout << "Malloc managed "<<bytes<<" "<<free<<"/"<<total<<std::endl;
 | 
				
			||||||
  return ptr;
 | 
					  return ptr;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
inline void *acceleratorAllocDevice(size_t bytes)
 | 
					inline void *acceleratorAllocDevice(size_t bytes)
 | 
				
			||||||
@@ -161,6 +164,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
 | 
				
			|||||||
    ptr = (void *) NULL;
 | 
					    ptr = (void *) NULL;
 | 
				
			||||||
    printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
					    printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  //  size_t free,total;
 | 
				
			||||||
 | 
					  //  cudaMemGetInfo(&free,&total);
 | 
				
			||||||
 | 
					  //  std::cout << "Malloc device "<<bytes<<" "<<free<<"/"<<total<<std::endl;
 | 
				
			||||||
  return ptr;
 | 
					  return ptr;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
 | 
					inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -191,7 +191,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
					  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
				
			||||||
@@ -216,15 +216,16 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
	
 | 
						
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	    double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
						    double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
				
			||||||
 | 
						    double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
 | 
				
			||||||
	    double flops=Nc*Nc*(6+8+8)*vol;
 | 
						    double flops=Nc*Nc*(6+8+8)*vol;
 | 
				
			||||||
	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
						    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t"<<ncbytes/time<<"\t\t" << flops/time<<std::endl;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
#if 1
 | 
					#if 1
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
					  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GB/s (incl Cshift)\t\t GFlop/s"<<std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
					  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
					  for(int lat=LMIN;lat<=LMAX;lat+=LADD){
 | 
				
			||||||
@@ -258,10 +259,11 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
	tmult  = tmult /Nloop;
 | 
						tmult  = tmult /Nloop;
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
	double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
						double bytes=3*vol*Nc*Nc*sizeof(Complex);
 | 
				
			||||||
 | 
						double ncbytes=5*vol*Nc*Nc*sizeof(Complex);
 | 
				
			||||||
	double flops=Nc*Nc*(6+8+8)*vol;
 | 
						double flops=Nc*Nc*(6+8+8)*vol;
 | 
				
			||||||
	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
 | 
						std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
 | 
				
			||||||
	time = time * 1000; // convert to NS for GB/s
 | 
						time = time * 1000; // convert to NS for GB/s
 | 
				
			||||||
	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 | 
						std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" <<ncbytes/time<<"\t\t" << flops/time<<std::endl;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,7 +29,91 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/Grid.h>
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using namespace Grid;
 | 
					using namespace Grid;
 | 
				
			||||||
 ;
 | 
					
 | 
				
			||||||
 | 
					void MomentumSpacePropagatorTest(RealD mass,RealD M5, LatticePropagator &prop)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  // what type LatticeComplex 
 | 
				
			||||||
 | 
					  GridBase *_grid = prop.Grid();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  typedef LatticeFermion FermionField;
 | 
				
			||||||
 | 
					  typedef LatticePropagator PropagatorField;
 | 
				
			||||||
 | 
					  typedef typename FermionField::vector_type vector_type;
 | 
				
			||||||
 | 
					  typedef typename FermionField::scalar_type ScalComplex;
 | 
				
			||||||
 | 
					  typedef iSinglet<ScalComplex> Tcomplex;
 | 
				
			||||||
 | 
					  typedef Lattice<iSinglet<vector_type> > LatComplex;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Gamma::Algebra Gmu [] = {
 | 
				
			||||||
 | 
					    Gamma::Algebra::GammaX,
 | 
				
			||||||
 | 
					    Gamma::Algebra::GammaY,
 | 
				
			||||||
 | 
					    Gamma::Algebra::GammaZ,
 | 
				
			||||||
 | 
					    Gamma::Algebra::GammaT
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Coordinate latt_size   = _grid->_fdimensions;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  PropagatorField   num  (_grid); num  = Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatComplex    sk(_grid);  sk = Zero();
 | 
				
			||||||
 | 
					  LatComplex    sk2(_grid); sk2= Zero();
 | 
				
			||||||
 | 
					  LatComplex    W(_grid); W= Zero();
 | 
				
			||||||
 | 
					  LatComplex    a(_grid); a= Zero();
 | 
				
			||||||
 | 
					  LatComplex    one  (_grid); one = ScalComplex(1.0,0.0);
 | 
				
			||||||
 | 
					  LatComplex denom(_grid); denom= Zero();
 | 
				
			||||||
 | 
					  LatComplex cosha(_grid); 
 | 
				
			||||||
 | 
					  LatComplex kmu(_grid); 
 | 
				
			||||||
 | 
					  LatComplex Wea(_grid); 
 | 
				
			||||||
 | 
					  LatComplex Wema(_grid); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ScalComplex ci(0.0,1.0);
 | 
				
			||||||
 | 
					  SpinColourMatrixD identity = ComplexD(1.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int mu=0;mu<Nd;mu++) {
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    LatticeCoordinate(kmu,mu);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    kmu = TwoPiL * kmu;
 | 
				
			||||||
 | 
					    //    kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
 | 
				
			||||||
 | 
					    sk  = sk  +     sin(kmu)    *sin(kmu); 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    num = num - sin(kmu)*ci*(Gamma(Gmu[mu])*identity);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  W = one - M5 + sk2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Cosh alpha -> alpha
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////
 | 
				
			||||||
 | 
					  cosha =  (one + W*W + sk) / (abs(W)*2.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // FIXME Need a Lattice acosh
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					  autoView(cosha_v,cosha,CpuRead);
 | 
				
			||||||
 | 
					  autoView(a_v,a,CpuWrite);
 | 
				
			||||||
 | 
					  for(int idx=0;idx<_grid->lSites();idx++){
 | 
				
			||||||
 | 
					    Coordinate lcoor(Nd);
 | 
				
			||||||
 | 
					    Tcomplex cc;
 | 
				
			||||||
 | 
					    //    RealD sgn;
 | 
				
			||||||
 | 
					    _grid->LocalIndexToLocalCoor(idx,lcoor);
 | 
				
			||||||
 | 
					    peekLocalSite(cc,cosha_v,lcoor);
 | 
				
			||||||
 | 
					    assert((double)real(cc)>=1.0);
 | 
				
			||||||
 | 
					    assert(fabs((double)imag(cc))<=1.0e-15);
 | 
				
			||||||
 | 
					    cc = ScalComplex(::acosh(real(cc)),0.0);
 | 
				
			||||||
 | 
					    pokeLocalSite(cc,a_v,lcoor);
 | 
				
			||||||
 | 
					  }}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  Wea = ( exp( a) * abs(W)  );
 | 
				
			||||||
 | 
					  Wema= ( exp(-a) * abs(W)  );
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  num   = num + ( one - Wema ) * mass * identity;
 | 
				
			||||||
 | 
					  denom= ( Wea - one ) + mass*mass * (one - Wema); 
 | 
				
			||||||
 | 
					  prop = num/denom;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int main (int argc, char ** argv)
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -307,6 +391,17 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
    RealD M5  =0.8;
 | 
					    RealD M5  =0.8;
 | 
				
			||||||
    DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
 | 
					    DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,GRID,RBGRID,mass,M5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /////////////////// Test code for (1-m)^2 ///////////////
 | 
				
			||||||
 | 
					    LatticePropagatorD prop1(&GRID);
 | 
				
			||||||
 | 
					    LatticePropagatorD prop2(&GRID);
 | 
				
			||||||
 | 
					    LatticeComplexD ratio(&GRID);
 | 
				
			||||||
 | 
					    MomentumSpacePropagatorTest(0.0,M5,prop1);
 | 
				
			||||||
 | 
					    MomentumSpacePropagatorTest(0.3,M5,prop2);
 | 
				
			||||||
 | 
					    ratio=localNorm2(prop2);
 | 
				
			||||||
 | 
					    ratio=ratio/localNorm2(prop1);
 | 
				
			||||||
 | 
					    std::cout << ratio;
 | 
				
			||||||
 | 
					    /////////////////// Test code for (1-m)^2 factor ///////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Momentum space prop
 | 
					    // Momentum space prop
 | 
				
			||||||
    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
					    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
 | 
				
			||||||
    bool fiveD = false; //calculate 4d free propagator
 | 
					    bool fiveD = false; //calculate 4d free propagator
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -565,8 +565,8 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl;
 | 
					  std::cout<<GridLogMessage << " Running Multigrid                                "<< std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000);     
 | 
					  BiCGSTAB<CoarseVector>    CoarseMgridBiCGSTAB(0.01,1000,false);     
 | 
				
			||||||
  BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24);
 | 
					  BiCGSTAB<LatticeFermion>  FineMgridBiCGSTAB(0.0,24,false);
 | 
				
			||||||
  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
 | 
					  ZeroGuesser<CoarseVector> CoarseZeroGuesser;
 | 
				
			||||||
  ZeroGuesser<LatticeFermion> FineZeroGuesser;
 | 
					  ZeroGuesser<LatticeFermion> FineZeroGuesser;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -590,5 +590,5 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  std::cout<<GridLogMessage << "Done "<< std::endl;
 | 
					  std::cout<<GridLogMessage << "Done "<< std::endl;
 | 
				
			||||||
  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
					  std::cout<<GridLogMessage << "**************************************************"<< std::endl;
 | 
				
			||||||
  Grid_finalize();
 | 
					  Grid_finalize();
 | 
				
			||||||
  
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user