diff --git a/Grid/GridCore.h b/Grid/GridCore.h index a48d2d49..2209f960 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -47,9 +47,9 @@ Author: paboyle #include #include #include -#include +#include #include -#include +#include #include #include #include diff --git a/Grid/GridStd.h b/Grid/GridStd.h index 16cfcf50..ecb561ea 100644 --- a/Grid/GridStd.h +++ b/Grid/GridStd.h @@ -6,6 +6,7 @@ /////////////////// #include #include +#include #include #include #include diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index f9bccf2d..9556c03d 100644 --- a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -18,12 +18,23 @@ #pragma push_macro("__CUDA_ARCH__") #pragma push_macro("__NVCC__") #pragma push_macro("__CUDACC__") +#undef __CUDA_ARCH__ #undef __NVCC__ #undef __CUDACC__ -#undef __CUDA_ARCH__ #define __NVCC__REDEFINE__ +#endif + +/* SYCL save and restore compile environment*/ +#ifdef GRID_SYCL +#pragma push +#pragma push_macro("__SYCL_DEVICE_ONLY__") +#undef __SYCL_DEVICE_ONLY__ +#define EIGEN_DONT_VECTORIZE +//#undef EIGEN_USE_SYCL +#define __SYCL__REDEFINE__ #endif + #include #include @@ -31,7 +42,13 @@ #ifdef __NVCC__REDEFINE__ #pragma pop_macro("__CUDACC__") #pragma pop_macro("__NVCC__") -#pragma pop_macro("__CUDA_ARCH__") +#pragma pop_macro("GRID_SIMT") +#pragma pop +#endif + +/*SYCL restore*/ +#ifdef __SYCL__REDEFINE__ +#pragma pop_macro("__SYCL_DEVICE_ONLY__") #pragma pop #endif @@ -39,3 +56,4 @@ #pragma GCC diagnostic pop #endif + diff --git a/Grid/Makefile.am b/Grid/Makefile.am index b88ea4f2..f1fa462e 100644 --- a/Grid/Makefile.am +++ b/Grid/Makefile.am @@ -21,7 +21,7 @@ if BUILD_HDF5 extra_headers+=serialisation/Hdf5Type.h endif -all: version-cache +all: version-cache Version.h version-cache: @if [ `git status --porcelain | grep -v '??' | wc -l` -gt 0 ]; then\ @@ -42,7 +42,7 @@ version-cache: fi;\ rm -f vertmp -Version.h: +Version.h: version-cache cp version-cache Version.h .PHONY: version-cache diff --git a/Grid/algorithms/Algorithms.h b/Grid/algorithms/Algorithms.h index 48ea194b..7f27784b 100644 --- a/Grid/algorithms/Algorithms.h +++ b/Grid/algorithms/Algorithms.h @@ -29,9 +29,11 @@ Author: Peter Boyle #ifndef GRID_ALGORITHMS_H #define GRID_ALGORITHMS_H +NAMESPACE_CHECK(algorithms); #include #include #include +NAMESPACE_CHECK(SparseMatrix); #include #include @@ -41,10 +43,12 @@ Author: Peter Boyle #include #include #include - +NAMESPACE_CHECK(approx); #include #include +NAMESPACE_CHECK(ConjGrad); #include +NAMESPACE_CHECK(BiCGSTAB); #include #include #include @@ -62,7 +66,9 @@ Author: Peter Boyle #include #include +NAMESPACE_CHECK(PowerMethod); #include +NAMESPACE_CHECK(CoarsendMatrix); #include #endif diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index a6b01986..8d184aea 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -1,14 +1,3 @@ - // blockZaxpy in bockPromote - 3s, 5% - // noncoalesced linalg in Preconditionoer ~ 3s 5% - // Lancos tuning or replace 10-20s ~ 25%, open ended - // setup tuning 5s ~ 8% - // -- e.g. ordermin, orderstep tunables. - // MdagM path without norm in LinOp code. few seconds - - // Mdir calc blocking kernels - // Fuse kernels in blockMaskedInnerProduct - // preallocate Vectors in Cayley 5D ~ few percent few seconds - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -91,34 +80,7 @@ public: } directions [2*_d]=0; displacements[2*_d]=0; - - //// report back - std::cout< GetDelta(int point) { - std::vector delta(dimension,0); - delta[directions[point]] = displacements[point]; - return delta; - }; - */ }; @@ -149,25 +111,7 @@ public: CoarseScalar InnerProd(CoarseGrid); std::cout << GridLogMessage <<" Block Gramm-Schmidt pass 1"<oSites(),1,{ - eProj[ss](i)=CComplex(1.0); - }); - eProj=eProj - iProj; - std::cout< &hermop,int nn=nbasis) { @@ -190,12 +129,12 @@ public: FineField Mn(FineGrid); for(int b=0;b "< &hermop, int nn, double hi, @@ -280,10 +219,10 @@ public: hermop.HermOp(*Tn,y); - auto y_v = y.View(); - auto Tn_v = Tn->View(); - auto Tnp_v = Tnp->View(); - auto Tnm_v = Tnm->View(); + autoView( y_v , y, AcceleratorWrite); + autoView( Tn_v , (*Tn), AcceleratorWrite); + autoView( Tnp_v , (*Tnp), AcceleratorWrite); + autoView( Tnm_v , (*Tnm), AcceleratorWrite); const int Nsimd = CComplex::Nsimd(); accelerator_forNB(ss, FineGrid->oSites(), Nsimd, { coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss)); @@ -313,201 +252,6 @@ public: } assert(b==nn); } -#endif -#if 0 - virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase &hermop, - int nn, - double hi, - double lo, - int orderfilter, - int ordermin, - int orderstep, - double filterlo - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - FineField combined(FineGrid); - - // New normalised noise - gaussian(RNG,noise); - scale = std::pow(norm2(noise),-0.5); - noise=noise*scale; - - // Initial matrix element - hermop.Op(noise,Mn); std::cout< "< Cheb(llo,hhi,oorder); \ - Cheb(hermop,noise,Mn); \ - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \ - subspace[b] = Mn; \ - hermop.Op(Mn,tmp); \ - std::cout< "< Cheb(0.002,60.0,1500,-0.5,3.5); \ - - RealD alpha=-0.8; - RealD beta =-0.8; -#define FILTER(llo,hhi,oorder) \ - { \ - Chebyshev Cheb(llo,hhi,oorder); \ - /* JacobiPolynomial Cheb(0.0,60.0,oorder,alpha,beta);*/\ - Cheb(hermop,noise,Mn); \ - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; \ - subspace[b] = Mn; \ - hermop.Op(Mn,tmp); \ - std::cout< "< Cheb(llo,hhi,oorder); \ - Cheb(hermop,noise,combined); \ - } - - double node = 0.000; - FILTERb(lo,hi,orderfilter);// 0 - // FILTERc(node,hi,51);// 0 - noise = Mn; - int base = 0; - int mult = 100; - FILTER(node,hi,base+1*mult); - FILTER(node,hi,base+2*mult); - FILTER(node,hi,base+3*mult); - FILTER(node,hi,base+4*mult); - FILTER(node,hi,base+5*mult); - FILTER(node,hi,base+6*mult); - FILTER(node,hi,base+7*mult); - FILTER(node,hi,base+8*mult); - FILTER(node,hi,base+9*mult); - FILTER(node,hi,base+10*mult); - FILTER(node,hi,base+11*mult); - FILTER(node,hi,base+12*mult); - FILTER(node,hi,base+13*mult); - FILTER(node,hi,base+14*mult); - FILTER(node,hi,base+15*mult); - assert(b==nn); - } -#endif - -#if 0 - virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase &hermop, - int nn, - double hi, - double lo, - int orderfilter, - int ordermin, - int orderstep, - double filterlo - ) { - - RealD scale; - - FineField noise(FineGrid); - FineField Mn(FineGrid); - FineField tmp(FineGrid); - FineField combined(FineGrid); - - // New normalised noise - gaussian(RNG,noise); - scale = std::pow(norm2(noise),-0.5); - noise=noise*scale; - - // Initial matrix element - hermop.Op(noise,Mn); std::cout< "< JacobiPoly(0.005,60.,1500); - // JacobiPolynomial JacobiPoly(0.002,60.0,1500,-0.5,3.5); - //JacobiPolynomial JacobiPoly(0.03,60.0,500,-0.5,3.5); - // JacobiPolynomial JacobiPoly(0.00,60.0,1000,-0.5,3.5); - JacobiPoly(hermop,noise,Mn); - scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale; - subspace[b] = Mn; - hermop.Op(Mn,tmp); - std::cout< "< "< Stencil; std::vector A; - + /////////////////////// // Interface /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - RealD M (const CoarseVector &in, CoarseVector &out){ - + void M (const CoarseVector &in, CoarseVector &out) + { conformable(_grid,in.Grid()); conformable(in.Grid(),out.Grid()); - // RealD Nin = norm2(in); SimpleCompressor compressor; - double comms_usec = -usecond(); Stencil.HaloExchange(in,compressor); - comms_usec += usecond(); - - auto in_v = in.View(); - auto out_v = out.View(); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); typedef LatticeView Aview; - + Vector AcceleratorViewContainer; - for(int p=0;poSites(); - // double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint; - // double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex); - double usecs =-usecond(); - // assert(geom.npoint==9); accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { int ss = sss/nbasis; @@ -580,41 +316,28 @@ public: int ptype; StencilEntry *SE; - int lane=SIMTlane(Nsimd); for(int point=0;point_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane); + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane); + nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } - synchronise(); + acceleratorSynchronise(); for(int bb=0;bb Aview; Vector AcceleratorViewContainer; - for(int p=0;p_is_local) { - nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute,lane); + nbr = coalescedReadPermute(in_v[SE->_offset],ptype,SE->_permute); } else { - nbr = coalescedRead(Stencil.CommBuf()[SE->_offset],lane); + nbr = coalescedRead(Stencil.CommBuf()[SE->_offset]); } - synchronise(); + acceleratorSynchronise(); for(int bb=0;bboSites(),1,{ - - siteVector res = Zero(); - siteVector nbr; - int ptype; - StencilEntry *SE; - - SE=Stencil.GetEntry(ptype,point,ss); - - if(SE->_is_local&&SE->_permute) { - permute(nbr,in_v[SE->_offset],ptype); - } else if(SE->_is_local) { - nbr = in_v[SE->_offset]; - } else { - nbr = Stencil.CommBuf()[SE->_offset]; - } - synchronise(); - - res = res + Aview_p[point][ss]*nbr; - - out_v[ss]=res; - }); -#endif + for(int p=0;p &out) { @@ -864,14 +562,12 @@ public: blockMaskedInnerProduct(oZProj,omask,Subspace.subspace[j],Mphi); - auto iZProj_v = iZProj.View() ; - auto oZProj_v = oZProj.View() ; - auto A_p = A[p].View(); - auto A_self = A[self_stencil].View(); + autoView( iZProj_v , iZProj, AcceleratorRead) ; + autoView( oZProj_v , oZProj, AcceleratorRead) ; + autoView( A_p , A[p], AcceleratorWrite); + autoView( A_self , A[self_stencil], AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); - // if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} - // accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); }); } } @@ -885,11 +581,11 @@ public: mult(tmp,phi,oddmask ); linop.Op(tmp,Mphio); { - auto tmp_ = tmp.View(); - auto evenmask_ = evenmask.View(); - auto oddmask_ = oddmask.View(); - auto Mphie_ = Mphie.View(); - auto Mphio_ = Mphio.View(); + autoView( tmp_ , tmp, AcceleratorWrite); + autoView( evenmask_ , evenmask, AcceleratorRead); + autoView( oddmask_ , oddmask, AcceleratorRead); + autoView( Mphie_ , Mphie, AcceleratorRead); + autoView( Mphio_ , Mphio, AcceleratorRead); accelerator_for(ss, FineGrid->oSites(), Fobj::Nsimd(),{ coalescedWrite(tmp_[ss],evenmask_(ss)*Mphie_(ss) + oddmask_(ss)*Mphio_(ss)); }); @@ -897,8 +593,8 @@ public: blockProject(SelfProj,tmp,Subspace.subspace); - auto SelfProj_ = SelfProj.View(); - auto A_self = A[self_stencil].View(); + autoView( SelfProj_ , SelfProj, AcceleratorRead); + autoView( A_self , A[self_stencil], AcceleratorWrite); accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ for(int j=0;j bc(FineGrid->_ndimension,0); - - blockPick(Grid(),phi,tmp,bc); // Pick out a block - linop.Op(tmp,Mphi); // Apply big dop - blockProject(iProj,Mphi,Subspace.subspace); // project it and print it - std::cout< #endif #endif - NAMESPACE_BEGIN(Grid); template struct FFTW { }; @@ -191,7 +189,7 @@ public: typedef typename sobj::scalar_type scalar; Lattice pgbuf(&pencil_g); - auto pgbuf_v = pgbuf.View(); + autoView(pgbuf_v , pgbuf, CpuWrite); typedef typename FFTW::FFTW_scalar FFTW_scalar; typedef typename FFTW::FFTW_plan FFTW_plan; @@ -232,15 +230,18 @@ public: result = source; int pc = processor_coor[dim]; for(int p=0;plSites(),{ + { + autoView(r_v,result,CpuRead); + autoView(p_v,pgbuf,CpuWrite); + thread_for(idx, sgrid->lSites(),{ Coordinate cbuf(Nd); sobj s; sgrid->LocalIndexToLocalCoor(idx,cbuf); - peekLocalSite(s,result,cbuf); + peekLocalSite(s,r_v,cbuf); cbuf[dim]+=((pc+p) % processors[dim])*L; - // cbuf[dim]+=p*L; - pokeLocalSite(s,pgbuf,cbuf); - }); + pokeLocalSite(s,p_v,cbuf); + }); + } if (p != processors[dim] - 1) { result = Cshift(result,dim,L); } @@ -269,15 +270,19 @@ public: flops+= flops_call*NN; // writing out result - thread_for(idx,sgrid->lSites(),{ + { + autoView(pgbuf_v,pgbuf,CpuRead); + autoView(result_v,result,CpuWrite); + thread_for(idx,sgrid->lSites(),{ Coordinate clbuf(Nd), cgbuf(Nd); sobj s; sgrid->LocalIndexToLocalCoor(idx,clbuf); cgbuf = clbuf; cgbuf[dim] = clbuf[dim]+L*pc; - peekLocalSite(s,pgbuf,cgbuf); - pokeLocalSite(s,result,clbuf); - }); + peekLocalSite(s,pgbuf_v,cgbuf); + pokeLocalSite(s,result_v,clbuf); + }); + } result = result*div; // destroying plan diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index 50600d2d..1add212c 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -43,7 +43,6 @@ NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////////////////// template class LinearOperatorBase { public: - // Support for coarsening to a multigrid virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base @@ -94,7 +93,10 @@ public: _Mat.Mdag(in,out); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); + _Mat.MdagM(in,out); + ComplexD dot = innerProduct(in,out); + n1=real(dot); + n2=norm2(out); } void HermOp(const Field &in, Field &out){ _Mat.MdagM(in,out); @@ -131,17 +133,14 @@ public: assert(0); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); - out = out + _shift*in; - - ComplexD dot; - dot= innerProduct(in,out); + HermOp(in,out); + ComplexD dot = innerProduct(in,out); n1=real(dot); n2=norm2(out); } void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); + _Mat.MdagM(in,out); + out = out + _shift*in; } }; @@ -170,7 +169,7 @@ public: _Mat.M(in,out); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.M(in,out); + HermOp(in,out); ComplexD dot= innerProduct(in,out); n1=real(dot); n2=norm2(out); } @@ -208,338 +207,305 @@ public: } }; - ////////////////////////////////////////////////////////// - // Even Odd Schur decomp operators; there are several - // ways to introduce the even odd checkerboarding - ////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +// Even Odd Schur decomp operators; there are several +// ways to introduce the even odd checkerboarding +////////////////////////////////////////////////////////// - template - class SchurOperatorBase : public LinearOperatorBase { - public: - virtual RealD Mpc (const Field &in, Field &out) =0; - virtual RealD MpcDag (const Field &in, Field &out) =0; - virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp(in.Grid()); - tmp.Checkerboard() = in.Checkerboard(); - ni=Mpc(in,tmp); - no=MpcDag(tmp,out); - } - virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - out.Checkerboard() = in.Checkerboard(); - MpcDagMpc(in,out,n1,n2); - } - virtual void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); - } - void Op (const Field &in, Field &out){ - Mpc(in,out); - } - void AdjOp (const Field &in, Field &out){ - MpcDag(in,out); - } - // Support for coarsening to a multigrid - void OpDiag (const Field &in, Field &out) { - assert(0); // must coarsen the unpreconditioned system - } - void OpDir (const Field &in, Field &out,int dir,int disp) { - assert(0); - } - void OpDirAll (const Field &in, std::vector &out){ - assert(0); - }; - }; - template - class SchurDiagMooeeOperator : public SchurOperatorBase { - public: - Matrix &_Mat; - SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - tmp.Checkerboard() = !in.Checkerboard(); - //std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; - - _Mat.Meooe(in,tmp); - _Mat.MooeeInv(tmp,out); - _Mat.Meooe(out,tmp); - - //std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl; - _Mat.Mooee(in,out); - return axpy_norm(out,-1.0,tmp,out); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MeooeDag(in,tmp); - _Mat.MooeeInvDag(tmp,out); - _Mat.MeooeDag(out,tmp); - - _Mat.MooeeDag(in,out); - return axpy_norm(out,-1.0,tmp,out); - } - }; - template - class SchurDiagOneOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - public: - SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; - - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - - _Mat.Meooe(in,out); - _Mat.MooeeInv(out,tmp); - _Mat.Meooe(tmp,out); - _Mat.MooeeInv(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MooeeInvDag(in,out); - _Mat.MeooeDag(out,tmp); - _Mat.MooeeInvDag(tmp,out); - _Mat.MeooeDag(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - }; - template - class SchurDiagTwoOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - public: - SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; - - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - - _Mat.MooeeInv(in,out); - _Mat.Meooe(out,tmp); - _Mat.MooeeInv(tmp,out); - _Mat.Meooe(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MeooeDag(in,out); - _Mat.MooeeInvDag(out,tmp); - _Mat.MeooeDag(tmp,out); - _Mat.MooeeInvDag(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - }; - - template - class NonHermitianSchurOperatorBase : public LinearOperatorBase - { - public: - virtual RealD Mpc (const Field& in, Field& out) = 0; - virtual RealD MpcDag (const Field& in, Field& out) = 0; - virtual void MpcDagMpc(const Field& in, Field& out, RealD& ni, RealD& no) { - Field tmp(in.Grid()); - tmp.Checkerboard() = in.Checkerboard(); - ni = Mpc(in,tmp); - no = MpcDag(tmp,out); - } - virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) { - assert(0); - } - virtual void HermOp(const Field& in, Field& out) { - assert(0); - } - void Op(const Field& in, Field& out) { - Mpc(in, out); - } - void AdjOp(const Field& in, Field& out) { - MpcDag(in, out); - } - // Support for coarsening to a multigrid - void OpDiag(const Field& in, Field& out) { - assert(0); // must coarsen the unpreconditioned system - } - void OpDir(const Field& in, Field& out, int dir, int disp) { - assert(0); - } - }; - - template - class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase - { - public: - Matrix& _Mat; - NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - tmp.Checkerboard() = !in.Checkerboard(); - - _Mat.Meooe(in, tmp); - _Mat.MooeeInv(tmp, out); - _Mat.Meooe(out, tmp); - - _Mat.Mooee(in, out); - - return axpy_norm(out, -1.0, tmp, out); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MeooeDag(in, tmp); - _Mat.MooeeInvDag(tmp, out); - _Mat.MeooeDag(out, tmp); - - _Mat.MooeeDag(in, out); - - return axpy_norm(out, -1.0, tmp, out); - } - }; - - template - class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase - { - protected: - Matrix &_Mat; - - public: - NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.Meooe(in, out); - _Mat.MooeeInv(out, tmp); - _Mat.Meooe(tmp, out); - _Mat.MooeeInv(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MooeeInvDag(in, out); - _Mat.MeooeDag(out, tmp); - _Mat.MooeeInvDag(tmp, out); - _Mat.MeooeDag(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - }; - - template - class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase - { - protected: - Matrix& _Mat; - - public: - NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; - - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MooeeInv(in, out); - _Mat.Meooe(out, tmp); - _Mat.MooeeInv(tmp, out); - _Mat.Meooe(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MeooeDag(in, out); - _Mat.MooeeInvDag(out, tmp); - _Mat.MeooeDag(tmp, out); - _Mat.MooeeInvDag(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - }; - - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta - // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi - /////////////////////////////////////////////////////////////////////////////////////////////////// - template using SchurDiagOneRH = SchurDiagTwoOperator ; - template using SchurDiagOneLH = SchurDiagOneOperator ; - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Staggered use - /////////////////////////////////////////////////////////////////////////////////////////////////// - template - class SchurStaggeredOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - Field tmp; - RealD mass; - double tMpc; - double tIP; - double tMeo; - double taxpby_norm; - uint64_t ncall; -public: - void Report(void) - { - std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "< +class SchurOperatorBase : public LinearOperatorBase { + public: + virtual void Mpc (const Field &in, Field &out) =0; + virtual void MpcDag (const Field &in, Field &out) =0; + virtual void MpcDagMpc(const Field &in, Field &out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); + Mpc(in,tmp); + MpcDag(tmp,out); + } virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - ncall++; - tMpc-=usecond(); - n2 = Mpc(in,out); - tMpc+=usecond(); - tIP-=usecond(); - ComplexD dot= innerProduct(in,out); - tIP+=usecond(); - n1 = real(dot); + out.Checkerboard() = in.Checkerboard(); + MpcDagMpc(in,out); + ComplexD dot= innerProduct(in,out); + n1=real(dot); + n2=norm2(out); } virtual void HermOp(const Field &in, Field &out){ - ncall++; - tMpc-=usecond(); - _Mat.Meooe(in,out); - _Mat.Meooe(out,tmp); - tMpc+=usecond(); - taxpby_norm-=usecond(); - axpby(out,-1.0,mass*mass,tmp,in); - taxpby_norm+=usecond(); + out.Checkerboard() = in.Checkerboard(); + MpcDagMpc(in,out); } - virtual RealD Mpc (const Field &in, Field &out) - { + void Op (const Field &in, Field &out){ + Mpc(in,out); + } + void AdjOp (const Field &in, Field &out){ + MpcDag(in,out); + } + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + assert(0); // must coarsen the unpreconditioned system + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; +}; +template + class SchurDiagMooeeOperator : public SchurOperatorBase { + public: + Matrix &_Mat; + SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = !in.Checkerboard(); + + _Mat.Meooe(in,tmp); + _Mat.MooeeInv(tmp,out); + _Mat.Meooe(out,tmp); + _Mat.Mooee(in,out); + axpy(out,-1.0,tmp,out); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MeooeDag(in,tmp); + _Mat.MooeeInvDag(tmp,out); + _Mat.MeooeDag(out,tmp); + _Mat.MooeeDag(in,out); + axpy(out,-1.0,tmp,out); + } +}; +template + class SchurDiagOneOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + public: + SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; + + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + _Mat.Meooe(in,out); + _Mat.MooeeInv(out,tmp); + _Mat.Meooe(tmp,out); + _Mat.MooeeInv(out,tmp); + axpy(out,-1.0,tmp,in); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MooeeInvDag(in,out); + _Mat.MeooeDag(out,tmp); + _Mat.MooeeInvDag(tmp,out); + _Mat.MeooeDag(out,tmp); + axpy(out,-1.0,tmp,in); + } +}; +template + class SchurDiagTwoOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + public: + SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; + + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + + _Mat.MooeeInv(in,out); + _Mat.Meooe(out,tmp); + _Mat.MooeeInv(tmp,out); + _Mat.Meooe(out,tmp); + + axpy(out,-1.0,tmp,in); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MeooeDag(in,out); + _Mat.MooeeInvDag(out,tmp); + _Mat.MeooeDag(tmp,out); + _Mat.MooeeInvDag(out,tmp); + + axpy(out,-1.0,tmp,in); + } +}; + +template +class NonHermitianSchurOperatorBase : public LinearOperatorBase +{ + public: + virtual void Mpc (const Field& in, Field& out) = 0; + virtual void MpcDag (const Field& in, Field& out) = 0; + virtual void MpcDagMpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); + Mpc(in,tmp); + MpcDag(tmp,out); + } + virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) { + assert(0); + } + virtual void HermOp(const Field& in, Field& out) { + assert(0); + } + void Op(const Field& in, Field& out) { + Mpc(in, out); + } + void AdjOp(const Field& in, Field& out) { + MpcDag(in, out); + } + // Support for coarsening to a multigrid + void OpDiag(const Field& in, Field& out) { + assert(0); // must coarsen the unpreconditioned system + } + void OpDir(const Field& in, Field& out, int dir, int disp) { + assert(0); + } + void OpDirAll(const Field& in, std::vector& out){ + assert(0); + }; +}; + +template +class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase +{ + public: + Matrix& _Mat; + NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = !in.Checkerboard(); + + _Mat.Meooe(in, tmp); + _Mat.MooeeInv(tmp, out); + _Mat.Meooe(out, tmp); + + _Mat.Mooee(in, out); + + axpy(out, -1.0, tmp, out); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MeooeDag(in, tmp); + _Mat.MooeeInvDag(tmp, out); + _Mat.MeooeDag(out, tmp); + + _Mat.MooeeDag(in, out); + + axpy(out, -1.0, tmp, out); + } +}; + +template +class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase +{ + protected: + Matrix &_Mat; + + public: + NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.Meooe(in, out); + _Mat.MooeeInv(out, tmp); + _Mat.Meooe(tmp, out); + _Mat.MooeeInv(out, tmp); + + axpy(out, -1.0, tmp, in); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MooeeInvDag(in, out); + _Mat.MeooeDag(out, tmp); + _Mat.MooeeInvDag(tmp, out); + _Mat.MeooeDag(out, tmp); + + axpy(out, -1.0, tmp, in); + } +}; + +template +class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase +{ + protected: + Matrix& _Mat; + + public: + NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; + + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MooeeInv(in, out); + _Mat.Meooe(out, tmp); + _Mat.MooeeInv(tmp, out); + _Mat.Meooe(out, tmp); + + axpy(out, -1.0, tmp, in); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MeooeDag(in, out); + _Mat.MooeeInvDag(out, tmp); + _Mat.MeooeDag(tmp, out); + _Mat.MooeeInvDag(out, tmp); + + axpy(out, -1.0, tmp, in); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta +// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi +/////////////////////////////////////////////////////////////////////////////////////////////////// +template using SchurDiagOneRH = SchurDiagTwoOperator ; +template using SchurDiagOneLH = SchurDiagOneOperator ; +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Staggered use +/////////////////////////////////////////////////////////////////////////////////////////////////// +template +class SchurStaggeredOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + Field tmp; + RealD mass; + public: + SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) + { + assert( _Mat.isTrivialEE() ); + mass = _Mat.Mass(); + } + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + Mpc(in,out); + ComplexD dot= innerProduct(in,out); + n1 = real(dot); + n2 =0.0; + } + virtual void HermOp(const Field &in, Field &out){ + Mpc(in,out); + // _Mat.Meooe(in,out); + // _Mat.Meooe(out,tmp); + // axpby(out,-1.0,mass*mass,tmp,in); + } + virtual void Mpc (const Field &in, Field &out) + { Field tmp(in.Grid()); Field tmp2(in.Grid()); + + // _Mat.Mooee(in,out); + // _Mat.Mooee(out,tmp); - // std::cout << GridLogIterative << " HermOp.Mpc "< using SchurStagOperator = SchurStaggeredOperator; - ///////////////////////////////////////////////////////////// // Base classes for functions of operators ///////////////////////////////////////////////////////////// diff --git a/Grid/algorithms/SparseMatrix.h b/Grid/algorithms/SparseMatrix.h index b959f53c..8a265b3f 100644 --- a/Grid/algorithms/SparseMatrix.h +++ b/Grid/algorithms/SparseMatrix.h @@ -38,16 +38,12 @@ template class SparseMatrixBase { public: virtual GridBase *Grid(void) =0; // Full checkerboar operations - virtual RealD M (const Field &in, Field &out)=0; - virtual RealD Mdag (const Field &in, Field &out)=0; - virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp (in.Grid()); - ni=M(in,tmp); - no=Mdag(tmp,out); - } + virtual void M (const Field &in, Field &out)=0; + virtual void Mdag (const Field &in, Field &out)=0; virtual void MdagM(const Field &in, Field &out) { - RealD ni, no; - MdagM(in,out,ni,no); + Field tmp (in.Grid()); + M(in,tmp); + Mdag(tmp,out); } virtual void Mdiag (const Field &in, Field &out)=0; virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index 133db2b4..584ed1d5 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -234,10 +234,8 @@ public: GridBase *grid=in.Grid(); - // std::cout << "Chevyshef(): in.Grid()="< Bt(siteBlock * nrot); - auto Bp=&Bt[0]; - - // GPU readable copy of Eigen matrix - Vector Qt_jv(Nm*Nm); - double *Qt_p = & Qt_jv[0]; - for(int k=0;k -void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) -{ - typedef decltype(basis[0].View()) View; - typedef typename Field::vector_object vobj; - GridBase* grid = basis[0].Grid(); - - result.Checkerboard() = basis[0].Checkerboard(); - auto result_v=result.View(); - Vector basis_v(basis.size(),result_v); - for(int k=0;k Qt_jv(Nm); - double * Qt_j = & Qt_jv[0]; - for(int k=0;koSites(),vobj::Nsimd(),{ - auto B=coalescedRead(zz); - for(int k=k0; k -void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) -{ - int vlen = idx.size(); - - assert(vlen>=1); - assert(vlen<=sort_vals.size()); - assert(vlen<=_v.size()); - - for (size_t i=0;ii for which _vnew[j] = _vold[i], - // track the move idx[j] => idx[i] - // track the move idx[i] => i - ////////////////////////////////////// - size_t j; - for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); - - swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy - std::swap(sort_vals[i],sort_vals[idx[i]]); - - idx[j] = idx[i]; - idx[i] = i; - } - } -} - -inline std::vector basisSortGetIndex(std::vector& sort_vals) -{ - std::vector idx(sort_vals.size()); - std::iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { - return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); - }); - return idx; -} - -template -void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) -{ - std::vector idx = basisSortGetIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - basisReorderInPlace(_v,sort_vals,idx); -} - -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. -template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = Zero(); - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_PREC_GCR_NON_HERM_H +#define GRID_PREC_GCR_NON_HERM_H + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +//VPGCR Abe and Zhang, 2005. +//INTERNATIONAL JOURNAL OF NUMERICAL ANALYSIS AND MODELING +//Computing and Information Volume 2, Number 2, Pages 147-161 +//NB. Likely not original reference since they are focussing on a preconditioner variant. +// but VPGCR was nicely written up in their paper +/////////////////////////////////////////////////////////////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); + +#define GCRLogLevel std::cout << GridLogMessage < +class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction { +public: + + RealD Tolerance; + Integer MaxIterations; + int verbose; + int mmax; + int nstep; + int steps; + int level; + GridStopWatch PrecTimer; + GridStopWatch MatTimer; + GridStopWatch LinalgTimer; + + LinearFunction &Preconditioner; + LinearOperatorBase &Linop; + + void Level(int lv) { level=lv; }; + + PrecGeneralisedConjugateResidualNonHermitian(RealD tol,Integer maxit,LinearOperatorBase &_Linop,LinearFunction &Prec,int _mmax,int _nstep) : + Tolerance(tol), + MaxIterations(maxit), + Linop(_Linop), + Preconditioner(Prec), + mmax(_mmax), + nstep(_nstep) + { + level=1; + verbose=1; + }; + + void operator() (const Field &src, Field &psi){ + + psi=Zero(); + RealD cp, ssq,rsq; + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; + + Field r(src.Grid()); + + PrecTimer.Reset(); + MatTimer.Reset(); + LinalgTimer.Reset(); + + GridStopWatch SolverTimer; + SolverTimer.Start(); + + steps=0; + for(int k=0;k q(mmax,grid); + std::vector p(mmax,grid); + std::vector qq(mmax); + + GCRLogLevel<< "PGCR nStep("<(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history. + for(int back=0;back=0); + + b=-real(innerProduct(q[peri_back],Az))/qq[peri_back]; + p[peri_kp]=p[peri_kp]+b*p[peri_back]; + q[peri_kp]=q[peri_kp]+b*q[peri_back]; + + } + qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm + LinalgTimer.Stop(); + } + assert(0); // never reached + return cp; + } +}; +NAMESPACE_END(Grid); +#endif diff --git a/Grid/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc index d53c4dc2..0d1707d9 100644 --- a/Grid/allocator/AlignedAllocator.cc +++ b/Grid/allocator/AlignedAllocator.cc @@ -6,72 +6,6 @@ NAMESPACE_BEGIN(Grid); MemoryStats *MemoryProfiler::stats = nullptr; bool MemoryProfiler::debug = false; -#ifdef GRID_NVCC -#define SMALL_LIMIT (0) -#else -#define SMALL_LIMIT (4096) -#endif - -#ifdef POINTER_CACHE -int PointerCache::victim; - -PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; - -void *PointerCache::Insert(void *ptr,size_t bytes) { - - if (bytes < SMALL_LIMIT ) return ptr; - -#ifdef GRID_OMP - assert(omp_in_parallel()==0); -#endif - - void * ret = NULL; - int v = -1; - - for(int e=0;e See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_ALIGNED_ALLOCATOR_H -#define GRID_ALIGNED_ALLOCATOR_H - -#ifdef HAVE_MALLOC_MALLOC_H -#include -#endif -#ifdef HAVE_MALLOC_H -#include -#endif - -#ifdef HAVE_MM_MALLOC_H -#include -#endif - -#define POINTER_CACHE -#define GRID_ALLOC_ALIGN (2*1024*1024) +#pragma once NAMESPACE_BEGIN(Grid); -// Move control to configure.ac and Config.h? -#ifdef POINTER_CACHE -class PointerCache { -private: -/*Pinning pages is costly*/ -/*Could maintain separate large and small allocation caches*/ -#ifdef GRID_NVCC - static const int Ncache=128; -#else - static const int Ncache=8; -#endif - static int victim; - - typedef struct { - void *address; - size_t bytes; - int valid; - } PointerCacheEntry; - - static PointerCacheEntry Entries[Ncache]; - -public: - - static void *Insert(void *ptr,size_t bytes) ; - static void *Lookup(size_t bytes) ; - -}; -#endif - -std::string sizeString(size_t bytes); - -struct MemoryStats -{ - size_t totalAllocated{0}, maxAllocated{0}, - currentlyAllocated{0}, totalFreed{0}; -}; - -class MemoryProfiler -{ -public: - static MemoryStats *stats; - static bool debug; -}; - -#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" -#define profilerDebugPrint \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ - << std::endl; \ - std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ - << std::endl; \ - } - -#define profilerAllocate(bytes) \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - s->totalAllocated += (bytes); \ - s->currentlyAllocated += (bytes); \ - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \ - } \ - if (MemoryProfiler::debug) \ - { \ - std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \ - profilerDebugPrint; \ - } - -#define profilerFree(bytes) \ - if (MemoryProfiler::stats) \ - { \ - auto s = MemoryProfiler::stats; \ - s->totalFreed += (bytes); \ - s->currentlyAllocated -= (bytes); \ - } \ - if (MemoryProfiler::debug) \ - { \ - std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \ - profilerDebugPrint; \ - } - -void check_huge_pages(void *Buf,uint64_t BYTES); - -//////////////////////////////////////////////////////////////////// -// A lattice of something, but assume the something is SIMDized. -//////////////////////////////////////////////////////////////////// - template class alignedAllocator { public: @@ -161,70 +53,60 @@ public: { size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); - - -#ifdef POINTER_CACHE - _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); -#else - pointer ptr = nullptr; -#endif - -#ifdef GRID_NVCC - //////////////////////////////////// - // Unified (managed) memory - //////////////////////////////////// - if ( ptr == (_Tp *) NULL ) { - // printf(" alignedAllocater cache miss %ld bytes ",bytes); BACKTRACEFP(stdout); - auto err = cudaMallocManaged((void **)&ptr,bytes); - if( err != cudaSuccess ) { - ptr = (_Tp *) NULL; - std::cerr << " cudaMallocManaged failed for " << bytes<<" bytes " < inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } +template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } -#ifdef GRID_NVCC - if ( __freeme ) cudaFree((void *)__freeme); -#else - #ifdef HAVE_MM_MALLOC_H - if ( __freeme ) _mm_free((void *)__freeme); - #else - if ( __freeme ) free((void *)__freeme); - #endif -#endif +template +class uvmAllocator { +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef _Tp* pointer; + typedef const _Tp* const_pointer; + typedef _Tp& reference; + typedef const _Tp& const_reference; + typedef _Tp value_type; + + template struct rebind { typedef uvmAllocator<_Tp1> other; }; + uvmAllocator() throw() { } + uvmAllocator(const uvmAllocator&) throw() { } + template uvmAllocator(const uvmAllocator<_Tp1>&) throw() { } + ~uvmAllocator() throw() { } + pointer address(reference __x) const { return &__x; } + size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } + + pointer allocate(size_type __n, const void* _p= 0) + { + size_type bytes = __n*sizeof(_Tp); + profilerAllocate(bytes); + _Tp *ptr = (_Tp*) MemoryManager::SharedAllocate(bytes); + assert( ( (_Tp*)ptr != (_Tp *)NULL ) ); + return ptr; + } + + void deallocate(pointer __p, size_type __n) + { + size_type bytes = __n * sizeof(_Tp); + profilerFree(bytes); + MemoryManager::SharedFree((void *)__p,bytes); } // FIXME: hack for the copy constructor, eventually it must be avoided @@ -233,17 +115,17 @@ public: void construct(pointer __p) { }; void destroy(pointer __p) { }; }; -template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } -template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } +template inline bool operator==(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return true; } +template inline bool operator!=(const uvmAllocator<_Tp>&, const uvmAllocator<_Tp>&){ return false; } //////////////////////////////////////////////////////////////////////////////// // Template typedefs //////////////////////////////////////////////////////////////////////////////// -template using commAllocator = alignedAllocator; -template using Vector = std::vector >; -template using commVector = std::vector >; -template using Matrix = std::vector > >; +template using commAllocator = uvmAllocator; +template using Vector = std::vector >; +template using commVector = std::vector >; +//template using Matrix = std::vector > >; NAMESPACE_END(Grid); -#endif + diff --git a/Grid/allocator/Allocator.h b/Grid/allocator/Allocator.h new file mode 100644 index 00000000..589ea36f --- /dev/null +++ b/Grid/allocator/Allocator.h @@ -0,0 +1,4 @@ +#pragma once +#include +#include +#include diff --git a/Grid/allocator/MemoryManager.cc b/Grid/allocator/MemoryManager.cc new file mode 100644 index 00000000..e11ce948 --- /dev/null +++ b/Grid/allocator/MemoryManager.cc @@ -0,0 +1,244 @@ +#include + +NAMESPACE_BEGIN(Grid); + +/*Allocation types, saying which pointer cache should be used*/ +#define Cpu (0) +#define CpuSmall (1) +#define Acc (2) +#define AccSmall (3) +#define Shared (4) +#define SharedSmall (5) +uint64_t total_shared; +uint64_t total_device; +uint64_t total_host;; +void MemoryManager::PrintBytes(void) +{ + std::cout << " MemoryManager : "<=0) && (Nc < NallocCacheMax)) { + Ncache[Cpu]=Nc; + Ncache[Acc]=Nc; + Ncache[Shared]=Nc; + } + } + + str= getenv("GRID_ALLOC_NCACHE_SMALL"); + if ( str ) { + Nc = atoi(str); + if ( (Nc>=0) && (Nc < NallocCacheMax)) { + Ncache[CpuSmall]=Nc; + Ncache[AccSmall]=Nc; + Ncache[SharedSmall]=Nc; + } + } + std::cout << GridLogMessage<< "MemoryManager::Init() setting up"<0); +#ifdef GRID_OMP + assert(omp_in_parallel()==0); +#endif + + void * ret = NULL; + int v = -1; + + for(int e=0;e0); +#ifdef GRID_OMP + assert(omp_in_parallel()==0); +#endif + for(int e=0;e +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move control to configure.ac and Config.h? + +#define ALLOCATION_CACHE +#define GRID_ALLOC_ALIGN (2*1024*1024) +#define GRID_ALLOC_SMALL_LIMIT (4096) + +/*Pinning pages is costly*/ +//////////////////////////////////////////////////////////////////////////// +// Advise the LatticeAccelerator class +//////////////////////////////////////////////////////////////////////////// +enum ViewAdvise { + AdviseDefault = 0x0, // Regular data + AdviseInfrequentUse = 0x1 // Advise that the data is used infrequently. This can + // significantly influence performance of bulk storage. + + // AdviseTransient = 0x2, // Data will mostly be read. On some architectures + // enables read-only copies of memory to be kept on + // host and device. + + // AdviseAcceleratorWriteDiscard = 0x4 // Field will be written in entirety on device + +}; + +//////////////////////////////////////////////////////////////////////////// +// View Access Mode +//////////////////////////////////////////////////////////////////////////// +enum ViewMode { + AcceleratorRead = 0x01, + AcceleratorWrite = 0x02, + AcceleratorWriteDiscard = 0x04, + CpuRead = 0x08, + CpuWrite = 0x10, + CpuWriteDiscard = 0x10 // same for now +}; + +class MemoryManager { +private: + + //////////////////////////////////////////////////////////// + // For caching recently freed allocations + //////////////////////////////////////////////////////////// + typedef struct { + void *address; + size_t bytes; + int valid; + } AllocationCacheEntry; + + static const int NallocCacheMax=128; + static const int NallocType=6; + static AllocationCacheEntry Entries[NallocType][NallocCacheMax]; + static int Victim[NallocType]; + static int Ncache[NallocType]; + + ///////////////////////////////////////////////// + // Free pool + ///////////////////////////////////////////////// + static void *Insert(void *ptr,size_t bytes,int type) ; + static void *Lookup(size_t bytes,int type) ; + static void *Insert(void *ptr,size_t bytes,AllocationCacheEntry *entries,int ncache,int &victim) ; + static void *Lookup(size_t bytes,AllocationCacheEntry *entries,int ncache) ; + + static void *AcceleratorAllocate(size_t bytes); + static void AcceleratorFree (void *ptr,size_t bytes); + static void PrintBytes(void); + public: + static void Init(void); + static void *SharedAllocate(size_t bytes); + static void SharedFree (void *ptr,size_t bytes); + static void *CpuAllocate(size_t bytes); + static void CpuFree (void *ptr,size_t bytes); + + //////////////////////////////////////////////////////// + // Footprint tracking + //////////////////////////////////////////////////////// + static uint64_t DeviceBytes; + static uint64_t DeviceLRUBytes; + static uint64_t DeviceMaxBytes; + static uint64_t HostToDeviceBytes; + static uint64_t DeviceToHostBytes; + static uint64_t HostToDeviceXfer; + static uint64_t DeviceToHostXfer; + + private: +#ifndef GRID_UVM + ////////////////////////////////////////////////////////////////////// + // Data tables for ViewCache + ////////////////////////////////////////////////////////////////////// + typedef std::list LRU_t; + typedef typename LRU_t::iterator LRUiterator; + typedef struct { + int LRU_valid; + LRUiterator LRU_entry; + uint64_t CpuPtr; + uint64_t AccPtr; + size_t bytes; + uint32_t transient; + uint32_t state; + uint32_t accLock; + uint32_t cpuLock; + } AcceleratorViewEntry; + + typedef std::unordered_map AccViewTable_t; + typedef typename AccViewTable_t::iterator AccViewTableIterator ; + + static AccViewTable_t AccViewTable; + static LRU_t LRU; + + ///////////////////////////////////////////////// + // Device motion + ///////////////////////////////////////////////// + static void Create(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void EvictVictims(uint64_t bytes); // Frees up + static void Evict(AcceleratorViewEntry &AccCache); + static void Flush(AcceleratorViewEntry &AccCache); + static void Clone(AcceleratorViewEntry &AccCache); + static void AccDiscard(AcceleratorViewEntry &AccCache); + static void CpuDiscard(AcceleratorViewEntry &AccCache); + + // static void LRUupdate(AcceleratorViewEntry &AccCache); + static void LRUinsert(AcceleratorViewEntry &AccCache); + static void LRUremove(AcceleratorViewEntry &AccCache); + + // manage entries in the table + static int EntryPresent(uint64_t CpuPtr); + static void EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void EntryErase (uint64_t CpuPtr); + static AccViewTableIterator EntryLookup(uint64_t CpuPtr); + static void EntrySet (uint64_t CpuPtr,AcceleratorViewEntry &entry); + + static void AcceleratorViewClose(uint64_t AccPtr); + static uint64_t AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + static void CpuViewClose(uint64_t Ptr); + static uint64_t CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); +#endif + static void NotifyDeletion(void * CpuPtr); + + public: + static void Print(void); + static int isOpen (void* CpuPtr); + static void ViewClose(void* CpuPtr,ViewMode mode); + static void *ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint); + +}; + +NAMESPACE_END(Grid); + + diff --git a/Grid/allocator/MemoryManagerCache.cc b/Grid/allocator/MemoryManagerCache.cc new file mode 100644 index 00000000..5dd7575e --- /dev/null +++ b/Grid/allocator/MemoryManagerCache.cc @@ -0,0 +1,468 @@ +#include + +#ifndef GRID_UVM + +#warning "Using explicit device memory copies" +NAMESPACE_BEGIN(Grid); +#define dprintf(...) + +//////////////////////////////////////////////////////////// +// For caching copies of data on device +//////////////////////////////////////////////////////////// +MemoryManager::AccViewTable_t MemoryManager::AccViewTable; +MemoryManager::LRU_t MemoryManager::LRU; + +//////////////////////////////////////////////////////// +// Footprint tracking +//////////////////////////////////////////////////////// +uint64_t MemoryManager::DeviceBytes; +uint64_t MemoryManager::DeviceLRUBytes; +uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128; +uint64_t MemoryManager::HostToDeviceBytes; +uint64_t MemoryManager::DeviceToHostBytes; +uint64_t MemoryManager::HostToDeviceXfer; +uint64_t MemoryManager::DeviceToHostXfer; + +//////////////////////////////////// +// Priority ordering for unlocked entries +// Empty +// CpuDirty +// Consistent +// AccDirty +//////////////////////////////////// +#define Empty (0x0) /*Entry unoccupied */ +#define CpuDirty (0x1) /*CPU copy is golden, Acc buffer MAY not be allocated*/ +#define Consistent (0x2) /*ACC copy AND CPU copy are valid */ +#define AccDirty (0x4) /*ACC copy is golden */ +#define EvictNext (0x8) /*Priority for eviction*/ + +///////////////////////////////////////////////// +// Mechanics of data table maintenance +///////////////////////////////////////////////// +int MemoryManager::EntryPresent(uint64_t CpuPtr) +{ + if(AccViewTable.empty()) return 0; + + auto count = AccViewTable.count(CpuPtr); assert((count==0)||(count==1)); + return count; +} +void MemoryManager::EntryCreate(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + assert(!EntryPresent(CpuPtr)); + AcceleratorViewEntry AccCache; + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; + AccCache.LRU_valid=0; + AccCache.transient=0; + AccCache.accLock=0; + AccCache.cpuLock=0; + AccViewTable[CpuPtr] = AccCache; +} +MemoryManager::AccViewTableIterator MemoryManager::EntryLookup(uint64_t CpuPtr) +{ + assert(EntryPresent(CpuPtr)); + auto AccCacheIterator = AccViewTable.find(CpuPtr); + assert(AccCacheIterator!=AccViewTable.end()); + return AccCacheIterator; +} +void MemoryManager::EntryErase(uint64_t CpuPtr) +{ + auto AccCache = EntryLookup(CpuPtr); + AccViewTable.erase(CpuPtr); +} +void MemoryManager::LRUinsert(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.LRU_valid==0); + if (AccCache.transient) { + LRU.push_back(AccCache.CpuPtr); + AccCache.LRU_entry = --LRU.end(); + } else { + LRU.push_front(AccCache.CpuPtr); + AccCache.LRU_entry = LRU.begin(); + } + AccCache.LRU_valid = 1; + DeviceLRUBytes+=AccCache.bytes; +} +void MemoryManager::LRUremove(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.LRU_valid==1); + LRU.erase(AccCache.LRU_entry); + AccCache.LRU_valid = 0; + DeviceLRUBytes-=AccCache.bytes; +} +///////////////////////////////////////////////// +// Accelerator cache motion & consistency logic +///////////////////////////////////////////////// +void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache) +{ + /////////////////////////////////////////////////////////// + // Remove from Accelerator, remove entry, without flush + // Cannot be locked. If allocated Must be in LRU pool. + /////////////////////////////////////////////////////////// + assert(AccCache.state!=Empty); + + // dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr); + assert(AccCache.accLock==0); + assert(AccCache.cpuLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr) { + AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes); + DeviceBytes -=AccCache.bytes; + LRUremove(AccCache); + // dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes); + } + uint64_t CpuPtr = AccCache.CpuPtr; + EntryErase(CpuPtr); +} + +void MemoryManager::Evict(AcceleratorViewEntry &AccCache) +{ + /////////////////////////////////////////////////////////////////////////// + // Make CPU consistent, remove from Accelerator, remove entry + // Cannot be locked. If allocated must be in LRU pool. + /////////////////////////////////////////////////////////////////////////// + assert(AccCache.state!=Empty); + + // dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr); + assert(AccCache.accLock==0); + assert(AccCache.cpuLock==0); + if(AccCache.state==AccDirty) { + Flush(AccCache); + } + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr) { + AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes); + DeviceBytes -=AccCache.bytes; + LRUremove(AccCache); + // dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes); + } + uint64_t CpuPtr = AccCache.CpuPtr; + EntryErase(CpuPtr); +} +void MemoryManager::Flush(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state==AccDirty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.AccPtr!=(uint64_t)NULL); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes); + // dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + DeviceToHostBytes+=AccCache.bytes; + DeviceToHostXfer++; + AccCache.state=Consistent; +} +void MemoryManager::Clone(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state==CpuDirty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr==(uint64_t)NULL){ + AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes); + DeviceBytes+=AccCache.bytes; + } + // dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout); + acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes); + HostToDeviceBytes+=AccCache.bytes; + HostToDeviceXfer++; + AccCache.state=Consistent; +} + +void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache) +{ + assert(AccCache.state!=Empty); + assert(AccCache.cpuLock==0); + assert(AccCache.accLock==0); + assert(AccCache.CpuPtr!=(uint64_t)NULL); + if(AccCache.AccPtr==(uint64_t)NULL){ + AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes); + DeviceBytes+=AccCache.bytes; + } + AccCache.state=AccDirty; +} + +///////////////////////////////////////////////////////////////////////////////// +// View management +///////////////////////////////////////////////////////////////////////////////// +void MemoryManager::ViewClose(void* Ptr,ViewMode mode) +{ + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + AcceleratorViewClose((uint64_t)Ptr); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + CpuViewClose((uint64_t)Ptr); + } else { + assert(0); + } +} +void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + uint64_t CpuPtr = (uint64_t)_CpuPtr; + if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){ + return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint); + } else if( (mode==CpuRead)||(mode==CpuWrite)){ + return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint); + } else { + assert(0); + return NULL; + } +} +void MemoryManager::EvictVictims(uint64_t bytes) +{ + while(bytes+DeviceLRUBytes > DeviceMaxBytes){ + if ( DeviceLRUBytes > 0){ + assert(LRU.size()>0); + uint64_t victim = LRU.back(); + auto AccCacheIterator = EntryLookup(victim); + auto & AccCache = AccCacheIterator->second; + Evict(AccCache); + } + } +} +uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + if ( EntryPresent(CpuPtr)==0 ){ + EvictVictims(bytes); + EntryCreate(CpuPtr,bytes,mode,hint); + } + + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert((mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)); + + assert(AccCache.cpuLock==0); // Programming error + + if(AccCache.state!=Empty) { + assert(AccCache.CpuPtr == CpuPtr); + assert(AccCache.bytes ==bytes); + } +/* + * State transitions and actions + * + * Action State StateNext Flush Clone + * + * AccRead Empty Consistent - Y + * AccWrite Empty AccDirty - Y + * AccRead CpuDirty Consistent - Y + * AccWrite CpuDirty AccDirty - Y + * AccRead Consistent Consistent - - + * AccWrite Consistent AccDirty - - + * AccRead AccDirty AccDirty - - + * AccWrite AccDirty AccDirty - - + */ + if(AccCache.state==Empty) { + assert(AccCache.LRU_valid==0); + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; // Cpu starts primary + if(mode==AcceleratorWriteDiscard){ + CpuDiscard(AccCache); + AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite){ + Clone(AccCache); + AccCache.state = AccDirty; // Empty + AcceleratorWrite=> AccDirty + } else { + Clone(AccCache); + AccCache.state = Consistent; // Empty + AccRead => Consistent + } + AccCache.accLock= 1; + } else if(AccCache.state==CpuDirty ){ + if(mode==AcceleratorWriteDiscard) { + CpuDiscard(AccCache); + AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else if(mode==AcceleratorWrite) { + Clone(AccCache); + AccCache.state = AccDirty; // CpuDirty + AcceleratorWrite=> AccDirty + } else { + Clone(AccCache); + AccCache.state = Consistent; // CpuDirty + AccRead => Consistent + } + AccCache.accLock++; + // printf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock); + } else if(AccCache.state==Consistent) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty + else + AccCache.state = Consistent; // Consistent + AccRead => Consistent + AccCache.accLock++; + // printf("Consistent entry into device accLock %d\n",AccCache.accLock); + } else if(AccCache.state==AccDirty) { + if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard)) + AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty + else + AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty + AccCache.accLock++; + // printf("AccDirty entry into device accLock %d\n",AccCache.accLock); + } else { + assert(0); + } + + // If view is opened on device remove from LRU + if(AccCache.LRU_valid==1){ + // must possibly remove from LRU as now locked on GPU + LRUremove(AccCache); + } + + int transient =hint; + AccCache.transient= transient? EvictNext : 0; + + return AccCache.AccPtr; +} +//////////////////////////////////// +// look up & decrement lock count +//////////////////////////////////// +void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr) +{ + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert(AccCache.cpuLock==0); + assert(AccCache.accLock>0); + + AccCache.accLock--; + + // Move to LRU queue if not locked and close on device + if(AccCache.accLock==0) { + LRUinsert(AccCache); + } +} +void MemoryManager::CpuViewClose(uint64_t CpuPtr) +{ + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert(AccCache.cpuLock>0); + assert(AccCache.accLock==0); + + AccCache.cpuLock--; +} +/* + * Action State StateNext Flush Clone + * + * CpuRead Empty CpuDirty - - + * CpuWrite Empty CpuDirty - - + * CpuRead CpuDirty CpuDirty - - + * CpuWrite CpuDirty CpuDirty - - + * CpuRead Consistent Consistent - - + * CpuWrite Consistent CpuDirty - - + * CpuRead AccDirty Consistent Y - + * CpuWrite AccDirty CpuDirty Y - + */ +uint64_t MemoryManager::CpuViewOpen(uint64_t CpuPtr,size_t bytes,ViewMode mode,ViewAdvise transient) +{ + //////////////////////////////////////////////////////////////////////////// + // Find if present, otherwise get or force an empty + //////////////////////////////////////////////////////////////////////////// + if ( EntryPresent(CpuPtr)==0 ){ + EvictVictims(bytes); + EntryCreate(CpuPtr,bytes,mode,transient); + } + + auto AccCacheIterator = EntryLookup(CpuPtr); + auto & AccCache = AccCacheIterator->second; + + assert((mode==CpuRead)||(mode==CpuWrite)); + assert(AccCache.accLock==0); // Programming error + + if(AccCache.state!=Empty) { + assert(AccCache.CpuPtr == CpuPtr); + assert(AccCache.bytes==bytes); + } + + if(AccCache.state==Empty) { + AccCache.CpuPtr = CpuPtr; + AccCache.AccPtr = (uint64_t)NULL; + AccCache.bytes = bytes; + AccCache.state = CpuDirty; // Empty + CpuRead/CpuWrite => CpuDirty + AccCache.accLock= 0; + AccCache.cpuLock= 1; + } else if(AccCache.state==CpuDirty ){ + // AccPtr dont care, deferred allocate + AccCache.state = CpuDirty; // CpuDirty +CpuRead/CpuWrite => CpuDirty + AccCache.cpuLock++; + } else if(AccCache.state==Consistent) { + assert(AccCache.AccPtr != (uint64_t)NULL); + if(mode==CpuWrite) + AccCache.state = CpuDirty; // Consistent +CpuWrite => CpuDirty + else + AccCache.state = Consistent; // Consistent +CpuRead => Consistent + AccCache.cpuLock++; + } else if(AccCache.state==AccDirty) { + assert(AccCache.AccPtr != (uint64_t)NULL); + Flush(AccCache); + if(mode==CpuWrite) AccCache.state = CpuDirty; // AccDirty +CpuWrite => CpuDirty, Flush + else AccCache.state = Consistent; // AccDirty +CpuRead => Consistent, Flush + AccCache.cpuLock++; + } else { + assert(0); // should be unreachable + } + + AccCache.transient= transient? EvictNext : 0; + + return AccCache.CpuPtr; +} +void MemoryManager::NotifyDeletion(void *_ptr) +{ + // Look up in ViewCache + uint64_t ptr = (uint64_t)_ptr; + if(EntryPresent(ptr)) { + auto e = EntryLookup(ptr); + AccDiscard(e->second); + } +} +void MemoryManager::Print(void) +{ + std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << "Memory Manager " << std::endl; + std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << DeviceBytes << " bytes allocated on device " << std::endl; + std::cout << GridLogDebug << DeviceLRUBytes<< " bytes evictable on device " << std::endl; + std::cout << GridLogDebug << DeviceMaxBytes<< " bytes max on device " << std::endl; + std::cout << GridLogDebug << HostToDeviceXfer << " transfers to device " << std::endl; + std::cout << GridLogDebug << DeviceToHostXfer << " transfers from device " << std::endl; + std::cout << GridLogDebug << HostToDeviceBytes<< " bytes transfered to device " << std::endl; + std::cout << GridLogDebug << DeviceToHostBytes<< " bytes transfered from device " << std::endl; + std::cout << GridLogDebug << AccViewTable.size()<< " vectors " << LRU.size()<<" evictable"<< std::endl; + std::cout << GridLogDebug << "--------------------------------------------" << std::endl; + std::cout << GridLogDebug << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<second; + + std::string str; + if ( AccCache.state==Empty ) str = std::string("Empty"); + if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty"); + if ( AccCache.state==AccDirty ) str = std::string("AccDirty"); + if ( AccCache.state==Consistent)str = std::string("Consistent"); + + std::cout << GridLogDebug << "0x"<second; + return AccCache.cpuLock+AccCache.accLock; + } else { + return 0; + } +} + +NAMESPACE_END(Grid); + +#endif diff --git a/Grid/allocator/MemoryManagerShared.cc b/Grid/allocator/MemoryManagerShared.cc new file mode 100644 index 00000000..537f7c32 --- /dev/null +++ b/Grid/allocator/MemoryManagerShared.cc @@ -0,0 +1,24 @@ +#include +#ifdef GRID_UVM + +#warning "Grid is assuming unified virtual memory address space" +NAMESPACE_BEGIN(Grid); +///////////////////////////////////////////////////////////////////////////////// +// View management is 1:1 address space mapping +///////////////////////////////////////////////////////////////////////////////// +uint64_t MemoryManager::DeviceBytes; +uint64_t MemoryManager::DeviceLRUBytes; +uint64_t MemoryManager::DeviceMaxBytes = 1024*1024*128; +uint64_t MemoryManager::HostToDeviceBytes; +uint64_t MemoryManager::DeviceToHostBytes; +uint64_t MemoryManager::HostToDeviceXfer; +uint64_t MemoryManager::DeviceToHostXfer; + +void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){}; +void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; }; +int MemoryManager::isOpen (void* CpuPtr) { return 0;} +void MemoryManager::Print(void){}; +void MemoryManager::NotifyDeletion(void *ptr){}; + +NAMESPACE_END(Grid); +#endif diff --git a/Grid/allocator/MemoryStats.cc b/Grid/allocator/MemoryStats.cc new file mode 100644 index 00000000..0d1707d9 --- /dev/null +++ b/Grid/allocator/MemoryStats.cc @@ -0,0 +1,67 @@ +#include +#include + +NAMESPACE_BEGIN(Grid); + +MemoryStats *MemoryProfiler::stats = nullptr; +bool MemoryProfiler::debug = false; + +void check_huge_pages(void *Buf,uint64_t BYTES) +{ +#ifdef __linux__ + int fd = open("/proc/self/pagemap", O_RDONLY); + assert(fd >= 0); + const int page_size = 4096; + uint64_t virt_pfn = (uint64_t)Buf / page_size; + off_t offset = sizeof(uint64_t) * virt_pfn; + uint64_t npages = (BYTES + page_size-1) / page_size; + uint64_t pagedata[npages]; + uint64_t ret = lseek(fd, offset, SEEK_SET); + assert(ret == offset); + ret = ::read(fd, pagedata, sizeof(uint64_t)*npages); + assert(ret == sizeof(uint64_t) * npages); + int nhugepages = npages / 512; + int n4ktotal, nnothuge; + n4ktotal = 0; + nnothuge = 0; + for (int i = 0; i < nhugepages; ++i) { + uint64_t baseaddr = (pagedata[i*512] & 0x7fffffffffffffULL) * page_size; + for (int j = 0; j < 512; ++j) { + uint64_t pageaddr = (pagedata[i*512+j] & 0x7fffffffffffffULL) * page_size; + ++n4ktotal; + if (pageaddr != baseaddr + j * page_size) + ++nnothuge; + } + } + int rank = CartesianCommunicator::RankWorld(); + printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); +#endif +} + +std::string sizeString(const size_t bytes) +{ + constexpr unsigned int bufSize = 256; + const char *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"}; + char buf[256]; + size_t s = 0; + double count = bytes; + + while (count >= 1024 && s < 7) + { + s++; + count /= 1024; + } + if (count - floor(count) == 0.0) + { + snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]); + } + else + { + snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); + } + + return std::string(buf); +} + +NAMESPACE_END(Grid); + diff --git a/Grid/allocator/MemoryStats.h b/Grid/allocator/MemoryStats.h new file mode 100644 index 00000000..156c9747 --- /dev/null +++ b/Grid/allocator/MemoryStats.h @@ -0,0 +1,95 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/MemoryStats.h + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + + +NAMESPACE_BEGIN(Grid); + +std::string sizeString(size_t bytes); + +struct MemoryStats +{ + size_t totalAllocated{0}, maxAllocated{0}, + currentlyAllocated{0}, totalFreed{0}; +}; + +class MemoryProfiler +{ +public: + static MemoryStats *stats; + static bool debug; +}; + +#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" +#define profilerDebugPrint \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ + << std::endl; \ + } + +#define profilerAllocate(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalAllocated += (bytes); \ + s->currentlyAllocated += (bytes); \ + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } + +#define profilerFree(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalFreed += (bytes); \ + s->currentlyAllocated -= (bytes); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } + +void check_huge_pages(void *Buf,uint64_t BYTES); + +NAMESPACE_END(Grid); + diff --git a/Grid/cartesian/Cartesian_base.h b/Grid/cartesian/Cartesian_base.h index 87472cc9..ae1fd1fd 100644 --- a/Grid/cartesian/Cartesian_base.h +++ b/Grid/cartesian/Cartesian_base.h @@ -81,6 +81,7 @@ public: bool _isCheckerBoarded; int LocallyPeriodic; + Coordinate _checker_dim_mask; public: diff --git a/Grid/cartesian/Cartesian_full.h b/Grid/cartesian/Cartesian_full.h index c083817b..31a67bf0 100644 --- a/Grid/cartesian/Cartesian_full.h +++ b/Grid/cartesian/Cartesian_full.h @@ -38,6 +38,7 @@ class GridCartesian: public GridBase { public: int dummy; + Coordinate _checker_dim_mask; virtual int CheckerBoardFromOindexTable (int Oindex) { return 0; } @@ -104,6 +105,7 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); _simd_layout.resize(_ndimension); + _checker_dim_mask.resize(_ndimension);; _lstart.resize(_ndimension); _lend.resize(_ndimension); @@ -114,6 +116,8 @@ public: for (int d = 0; d < _ndimension; d++) { + _checker_dim_mask[d]=0; + _fdimensions[d] = dimensions[d]; // Global dimensions _gdimensions[d] = _fdimensions[d]; // Global dimensions _simd_layout[d] = simd_layout[d]; diff --git a/Grid/cartesian/Cartesian_red_black.h b/Grid/cartesian/Cartesian_red_black.h index 34f763d2..b71981f5 100644 --- a/Grid/cartesian/Cartesian_red_black.h +++ b/Grid/cartesian/Cartesian_red_black.h @@ -35,12 +35,28 @@ static const int CbRed =0; static const int CbBlack=1; static const int Even =CbRed; static const int Odd =CbBlack; + +accelerator_inline int RedBlackCheckerBoardFromOindex (int oindex, Coordinate &rdim, Coordinate &chk_dim_msk) +{ + int nd=rdim.size(); + Coordinate coor(nd); + + Lexicographic::CoorFromIndex(coor,oindex,rdim); + + int linear=0; + for(int d=0;d _checker_board; diff --git a/Grid/communicator/Communicator_base.h b/Grid/communicator/Communicator_base.h index 11dbfcbb..436d75ef 100644 --- a/Grid/communicator/Communicator_base.h +++ b/Grid/communicator/Communicator_base.h @@ -114,6 +114,7 @@ public: void GlobalSumVector(RealD *,int N); void GlobalSum(uint32_t &); void GlobalSum(uint64_t &); + void GlobalSumVector(uint64_t*,int N); void GlobalSum(ComplexF &c); void GlobalSumVector(ComplexF *c,int N); void GlobalSum(ComplexD &c); diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index e9399ddc..3fe8a297 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -275,6 +275,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); assert(ierr==0); } +void CartesianCommunicator::GlobalSumVector(uint64_t* u,int N){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,u,N,MPI_UINT64_T,MPI_SUM,communicator); + assert(ierr==0); +} void CartesianCommunicator::GlobalXOR(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); assert(ierr==0); diff --git a/Grid/communicator/Communicator_none.cc b/Grid/communicator/Communicator_none.cc index b8a15a0e..81900371 100644 --- a/Grid/communicator/Communicator_none.cc +++ b/Grid/communicator/Communicator_none.cc @@ -70,9 +70,10 @@ CartesianCommunicator::~CartesianCommunicator(){} void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} +void CartesianCommunicator::GlobalSumVector(double *,int N){} void CartesianCommunicator::GlobalSum(uint32_t &){} void CartesianCommunicator::GlobalSum(uint64_t &){} -void CartesianCommunicator::GlobalSumVector(double *,int N){} +void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){} void CartesianCommunicator::GlobalXOR(uint32_t &){} void CartesianCommunicator::GlobalXOR(uint64_t &){} diff --git a/Grid/communicator/SharedMemory.cc b/Grid/communicator/SharedMemory.cc index 5bca9764..de10da3d 100644 --- a/Grid/communicator/SharedMemory.cc +++ b/Grid/communicator/SharedMemory.cc @@ -74,7 +74,9 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){ if (heap_bytes >= heap_size) { std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < #include #include -#ifdef GRID_NVCC +#ifdef GRID_CUDA #include #endif @@ -170,17 +170,24 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD std::vector primes({2,3,5}); int dim = 0; + int last_dim = ndimension - 1; int AutoShmSize = 1; while(AutoShmSize != WorldShmSize) { - for(int p=0;p #ifdef GRID_COMMS_SHMEM #include // uses same implementation of communicator #endif + +NAMESPACE_BEGIN(Grid); + +template +auto Cshift(const LatticeUnaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +template +auto Cshift(const LatticeBinaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +template +auto Cshift(const LatticeTrinaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +NAMESPACE_END(Grid); + #endif diff --git a/Grid/cshift/Cshift_common.h b/Grid/cshift/Cshift_common.h index 954342cb..4de2bbe2 100644 --- a/Grid/cshift/Cshift_common.h +++ b/Grid/cshift/Cshift_common.h @@ -29,6 +29,8 @@ Author: Peter Boyle NAMESPACE_BEGIN(Grid); +extern Vector > Cshift_table; + /////////////////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split /////////////////////////////////////////////////////////////////// @@ -46,16 +48,16 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int e2=rhs.Grid()->_slice_block[dimension]; int ent = 0; - static Vector > table; table.resize(e1*e2); + if(Cshift_table.size()_slice_stride[dimension]; - auto rhs_v = rhs.View(); if ( cbmask == 0x3 ) { for(int n=0;n(off+bo+b,so+o+b); + Cshift_table[ent++] = std::pair(off+bo+b,so+o+b); } } } else { @@ -65,14 +67,19 @@ Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimen int o = n*stride; int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { - table[ent++]=std::pair (off+bo++,so+o+b); + Cshift_table[ent++]=std::pair (off+bo++,so+o+b); } } } } - thread_for(i,ent,{ - buffer[table[i].first]=rhs_v[table[i].second]; - }); + { + autoView(rhs_v , rhs, AcceleratorRead); + auto buffer_p = & buffer[0]; + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + buffer_p[table[i].first]=rhs_v[table[i].second]; + }); + } } /////////////////////////////////////////////////////////////////// @@ -95,36 +102,38 @@ Gather_plane_extract(const Lattice &rhs, int e2=rhs.Grid()->_slice_block[dimension]; int n1=rhs.Grid()->_slice_stride[dimension]; - auto rhs_v = rhs.View(); if ( cbmask ==0x3){ - thread_for_collapse(2,n,e1,{ - for(int b=0;b(temp,pointers,offset); - } - }); + }); } else { + autoView(rhs_v , rhs, AcceleratorRead); - // Case of SIMD split AND checker dim cannot currently be hit, except in - // Test_cshift_red_black code. - std::cout << " Dense packed buffer WARNING " <_rdimensions; + Coordinate cdm =rhs.Grid()->_checker_dim_mask; + std::cout << " Dense packed buffer WARNING " <CheckerBoardFromOindex(o+b); + int oindex = o+b; + + int cb = RedBlackCheckerBoardFromOindex(oindex, rdim, cdm); + + int ocb=1<(temp,pointers,offset); } - } - }); + }); } } @@ -145,7 +154,8 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_block[dimension]; int stride=rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + if(Cshift_table.size() void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; int bo =n*rhs.Grid()->_slice_block[dimension]; - table[ent++] = std::pair(so+o+b,bo+b); + Cshift_table[ent++] = std::pair(so+o+b,bo+b); } } @@ -165,16 +175,20 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table[ent++]=std::pair (so+o+b,bo++); + Cshift_table[ent++]=std::pair (so+o+b,bo++); } } } } - auto rhs_v = rhs.View(); - thread_for(i,ent,{ - rhs_v[table[i].first]=buffer[table[i].second]; - }); + { + autoView( rhs_v, rhs, AcceleratorWrite); + auto buffer_p = & buffer[0]; + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + rhs_v[table[i].first]=buffer_p[table[i].second]; + }); + } } ////////////////////////////////////////////////////// @@ -194,21 +208,19 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA int e2=rhs.Grid()->_slice_block[dimension]; if(cbmask ==0x3 ) { - auto rhs_v = rhs.View(); - thread_for_collapse(2,n,e1,{ - for(int b=0;b_slice_stride[dimension]; int offset = b+n*rhs.Grid()->_slice_block[dimension]; merge(rhs_v[so+o+b],pointers,offset); - } - }); + }); } else { // Case of SIMD split AND checker dim cannot currently be hit, except in // Test_cshift_red_black code. // std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; @@ -225,6 +237,7 @@ template void Scatter_plane_merge(Lattice &rhs,ExtractPointerA ////////////////////////////////////////////////////// // local to node block strided copies ////////////////////////////////////////////////////// + template void Copy_plane(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask) { int rd = rhs.Grid()->_rdimensions[dimension]; @@ -239,14 +252,16 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs.Grid()->_slice_block[dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + + if(Cshift_table.size()(lo+o,ro+o); + Cshift_table[ent++] = std::pair(lo+o,ro+o); } } } else { @@ -255,23 +270,24 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs int o =n*stride+b; int ocb=1<CheckerBoardFromOindex(o); if ( ocb&cbmask ) { - table[ent++] = std::pair(lo+o,ro+o); + Cshift_table[ent++] = std::pair(lo+o,ro+o); } } } } - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); - thread_for(i,ent,{ - lhs_v[table[i].first]=rhs_v[table[i].second]; - }); - + { + autoView(rhs_v , rhs, AcceleratorRead); + autoView(lhs_v , lhs, AcceleratorWrite); + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + lhs_v[table[i].first]=rhs_v[table[i].second]; + }); + } } template void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) { - int rd = rhs.Grid()->_rdimensions[dimension]; if ( !rhs.Grid()->CheckerBoarded(dimension) ) { @@ -285,29 +301,33 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs.Grid()->_slice_stride[dimension]; - static std::vector > table; table.resize(e1*e2); + if(Cshift_table.size()(lo+o+b,ro+o+b); + Cshift_table[ent++] = std::pair(lo+o+b,ro+o+b); }} } else { for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + if ( ocb&cbmask ) Cshift_table[ent++] = std::pair(lo+o+b,ro+o+b); }} } - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); - thread_for(i,ent,{ - permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type); - }); + { + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + auto table = &Cshift_table[0]; + accelerator_for(i,ent,1,{ + permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type); + }); + } } ////////////////////////////////////////////////////// diff --git a/Grid/cshift/Cshift_table.cc b/Grid/cshift/Cshift_table.cc new file mode 100644 index 00000000..d46e51c0 --- /dev/null +++ b/Grid/cshift/Cshift_table.cc @@ -0,0 +1,4 @@ +#include +NAMESPACE_BEGIN(Grid); +Vector > Cshift_table; +NAMESPACE_END(Grid); diff --git a/Grid/lattice/Lattice.h b/Grid/lattice/Lattice.h index 1eea98ed..a3017198 100644 --- a/Grid/lattice/Lattice.h +++ b/Grid/lattice/Lattice.h @@ -26,6 +26,7 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #pragma once +#include #include #include #include @@ -35,7 +36,7 @@ Author: Peter Boyle #include #include #include -#include +//#include #include #include #include @@ -43,4 +44,4 @@ Author: Peter Boyle #include #include #include - +#include diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index cf7147b9..91b456d9 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -9,6 +9,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo +Author: Christoph Lehner &arg) { return arg[ss]; } + +// What needs this? +// Cannot be legal on accelerator +// Comparison must convert +#if 1 template accelerator_inline const lobj & eval(const uint64_t ss, const Lattice &arg) { - auto view = arg.View(); + auto view = arg.View(AcceleratorRead); return view[ss]; } +#endif /////////////////////////////////////////////////// // handle nodes in syntax tree- eval one operand @@ -179,16 +186,12 @@ inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf cb = lat.Checkerboard(); } template ::value, T1>::type * = nullptr> -inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf -{ -} - +inline void CBFromExpression(int &cb, const T1 ¬lat) {} // non-lattice leaf template inline void CBFromExpression(int &cb,const LatticeUnaryExpression &expr) { CBFromExpression(cb, expr.arg1); // recurse AST } - template inline void CBFromExpression(int &cb,const LatticeBinaryExpression &expr) { @@ -203,6 +206,68 @@ inline void CBFromExpression(int &cb, const LatticeTrinaryExpression::value, T1>::type * = nullptr> +inline void ExpressionViewOpen(T1 &lat) // Lattice leaf +{ + lat.ViewOpen(AcceleratorRead); +} +template ::value, T1>::type * = nullptr> + inline void ExpressionViewOpen(T1 ¬lat) {} + +template inline +void ExpressionViewOpen(LatticeUnaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST +} + +template inline +void ExpressionViewOpen(LatticeBinaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST + ExpressionViewOpen(expr.arg2); // recurse AST +} +template +inline void ExpressionViewOpen(LatticeTrinaryExpression &expr) +{ + ExpressionViewOpen(expr.arg1); // recurse AST + ExpressionViewOpen(expr.arg2); // recurse AST + ExpressionViewOpen(expr.arg3); // recurse AST +} + +////////////////////////////////////////////////////////////////////////// +// ViewClose +////////////////////////////////////////////////////////////////////////// +template ::value, T1>::type * = nullptr> +inline void ExpressionViewClose( T1 &lat) // Lattice leaf +{ + lat.ViewClose(); +} +template ::value, T1>::type * = nullptr> +inline void ExpressionViewClose(T1 ¬lat) {} + +template inline +void ExpressionViewClose(LatticeUnaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST +} +template inline +void ExpressionViewClose(LatticeBinaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST + ExpressionViewClose(expr.arg2); // recurse AST +} +template +inline void ExpressionViewClose(LatticeTrinaryExpression &expr) +{ + ExpressionViewClose(expr.arg1); // recurse AST + ExpressionViewClose(expr.arg2); // recurse AST + ExpressionViewClose(expr.arg3); // recurse AST +} + //////////////////////////////////////////// // Unary operators and funcs //////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index 3543d6aa..a3ae1f28 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -7,6 +7,7 @@ Copyright (C) 2015 Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,9 +37,9 @@ NAMESPACE_BEGIN(Grid); template inline void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); conformable(ret,rhs); conformable(lhs,rhs); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ @@ -55,9 +56,9 @@ void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -72,9 +73,9 @@ void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -88,9 +89,9 @@ void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,rhs); conformable(lhs,rhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -107,8 +108,8 @@ template inline void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; mult(&tmp,&lhs_v(ss),&rhs); @@ -120,8 +121,8 @@ template inline void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -134,8 +135,8 @@ template inline void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(ret,lhs); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -147,8 +148,8 @@ template inline void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ ret.Checkerboard() = lhs.Checkerboard(); conformable(lhs,ret); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto lhs_t=lhs_v(ss); @@ -164,8 +165,8 @@ template inline void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -178,8 +179,8 @@ template inline void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -192,8 +193,8 @@ template inline void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -205,8 +206,8 @@ template inline void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ ret.Checkerboard() = rhs.Checkerboard(); conformable(ret,rhs); - auto ret_v = ret.View(); - auto rhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( rhs_v , lhs, AcceleratorRead); accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ decltype(coalescedRead(obj1())) tmp; auto rhs_t=rhs_v(ss); @@ -220,9 +221,9 @@ void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice & ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(); - auto x_v = x.View(); - auto y_v = y.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( x_v , x, AcceleratorRead); + autoView( y_v , y, AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+y_v(ss); coalescedWrite(ret_v[ss],tmp); @@ -233,9 +234,9 @@ void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice ret.Checkerboard() = x.Checkerboard(); conformable(ret,x); conformable(x,y); - auto ret_v = ret.View(); - auto x_v = x.View(); - auto y_v = y.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( x_v , x, AcceleratorRead); + autoView( y_v , y, AcceleratorRead); accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ auto tmp = a*x_v(ss)+b*y_v(ss); coalescedWrite(ret_v[ss],tmp); diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index ec7c54ec..73b1b6a1 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -9,6 +9,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +29,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #pragma once #define STREAMING_STORES @@ -36,129 +38,6 @@ NAMESPACE_BEGIN(Grid); extern int GridCshiftPermuteMap[4][16]; -/////////////////////////////////////////////////////////////////// -// Base class which can be used by traits to pick up behaviour -/////////////////////////////////////////////////////////////////// -class LatticeBase {}; - -///////////////////////////////////////////////////////////////////////////////////////// -// Conformable checks; same instance of Grid required -///////////////////////////////////////////////////////////////////////////////////////// -void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) -{ - assert(lhs == rhs); -} - -//////////////////////////////////////////////////////////////////////////// -// Minimal base class containing only data valid to access from accelerator -// _odata will be a managed pointer in CUDA -//////////////////////////////////////////////////////////////////////////// -// Force access to lattice through a view object. -// prevents writing of code that will not offload to GPU, but perhaps annoyingly -// strict since host could could in principle direct access through the lattice object -// Need to decide programming model. -#define LATTICE_VIEW_STRICT -template class LatticeAccelerator : public LatticeBase -{ -protected: - GridBase *_grid; - int checkerboard; - vobj *_odata; // A managed pointer - uint64_t _odata_size; -public: - accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { }; - accelerator_inline uint64_t oSites(void) const { return _odata_size; }; - accelerator_inline int Checkerboard(void) const { return checkerboard; }; - accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view - accelerator_inline void Conformable(GridBase * &grid) const - { - if (grid) conformable(grid, _grid); - else grid = _grid; - }; -}; - -///////////////////////////////////////////////////////////////////////////////////////// -// A View class which provides accessor to the data. -// This will be safe to call from accelerator_for and is trivially copy constructible -// The copy constructor for this will need to be used by device lambda functions -///////////////////////////////////////////////////////////////////////////////////////// -template -class LatticeView : public LatticeAccelerator -{ -public: - - - // Rvalue -#ifdef __CUDA_ARCH__ - accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } -#else - accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } -#endif - - accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; - accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; - - accelerator_inline uint64_t begin(void) const { return 0;}; - accelerator_inline uint64_t end(void) const { return this->_odata_size; }; - accelerator_inline uint64_t size(void) const { return this->_odata_size; }; - - LatticeView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me) - { - } -}; - -///////////////////////////////////////////////////////////////////////////////////////// -// Lattice expression types used by ET to assemble the AST -// -// Need to be able to detect code paths according to the whether a lattice object or not -// so introduce some trait type things -///////////////////////////////////////////////////////////////////////////////////////// - -class LatticeExpressionBase {}; - -template using is_lattice = std::is_base_of; -template using is_lattice_expr = std::is_base_of; - -template struct ViewMapBase { typedef T Type; }; -template struct ViewMapBase { typedef LatticeView Type; }; -template using ViewMap = ViewMapBase::value >; - -template -class LatticeUnaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - Op op; - T1 arg1; - LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {}; -}; - -template -class LatticeBinaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - typedef typename ViewMap<_T2>::Type T2; - Op op; - T1 arg1; - T2 arg2; - LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {}; -}; - -template -class LatticeTrinaryExpression : public LatticeExpressionBase -{ -public: - typedef typename ViewMap<_T1>::Type T1; - typedef typename ViewMap<_T2>::Type T2; - typedef typename ViewMap<_T3>::Type T3; - Op op; - T1 arg1; - T2 arg2; - T3 arg3; - LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {}; -}; - ///////////////////////////////////////////////////////////////////////////////////////// // The real lattice class, with normal copy and assignment semantics. // This contains extra (host resident) grid pointer data that may be accessed by host code @@ -194,24 +73,33 @@ private: dealloc(); this->_odata_size = size; - if ( size ) + if ( size ) this->_odata = alloc.allocate(this->_odata_size); else this->_odata = nullptr; } } public: + + ///////////////////////////////////////////////////////////////////////////////// + // Can use to make accelerator dirty without copy from host ; useful for temporaries "dont care" prev contents + ///////////////////////////////////////////////////////////////////////////////// + void SetViewMode(ViewMode mode) { + LatticeView accessor(*( (LatticeAccelerator *) this),mode); + accessor.ViewClose(); + } ///////////////////////////////////////////////////////////////////////////////// // Return a view object that may be dereferenced in site loops. // The view is trivially copy constructible and may be copied to an accelerator device // in device lambdas ///////////////////////////////////////////////////////////////////////////////// - LatticeView View (void) const + + LatticeView View (ViewMode mode) const { - LatticeView accessor(*( (LatticeAccelerator *) this)); + LatticeView accessor(*( (LatticeAccelerator *) this),mode); return accessor; } - + ~Lattice() { if ( this->_odata_size ) { dealloc(); @@ -231,12 +119,16 @@ public: CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; - - auto me = View(); + + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); + ExpressionViewClose(exprCopy); return *this; } template inline Lattice & operator=(const LatticeBinaryExpression &expr) @@ -251,11 +143,15 @@ public: assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; - auto me = View(); + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); + ExpressionViewClose(exprCopy); return *this; } template inline Lattice & operator=(const LatticeTrinaryExpression &expr) @@ -269,11 +165,15 @@ public: CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); this->checkerboard=cb; - auto me = View(); + auto exprCopy = expr; + ExpressionViewOpen(exprCopy); + auto me = View(AcceleratorWriteDiscard); accelerator_for(ss,me.size(),1,{ - auto tmp = eval(ss,expr); + auto tmp = eval(ss,exprCopy); vstream(me[ss],tmp); }); + me.ViewClose(); + ExpressionViewClose(exprCopy); return *this; } //GridFromExpression is tricky to do @@ -324,10 +224,11 @@ public: } template inline Lattice & operator = (const sobj & r){ - auto me = View(); + auto me = View(CpuWrite); thread_for(ss,me.size(),{ - me[ss] = r; + me[ss]= r; }); + me.ViewClose(); return *this; } @@ -337,11 +238,12 @@ public: /////////////////////////////////////////// // user defined constructor /////////////////////////////////////////// - Lattice(GridBase *grid) { + Lattice(GridBase *grid,ViewMode mode=AcceleratorWriteDiscard) { this->_grid = grid; resize(this->_grid->oSites()); assert((((uint64_t)&this->_odata[0])&0xF) ==0); this->checkerboard=0; + SetViewMode(mode); } // virtual ~Lattice(void) = default; @@ -357,7 +259,6 @@ public: // copy constructor /////////////////////////////////////////// Lattice(const Lattice& r){ - // std::cout << "Lattice constructor(const Lattice &) "<_grid = r.Grid(); resize(this->_grid->oSites()); *this = r; @@ -380,11 +281,12 @@ public: typename std::enable_if::value,int>::type i=0; conformable(*this,r); this->checkerboard = r.Checkerboard(); - auto me = View(); - auto him= r.View(); + auto me = View(AcceleratorWriteDiscard); + auto him= r.View(AcceleratorRead); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); + me.ViewClose(); him.ViewClose(); return *this; } @@ -394,11 +296,12 @@ public: inline Lattice & operator = (const Lattice & r){ this->checkerboard = r.Checkerboard(); conformable(*this,r); - auto me = View(); - auto him= r.View(); + auto me = View(AcceleratorWriteDiscard); + auto him= r.View(AcceleratorRead); accelerator_for(ss,me.size(),vobj::Nsimd(),{ coalescedWrite(me[ss],him(ss)); }); + me.ViewClose(); him.ViewClose(); return *this; } /////////////////////////////////////////// diff --git a/Grid/lattice/Lattice_basis.h b/Grid/lattice/Lattice_basis.h new file mode 100644 index 00000000..9f1155eb --- /dev/null +++ b/Grid/lattice/Lattice_basis.h @@ -0,0 +1,226 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/lattice/Lattice_basis.h + +Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: Christoph Lehner + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ + +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +void basisOrthogonalize(std::vector &basis,Field &w,int k) +{ + // If assume basis[j] are already orthonormal, + // can take all inner products in parallel saving 2x bandwidth + // Save 3x bandwidth on the second line of loop. + // perhaps 2.5x speed up. + // 2x overall in Multigrid Lanczos + for(int j=0; j +void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef decltype(basis[0]) Field; + typedef decltype(basis[0].View(AcceleratorRead)) View; + + Vector basis_v; basis_v.reserve(basis.size()); + GridBase* grid = basis[0].Grid(); + + for(int k=0;koSites(); + uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead + + typedef typename std::remove_reference::type vobj; + + Vector Bt(siteBlock * nrot); + auto Bp=&Bt[0]; + + // GPU readable copy of matrix + Vector Qt_jv(Nm*Nm); + double *Qt_p = & Qt_jv[0]; + thread_for(i,Nm*Nm,{ + int j = i/Nm; + int k = i%Nm; + Qt_p[i]=Qt(j,k); + }); + + // Block the loop to keep storage footprint down + for(uint64_t s=0;s +void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) +{ + typedef decltype(basis[0].View(AcceleratorRead)) View; + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0].Grid(); + + result.Checkerboard() = basis[0].Checkerboard(); + + Vector basis_v; basis_v.reserve(basis.size()); + for(int k=0;k Qt_jv(Nm); + double * Qt_j = & Qt_jv[0]; + for(int k=0;koSites(),vobj::Nsimd(),{ + auto B=coalescedRead(zz); + for(int k=k0; k +void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;ii for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); + + swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy + std::swap(sort_vals[i],sort_vals[idx[i]]); + + idx[j] = idx[i]; + idx[i] = i; + } + } +} + +inline std::vector basisSortGetIndex(std::vector& sort_vals) +{ + std::vector idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template +void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) +{ + std::vector idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. +template +void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { + result = Zero(); + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + autoView( lhs_v, lhs, CpuRead); + autoView( rhs_v, rhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs_v[ss]); }); @@ -93,8 +93,8 @@ template inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) { Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + autoView( lhs_v, lhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, lhs_v.size(), { ret_v[ss]=op(lhs_v[ss],rhs); }); @@ -107,8 +107,8 @@ template inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) { Lattice ret(rhs.Grid()); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + autoView( rhs_v, rhs, CpuRead); + autoView( ret_v, ret, CpuWrite); thread_for( ss, rhs_v.size(), { ret_v[ss]=op(lhs,rhs_v[ss]); }); diff --git a/Grid/lattice/Lattice_coordinate.h b/Grid/lattice/Lattice_coordinate.h index a1abe58d..cd0f11ee 100644 --- a/Grid/lattice/Lattice_coordinate.h +++ b/Grid/lattice/Lattice_coordinate.h @@ -37,7 +37,7 @@ template inline void LatticeCoordinate(Lattice &l,int mu) GridBase *grid = l.Grid(); int Nsimd = grid->iSites(); - auto l_v = l.View(); + autoView(l_v, l, CpuWrite); thread_for( o, grid->oSites(), { vector_type vI; Coordinate gcoor; @@ -51,23 +51,5 @@ template inline void LatticeCoordinate(Lattice &l,int mu) }); }; -// LatticeCoordinate(); -// FIXME for debug; deprecate this; made obscelete by -template void lex_sites(Lattice &l){ - auto l_v = l.View(); - Real *v_ptr = (Real *)&l_v[0]; - size_t o_len = l.Grid()->oSites(); - size_t v_len = sizeof(vobj)/sizeof(vRealF); - size_t vec_len = vRealF::Nsimd(); - - for(int i=0;i inline auto localNorm2 (const Lattice &rhs)-> Lattice { Lattice ret(rhs.Grid()); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss))); }); @@ -56,9 +56,9 @@ template inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss))); }); @@ -73,9 +73,9 @@ inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Latt typedef decltype(coalescedRead(ll())) sll; typedef decltype(coalescedRead(rr())) srr; Lattice ret(rhs.Grid()); - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); - auto ret_v = ret.View(); + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + autoView( ret_v , ret, AcceleratorWrite); accelerator_for(ss,rhs_v.size(),1,{ // FIXME had issues with scalar version of outer // Use vector [] operator and don't read coalesce this loop diff --git a/Grid/lattice/Lattice_matrix_reduction.h b/Grid/lattice/Lattice_matrix_reduction.h index 0980ad8a..7c470fef 100644 --- a/Grid/lattice/Lattice_matrix_reduction.h +++ b/Grid/lattice/Lattice_matrix_reduction.h @@ -51,9 +51,9 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(); - auto Y_v = Y.View(); - auto R_v = R.View(); + autoView( X_v , X, CpuRead); + autoView( Y_v , Y, CpuRead); + autoView( R_v , R, CpuWrite); thread_region { std::vector s_x(Nblock); @@ -97,8 +97,8 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v = X.View(); - auto R_v = R.View(); + autoView( X_v , X, CpuRead); + autoView( R_v , R, CpuWrite); thread_region { @@ -156,8 +156,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice int ostride=FullGrid->_ostride[Orthog]; typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v = lhs.View(); - auto rhs_v = rhs.View(); + autoView( lhs_v , lhs, CpuRead); + autoView( rhs_v , rhs, CpuRead); thread_region { std::vector Left(Nblock); std::vector Right(Nblock); diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index feca2f44..c79becf2 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -46,9 +46,9 @@ auto PeekIndex(const Lattice &lhs,int i) -> Lattice(vobj(),i))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - thread_for( ss, lhs_v.size(), { + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); + accelerator_for( ss, lhs_v.size(), 1, { ret_v[ss] = peekIndex(lhs_v[ss],i); }); return ret; @@ -58,9 +58,9 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(vobj(),i,j))> ret(lhs.Grid()); ret.Checkerboard()=lhs.Checkerboard(); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); - thread_for( ss, lhs_v.size(), { + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); + accelerator_for( ss, lhs_v.size(), 1, { ret_v[ss] = peekIndex(lhs_v[ss],i,j); }); return ret; @@ -72,18 +72,18 @@ auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice void PokeIndex(Lattice &lhs,const Lattice(vobj(),0))> & rhs,int i) { - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); - thread_for( ss, lhs_v.size(), { + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + accelerator_for( ss, lhs_v.size(), 1, { pokeIndex(lhs_v[ss],rhs_v[ss],i); }); } template void PokeIndex(Lattice &lhs,const Lattice(vobj(),0,0))> & rhs,int i,int j) { - auto rhs_v = rhs.View(); - auto lhs_v = lhs.View(); - thread_for( ss, lhs_v.size(), { + autoView( rhs_v, rhs, AcceleratorRead); + autoView( lhs_v, lhs, AcceleratorWrite); + accelerator_for( ss, lhs_v.size(), 1, { pokeIndex(lhs_v[ss],rhs_v[ss],i,j); }); } @@ -111,7 +111,7 @@ void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ // extract-modify-merge cycle is easiest way and this is not perf critical ExtractBuffer buf(Nsimd); - auto l_v = l.View(); + autoView( l_v , l, CpuWrite); if ( rank == grid->ThisRank() ) { extract(l_v[odx],buf); buf[idx] = s; @@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ grid->GlobalCoorToRankIndex(rank,odx,idx,site); ExtractBuffer buf(Nsimd); - auto l_v = l.View(); + autoView( l_v , l, CpuWrite); extract(l_v[odx],buf); s = buf[idx]; @@ -151,21 +151,21 @@ void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ return; }; - ////////////////////////////////////////////////////////// // Peek a scalar object from the SIMD array ////////////////////////////////////////////////////////// +// Must be CPU read view template -accelerator_inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site){ - - GridBase *grid = l.Grid(); - +inline void peekLocalSite(sobj &s,const LatticeView &l,Coordinate &site) +{ + GridBase *grid = l.getGrid(); + assert(l.mode==CpuRead); typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); - assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( l.Checkerboard()== grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); static const int words=sizeof(vobj)/sizeof(vector_type); @@ -173,8 +173,7 @@ accelerator_inline void peekLocalSite(sobj &s,const Lattice &l,Coordinate idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = l.View(); - scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * vp = (scalar_type *)&l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w &l,Coordinate return; }; - +// Must be CPU write view template -accelerator_inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ - - GridBase *grid=l.Grid(); +inline void pokeLocalSite(const sobj &s,LatticeView &l,Coordinate &site) +{ + GridBase *grid=l.getGrid(); + assert(l.mode==CpuWrite); typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); - assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( l.Checkerboard()== grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); static const int words=sizeof(vobj)/sizeof(vector_type); @@ -202,13 +202,11 @@ accelerator_inline void pokeLocalSite(const sobj &s,Lattice &l,Coordinate idx= grid->iIndex(site); odx= grid->oIndex(site); - auto l_v = l.View(); - scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * vp = (scalar_type *)&l[odx]; scalar_type * pt = (scalar_type *)&s; for(int w=0;w inline Lattice adj(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard()=lhs.Checkerboard(); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], adj(lhs_v(ss))); }); @@ -50,8 +53,11 @@ template inline Lattice adj(const Lattice &lhs){ template inline Lattice conjugate(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto lhs_v = lhs.View(); - auto ret_v = ret.View(); + + autoView( lhs_v, lhs, AcceleratorRead); + autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard() = lhs.Checkerboard(); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 3c5b03e5..c2955485 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -5,6 +5,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -24,7 +25,7 @@ Author: paboyle #include -#ifdef GRID_NVCC +#if defined(GRID_CUDA)||defined(GRID_HIP) #include #endif @@ -38,7 +39,36 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) { typedef typename vobj::scalar_object sobj; - const int Nsimd = vobj::Nsimd(); + // const int Nsimd = vobj::Nsimd(); + const int nthread = GridThread::GetThreads(); + + Vector sumarray(nthread); + for(int i=0;i +inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites) +{ + typedef typename vobj::scalar_objectD sobj; + const int nthread = GridThread::GetThreads(); Vector sumarray(nthread); @@ -62,23 +92,43 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) ssum = ssum+sumarray[i]; } - return ssum; + typedef typename vobj::scalar_object ssobj; + ssobj ret = ssum; + return ret; } + + template inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) { -#ifdef GRID_NVCC +#if defined(GRID_CUDA)||defined(GRID_HIP) return sum_gpu(arg,osites); #else return sum_cpu(arg,osites); #endif } +template +inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) +{ +#if defined(GRID_CUDA)||defined(GRID_HIP) + return sumD_gpu(arg,osites); +#else + return sumD_cpu(arg,osites); +#endif +} + template inline typename vobj::scalar_object sum(const Lattice &arg) { - auto arg_v = arg.View(); +#if defined(GRID_CUDA)||defined(GRID_HIP) + autoView( arg_v, arg, AcceleratorRead); Integer osites = arg.Grid()->oSites(); - auto ssum= sum(&arg_v[0],osites); + auto ssum= sum_gpu(&arg_v[0],osites); +#else + autoView(arg_v, arg, CpuRead); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum_cpu(&arg_v[0],osites); +#endif arg.Grid()->GlobalSum(ssum); return ssum; } @@ -93,55 +143,49 @@ template inline RealD norm2(const Lattice &arg){ // Double inner product template -inline ComplexD innerProduct(const Lattice &left,const Lattice &right) +inline ComplexD rankInnerProduct(const Lattice &left,const Lattice &right) { typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_typeD vector_type; ComplexD nrm; GridBase *grid = left.Grid(); - - // Might make all code paths go this way. - auto left_v = left.View(); - auto right_v=right.View(); const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#ifdef GRID_NVCC - // GPU - SIMT lane compliance... - typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; + // Might make all code paths go this way. + typedef decltype(innerProductD(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - + + { + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); - accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v(ss); - auto y_l = right_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); - }) + // GPU - SIMT lane compliance... + accelerator_for( ss, sites, 1,{ + auto x_l = left_v[ss]; + auto y_l = right_v[ss]; + inner_tmp_v[ss]=innerProductD(x_l,y_l); + }); + } // This is in single precision and fails some tests - // Need a sumD that sums in double - nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites)); -#else - // CPU - typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t; - Vector inner_tmp(sites); - auto inner_tmp_v = &inner_tmp[0]; - - accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v[ss]; - auto y_l = right_v[ss]; - inner_tmp_v[ss]=innerProductD(x_l,y_l); - }) - nrm = TensorRemove(sum(inner_tmp_v,sites)); -#endif - grid->GlobalSum(nrm); - + auto anrm = sum(inner_tmp_v,sites); + nrm = anrm; return nrm; } +template +inline ComplexD innerProduct(const Lattice &left,const Lattice &right) { + GridBase *grid = left.Grid(); + ComplexD nrm = rankInnerProduct(left,right); + grid->GlobalSum(nrm); + return nrm; +} + + ///////////////////////// // Fast axpby_norm // z = a x + b y @@ -167,45 +211,67 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt GridBase *grid = x.Grid(); - auto x_v=x.View(); - auto y_v=y.View(); - auto z_v=z.View(); - const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); -#ifdef GRID_NVCC // GPU - typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; - Vector inner_tmp(sites); - auto inner_tmp_v = &inner_tmp[0]; + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); - accelerator_for( ss, sites, nsimd,{ - auto tmp = a*x_v(ss)+b*y_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); - coalescedWrite(z_v[ss],tmp); - }); - - nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites))); -#else - // CPU typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; - - accelerator_for( ss, sites, nsimd,{ - auto tmp = a*x_v(ss)+b*y_v(ss); + + accelerator_for( ss, sites, 1,{ + auto tmp = a*x_v[ss]+b*y_v[ss]; inner_tmp_v[ss]=innerProductD(tmp,tmp); z_v[ss]=tmp; }); - // Already promoted to double nrm = real(TensorRemove(sum(inner_tmp_v,sites))); -#endif grid->GlobalSum(nrm); return nrm; } - +template strong_inline void +innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Lattice &right) +{ + conformable(left,right); + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_typeD vector_type; + Vector tmp(2); + + GridBase *grid = left.Grid(); + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); + + // GPU + typedef decltype(innerProductD(vobj(),vobj())) inner_t; + typedef decltype(innerProductD(vobj(),vobj())) norm_t; + Vector inner_tmp(sites); + Vector norm_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + auto norm_tmp_v = &norm_tmp[0]; + { + autoView(left_v,left, AcceleratorRead); + autoView(right_v,right,AcceleratorRead); + accelerator_for( ss, sites, 1,{ + auto left_tmp = left_v[ss]; + inner_tmp_v[ss]=innerProductD(left_tmp,right_v[ss]); + norm_tmp_v [ss]=innerProductD(left_tmp,left_tmp); + }); + } + + tmp[0] = TensorRemove(sum(inner_tmp_v,sites)); + tmp[1] = TensorRemove(sum(norm_tmp_v,sites)); + + grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector + ip = tmp[0]; + nrm = real(tmp[1]); +} + template inline auto sum(const LatticeUnaryExpression & expr) ->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object @@ -271,7 +337,7 @@ template inline void sliceSum(const Lattice &Data,std::vector< // sum over reduced dimension planes, breaking out orthog dir // Parallel over orthog direction - auto Data_v=Data.View(); + autoView( Data_v, Data, CpuRead); thread_for( r,rd, { int so=r*grid->_ostride[orthogdim]; // base offset for start of plane for(int n=0;n & result, const Latti int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - auto lhv=lhs.View(); - auto rhv=rhs.View(); + autoView( lhv, lhs, CpuRead); + autoView( rhv, rhs, CpuRead); thread_for( r,rd,{ int so=r*grid->_ostride[orthogdim]; // base offset for start of plane @@ -457,14 +523,12 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice tensor_reduced at; at=av; - auto Rv=R.View(); - auto Xv=X.View(); - auto Yv=Y.View(); - thread_for_collapse(2, n, e1, { - for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto X_v=X.View(); - auto Y_v=Y.View(); - auto R_v=R.View(); + autoView( X_v, X, CpuRead); + autoView( Y_v, Y, CpuRead); + autoView( R_v, R, CpuWrite); thread_region { Vector s_x(Nblock); @@ -564,13 +628,14 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< // int nl=1; //FIXME package in a convenient iterator + // thread_for2d_in_region //Should loop over a plane orthogonal to direction "Orthog" int stride=FullGrid->_slice_stride[Orthog]; int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; - auto R_v = R.View(); - auto X_v = X.View(); + autoView( R_v, R, CpuWrite); + autoView( X_v, X, CpuRead); thread_region { std::vector s_x(Nblock); @@ -628,8 +693,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice typedef typename vobj::vector_typeD vector_typeD; - auto lhs_v=lhs.View(); - auto rhs_v=rhs.View(); + autoView( lhs_v, lhs, CpuRead); + autoView( rhs_v, rhs, CpuRead); thread_region { std::vector Left(Nblock); diff --git a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h index c5d75356..5f490507 100644 --- a/Grid/lattice/Lattice_reduction_gpu.h +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -1,7 +1,13 @@ NAMESPACE_BEGIN(Grid); -#define WARP_SIZE 32 +#ifdef GRID_HIP +extern hipDeviceProp_t *gpu_props; +#endif +#ifdef GRID_CUDA extern cudaDeviceProp *gpu_props; +#endif + +#define WARP_SIZE 32 __device__ unsigned int retirementCount = 0; template @@ -19,7 +25,12 @@ template void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) { int device; +#ifdef GRID_CUDA cudaGetDevice(&device); +#endif +#ifdef GRID_HIP + hipGetDevice(&device); +#endif Iterator warpSize = gpu_props[device].warpSize; Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock; @@ -147,7 +158,7 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { sobj *smem = (sobj *)shmem_pointer; // wait until all outstanding memory instructions in this thread are finished - __threadfence(); + acceleratorFence(); if (tid==0) { unsigned int ticket = atomicInc(&retirementCount, gridDim.x); @@ -156,8 +167,8 @@ __global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { } // each thread must read the correct value of amLast - __syncthreads(); - + acceleratorSynchroniseAll(); + if (amLast) { // reduce buffer[0], ..., buffer[gridDim.x-1] Iterator i = tid; @@ -199,13 +210,7 @@ inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) sobj *buffer_v = &buffer[0]; reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); - cudaDeviceSynchronize(); - - cudaError err = cudaGetLastError(); - if ( cudaSuccess != err ) { - printf("Cuda error %s\n",cudaGetErrorString( err )); - exit(0); - } + accelerator_barrier(); auto result = buffer_v[0]; return result; } diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 1bb1f087..e5e63716 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -375,7 +375,7 @@ public: int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity int words = sizeof(scalar_object) / sizeof(scalar_type); - auto l_v = l.View(); + autoView(l_v, l, CpuWrite); thread_for( ss, osites, { ExtractBuffer buf(Nsimd); for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times @@ -461,8 +461,8 @@ public: } { - // Obtain one reseeded generator per thread - int Nthread = GridThread::GetThreads(); + // Obtain one reseeded generator per thread + int Nthread = 32; // Hardwire a good level or parallelism std::vector seeders(Nthread); for(int t=0;t inline auto trace(const Lattice &lhs) -> Lattice { Lattice ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView(ret_v , ret, AcceleratorWrite); + autoView(lhs_v , lhs, AcceleratorRead); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], trace(lhs_v(ss))); }); return ret; }; +*/ //////////////////////////////////////////////////////////////////////////////////////////////////// // Trace Index level dependent operation @@ -56,8 +58,8 @@ template inline auto TraceIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v , ret, AcceleratorWrite); + autoView( lhs_v , lhs, AcceleratorRead); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], traceIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index c80e7db2..beceecc9 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -6,6 +6,7 @@ Copyright (C) 2015 Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,11 +47,12 @@ inline void subdivides(GridBase *coarse,GridBase *fine) //////////////////////////////////////////////////////////////////////////////////////////// // remove and insert a half checkerboard //////////////////////////////////////////////////////////////////////////////////////////// -template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ +template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full) +{ half.Checkerboard() = cb; - auto half_v = half.View(); - auto full_v = full.View(); + autoView( half_v, half, CpuWrite); + autoView( full_v, full, CpuRead); thread_for(ss, full.Grid()->oSites(),{ int cbos; Coordinate coor; @@ -63,10 +65,11 @@ template inline void pickCheckerboard(int cb,Lattice &half,con } }); } -template inline void setCheckerboard(Lattice &full,const Lattice &half){ +template inline void setCheckerboard(Lattice &full,const Lattice &half) +{ int cb = half.Checkerboard(); - auto half_v = half.View(); - auto full_v = full.View(); + autoView( half_v , half, CpuRead); + autoView( full_v , full, CpuWrite); thread_for(ss,full.Grid()->oSites(),{ Coordinate coor; @@ -81,96 +84,138 @@ template inline void setCheckerboard(Lattice &full,const Latti } }); } - -template -inline void blockProject(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) -{ - GridBase * fine = fineData.Grid(); - GridBase * coarse= coarseData.Grid(); - Lattice ip(coarse); - - // auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); - auto ip_ = ip.View(); - for(int v=0;voSites(), vobj::Nsimd(), { - coalescedWrite(coarseData_[sc](v),ip_(sc)); - }); - } +//////////////////////////////////////////////////////////////////////////////////////////// +// Flexible Type Conversion for internal promotion to double as well as graceful +// treatment of scalar-compatible types +//////////////////////////////////////////////////////////////////////////////////////////// +accelerator_inline void convertType(ComplexD & out, const std::complex & in) { + out = in; } -template -inline void blockProject1(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) -{ - typedef iVector coarseSiteData; - coarseSiteData elide; - typedef decltype(coalescedRead(elide)) ScalarComplex; - GridBase * fine = fineData.Grid(); - GridBase * coarse= coarseData.Grid(); - int _ndimension = coarse->_ndimension; +accelerator_inline void convertType(ComplexF & out, const std::complex & in) { + out = in; +} - // checks - assert( nbasis == Basis.size() ); - subdivides(coarse,fine); - for(int i=0;i_rdimensions[d] / coarse->_rdimensions[d]; - assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); - } - int blockVol = fine->oSites()/coarse->oSites(); +accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) { + out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v); +} - coarseData=Zero(); +accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) { + Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v); +} - auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); - //////////////////////////////////////////////////////////////////////////////////////////////////////// - // To make this lock free, loop over coars parallel, and then loop over fine associated with coarse. - // Otherwise do fine inner product per site, and make the update atomic - //////////////////////////////////////////////////////////////////////////////////////////////////////// - accelerator_for( sci, nbasis*coarse->oSites(), vobj::Nsimd(), { +template + accelerator_inline void convertType(iMatrix & out, const iMatrix & in); +template + accelerator_inline void convertType(iVector & out, const iVector & in); - auto sc=sci/nbasis; - auto i=sci%nbasis; - auto Basis_ = Basis[i].View(); +template::value, T1>::type* = nullptr> +accelerator_inline void convertType(T1 & out, const iScalar & in) { + convertType(out,in._internal); +} - Coordinate coor_c(_ndimension); - Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate +template +accelerator_inline void convertType(iScalar & out, const T2 & in) { + convertType(out._internal,in); +} - int sf; - decltype(innerProduct(Basis_(sf),fineData_(sf))) reduce=Zero(); +template +accelerator_inline void convertType(iMatrix & out, const iMatrix & in) { + for (int i=0;i +accelerator_inline void convertType(iVector & out, const iVector & in) { + for (int i=0;i::value, T>::type* = nullptr> +accelerator_inline void convertType(T & out, const T & in) { + out = in; +} - Lexicographic::CoorFromIndex(coor_b,sb,block_r); - for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d]+coor_b[d]; - Lexicographic::IndexFromCoor(coor_f,sf,fine->_rdimensions); - - reduce=reduce+innerProduct(Basis_(sf),fineData_(sf)); - } - coalescedWrite(coarseData_[sc](i),reduce); +template +accelerator_inline void convertType(Lattice & out, const Lattice & in) { + autoView( out_v , out,AcceleratorWrite); + autoView( in_v , in ,AcceleratorRead); + accelerator_for(ss,out_v.size(),T1::Nsimd(),{ + convertType(out_v[ss],in_v(ss)); }); - return; } -template -inline void blockZAXPY(Lattice &fineZ, - const Lattice &coarseA, - const Lattice &fineX, - const Lattice &fineY) +//////////////////////////////////////////////////////////////////////////////////////////// +// precision-promoted local inner product +//////////////////////////////////////////////////////////////////////////////////////////// +template +inline auto localInnerProductD(const Lattice &lhs,const Lattice &rhs) +-> Lattice> +{ + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + + typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner; + Lattice> ret(lhs.Grid()); + + { + autoView(ret_v, ret,AcceleratorWrite); + accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ + convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss))); + }); + } + return ret; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// block routines +//////////////////////////////////////////////////////////////////////////////////////////// +template +inline void blockProject(Lattice > &coarseData, + const Lattice &fineData, + const VLattice &Basis) +{ + GridBase * fine = fineData.Grid(); + GridBase * coarse= coarseData.Grid(); + + Lattice> ip(coarse); + Lattice fineDataRed = fineData; + + autoView( coarseData_ , coarseData, AcceleratorWrite); + autoView( ip_ , ip, AcceleratorWrite); + for(int v=0;v + accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), { + convertType(coarseData_[sc](v),ip_[sc]); + }); + + // improve numerical stability of projection + // |fine> = |fine> - |basis> + ip=-ip; + blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed); + } +} + + +template + inline void blockZAXPY(Lattice &fineZ, + const Lattice &coarseA, + const Lattice &fineX, + const Lattice &fineY) { GridBase * fine = fineZ.Grid(); GridBase * coarse= coarseA.Grid(); @@ -182,7 +227,7 @@ inline void blockZAXPY(Lattice &fineZ, conformable(fineX,fineZ); int _ndimension = coarse->_ndimension; - + Coordinate block_r (_ndimension); // FIXME merge with subdivide checking routine as this is redundant @@ -191,29 +236,66 @@ inline void blockZAXPY(Lattice &fineZ, assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); } - auto fineZ_ = fineZ.View(); - auto fineX_ = fineX.View(); - auto fineY_ = fineY.View(); - auto coarseA_= coarseA.View(); + autoView( fineZ_ , fineZ, AcceleratorWrite); + autoView( fineX_ , fineX, AcceleratorRead); + autoView( fineY_ , fineY, AcceleratorRead); + autoView( coarseA_, coarseA, AcceleratorRead); accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), { - - int sc; - Coordinate coor_c(_ndimension); - Coordinate coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + int sc; + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); - // z = A x + y - coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf)); + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - }); + // z = A x + y +#ifdef GRID_SIMT + typename vobj2::tensor_reduced::scalar_object cA; + typename vobj::scalar_object cAx; +#else + typename vobj2::tensor_reduced cA; + vobj cAx; +#endif + convertType(cA,TensorRemove(coarseA_(sc))); + auto prod = cA*fineX_(sf); + convertType(cAx,prod); + coalescedWrite(fineZ_[sf],cAx+fineY_(sf)); + + }); return; } + template + inline void blockInnerProductD(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) +{ + typedef iScalar dotp; + + GridBase *coarse(CoarseInner.Grid()); + GridBase *fine (fineX.Grid()); + + Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); + Lattice coarse_inner(coarse); + + // Precision promotion + fine_inner = localInnerProductD(fineX,fineY); + blockSum(coarse_inner,fine_inner); + { + autoView( CoarseInner_ , CoarseInner,AcceleratorWrite); + autoView( coarse_inner_ , coarse_inner,AcceleratorRead); + accelerator_for(ss, coarse->oSites(), 1, { + convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss])); + }); + } + +} + +template // deprecate inline void blockInnerProduct(Lattice &CoarseInner, const Lattice &fineX, const Lattice &fineY) @@ -227,15 +309,17 @@ inline void blockInnerProduct(Lattice &CoarseInner, Lattice coarse_inner(coarse); // Precision promotion? - auto CoarseInner_ = CoarseInner.View(); - auto coarse_inner_ = coarse_inner.View(); - fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); - accelerator_for(ss, coarse->oSites(), 1, { - CoarseInner_[ss] = coarse_inner_[ss]; - }); + { + autoView( CoarseInner_ , CoarseInner, AcceleratorWrite); + autoView( coarse_inner_ , coarse_inner, AcceleratorRead); + accelerator_for(ss, coarse->oSites(), 1, { + CoarseInner_[ss] = coarse_inner_[ss]; + }); + } } + template inline void blockNormalise(Lattice &ip,Lattice &fineX) { @@ -248,7 +332,7 @@ inline void blockNormalise(Lattice &ip,Lattice &fineX) // useful in multigrid project; // Generic name : Coarsen? template -inline void blockSum(Lattice &coarseData,const Lattice &fineData) +inline void blockSum(Lattice &coarseData,const Lattice &fineData) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); @@ -256,9 +340,9 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) subdivides(coarse,fine); // require they map int _ndimension = coarse->_ndimension; - + Coordinate block_r (_ndimension); - + for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } @@ -266,32 +350,33 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) // Turn this around to loop threaded over sc and interior loop // over sf would thread better - auto coarseData_ = coarseData.View(); - auto fineData_ = fineData.View(); + autoView( coarseData_ , coarseData, AcceleratorWrite); + autoView( fineData_ , fineData, AcceleratorRead); accelerator_for(sc,coarse->oSites(),1,{ - // One thread per sub block - Coordinate coor_c(_ndimension); - Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate - coarseData_[sc]=Zero(); + // One thread per sub block + Coordinate coor_c(_ndimension); + Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate + coarseData_[sc]=Zero(); - for(int sb=0;sb_rdimensions); + for(int sb=0;sb_rdimensions); - }); + coarseData_[sc]=coarseData_[sc]+fineData_[sf]; + } + + }); return; } + template inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,Coordinate coor) { @@ -313,8 +398,8 @@ inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice -inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) +template +inline void blockOrthonormalize(Lattice &ip,VLattice &Basis) { GridBase *coarse = ip.Grid(); GridBase *fine = Basis[0].Grid(); @@ -322,23 +407,30 @@ inline void blockOrthogonalise(Lattice &ip,std::vector > int nbasis = Basis.size() ; // checks - subdivides(coarse,fine); + subdivides(coarse,fine); for(int i=0;i (Basis[v],ip,Basis[u],Basis[v]); + blockZAXPY(Basis[v],ip,Basis[u],Basis[v]); } blockNormalise(ip,Basis[v]); } } +template +inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) // deprecated inaccurate naming +{ + blockOrthonormalize(ip,Basis); +} + #if 0 +// TODO: CPU optimized version here template inline void blockPromote(const Lattice > &coarseData, Lattice &fineData, @@ -360,8 +452,8 @@ inline void blockPromote(const Lattice > &coarseData, for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } - auto fineData_ = fineData.View(); - auto coarseData_ = coarseData.View(); + autoView( fineData_ , fineData, AcceleratorWrite); + autoView( coarseData_ , coarseData, AcceleratorRead); // Loop with a cache friendly loop ordering accelerator_for(sf,fine->oSites(),1,{ @@ -374,7 +466,7 @@ inline void blockPromote(const Lattice > &coarseData, Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); for(int i=0;i > &coarseData, } #else -template +template inline void blockPromote(const Lattice > &coarseData, Lattice &fineData, - const std::vector > &Basis) + const VLattice &Basis) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); - fineData=Zero(); for(int i=0;i > ip = PeekIndex<0>(coarseData,i); - Lattice cip(coarse); - auto cip_ = cip.View(); - auto ip_ = ip.View(); - accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ - coalescedWrite(cip_[sc], ip_(sc)()); - }); - blockZAXPY(fineData,cip,Basis[i],fineData); + + //Lattice cip(coarse); + //autoView( cip_ , cip, AcceleratorWrite); + //autoView( ip_ , ip, AcceleratorRead); + //accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{ + // coalescedWrite(cip_[sc], ip_(sc)()); + // }); + //blockZAXPY(fineData,cip,Basis[i],fineData); + blockZAXPY(fineData,ip,Basis[i],fineData); } } #endif @@ -427,15 +520,17 @@ void localConvert(const Lattice &in,Lattice &out) assert(ig->lSites() == og->lSites()); } + autoView(in_v,in,CpuRead); + autoView(out_v,out,CpuWrite); thread_for(idx, ig->lSites(),{ sobj s; ssobj ss; Coordinate lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(s,in,lcoor); + peekLocalSite(s,in_v,lcoor); ss=s; - pokeLocalSite(ss,out,lcoor); + pokeLocalSite(ss,out_v,lcoor); }); } @@ -470,8 +565,9 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro Coordinate rdt = Tg->_rdimensions; Coordinate ist = Tg->_istride; Coordinate ost = Tg->_ostride; - auto t_v = To.View(); - auto f_v = From.View(); + + autoView( t_v , To, AcceleratorWrite); + autoView( f_v , From, AcceleratorRead); accelerator_for(idx,Fg->lSites(),1,{ sobj s; Coordinate Fcoor(nd); @@ -494,8 +590,6 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro for(int w=0;w &lowDim,Lattice & higherDim,int slice } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuRead); + autoView(higherDimv,higherDim,CpuWrite); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -538,8 +634,8 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice hcoor[d]=lcoor[ddl++]; } } - peekLocalSite(s,lowDim,lcoor); - pokeLocalSite(s,higherDim,hcoor); + peekLocalSite(s,lowDimv,lcoor); + pokeLocalSite(s,higherDimv,hcoor); }); } @@ -567,6 +663,8 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic } } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuWrite); + autoView(higherDimv,higherDim,CpuRead); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -579,8 +677,8 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic hcoor[d]=lcoor[ddl++]; } } - peekLocalSite(s,higherDim,hcoor); - pokeLocalSite(s,lowDim,lcoor); + peekLocalSite(s,higherDimv,hcoor); + pokeLocalSite(s,lowDimv,lcoor); }); } @@ -608,6 +706,8 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuRead); + autoView(higherDimv,higherDim,CpuWrite); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -616,8 +716,8 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; hcoor[orthog] = slice_hi; - peekLocalSite(s,lowDim,lcoor); - pokeLocalSite(s,higherDim,hcoor); + peekLocalSite(s,lowDimv,lcoor); + pokeLocalSite(s,higherDimv,hcoor); } }); } @@ -645,6 +745,8 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int } // the above should guarantee that the operations are local + autoView(lowDimv,lowDim,CpuWrite); + autoView(higherDimv,higherDim,CpuRead); thread_for(idx,lg->lSites(),{ sobj s; Coordinate lcoor(nl); @@ -653,8 +755,8 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; hcoor[orthog] = slice_hi; - peekLocalSite(s,higherDim,hcoor); - pokeLocalSite(s,lowDim,lcoor); + peekLocalSite(s,higherDimv,hcoor); + pokeLocalSite(s,lowDimv,lcoor); } }); } @@ -718,7 +820,7 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) } //loop over outer index - auto in_v = in.View(); + autoView( in_v , in, CpuRead); thread_for(in_oidx,in_grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray out_ptrs(in_nsimd); @@ -811,7 +913,7 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) icoor[lane].resize(ndim); grid->iCoorFromIindex(icoor[lane],lane); } - auto out_v = out.View(); + autoView( out_v , out, CpuWrite); thread_for(oidx, grid->oSites(),{ //Assemble vector of pointers to output elements ExtractPointerArray ptrs(nsimd); @@ -914,7 +1016,7 @@ void precisionChange(Lattice &out, const Lattice &in) std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - auto out_v = out.View(); + autoView( out_v , out, CpuWrite); thread_for(out_oidx,out_grid->oSites(),{ Coordinate out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); diff --git a/Grid/lattice/Lattice_transpose.h b/Grid/lattice/Lattice_transpose.h index 9b0b3483..adfe3380 100644 --- a/Grid/lattice/Lattice_transpose.h +++ b/Grid/lattice/Lattice_transpose.h @@ -38,17 +38,19 @@ NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////////////////////////////////// // Transpose //////////////////////////////////////////////////////////////////////////////////////////////////// +/* template inline Lattice transpose(const Lattice &lhs){ Lattice ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss], transpose(lhs_v(ss))); }); return ret; }; - +*/ + //////////////////////////////////////////////////////////////////////////////////////////////////// // Index level dependent transpose //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -56,8 +58,8 @@ template inline auto TransposeIndex(const Lattice &lhs) -> Lattice(vobj()))> { Lattice(vobj()))> ret(lhs.Grid()); - auto ret_v = ret.View(); - auto lhs_v = lhs.View(); + autoView( ret_v, ret, AcceleratorWrite); + autoView( lhs_v, lhs, AcceleratorRead); accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ coalescedWrite(ret_v[ss] , transposeIndex(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_unary.h b/Grid/lattice/Lattice_unary.h index 591afe72..07424b3d 100644 --- a/Grid/lattice/Lattice_unary.h +++ b/Grid/lattice/Lattice_unary.h @@ -35,8 +35,8 @@ NAMESPACE_BEGIN(Grid); template Lattice pow(const Lattice &rhs_i,RealD y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + autoView( rhs, rhs_i, AcceleratorRead); + autoView( ret, ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),1,{ ret[ss]=pow(rhs[ss],y); @@ -45,8 +45,8 @@ template Lattice pow(const Lattice &rhs_i,RealD y){ } template Lattice mod(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + autoView( rhs , rhs_i, AcceleratorRead); + autoView( ret , ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],mod(rhs(ss),y)); @@ -56,8 +56,8 @@ template Lattice mod(const Lattice &rhs_i,Integer y){ template Lattice div(const Lattice &rhs_i,Integer y){ Lattice ret_i(rhs_i.Grid()); - auto ret = ret_i.View(); - auto rhs = rhs_i.View(); + autoView( ret , ret_i, AcceleratorWrite); + autoView( rhs , rhs_i, AcceleratorRead); ret.Checkerboard() = rhs_i.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],div(rhs(ss),y)); @@ -67,8 +67,8 @@ template Lattice div(const Lattice &rhs_i,Integer y){ template Lattice expMat(const Lattice &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ Lattice ret_i(rhs_i.Grid()); - auto rhs = rhs_i.View(); - auto ret = ret_i.View(); + autoView( rhs , rhs_i, AcceleratorRead); + autoView( ret , ret_i, AcceleratorWrite); ret.Checkerboard() = rhs.Checkerboard(); accelerator_for(ss,rhs.size(),obj::Nsimd(),{ coalescedWrite(ret[ss],Exponentiate(rhs(ss),alpha, Nexp)); diff --git a/Grid/lattice/Lattice_view.h b/Grid/lattice/Lattice_view.h new file mode 100644 index 00000000..3b76b921 --- /dev/null +++ b/Grid/lattice/Lattice_view.h @@ -0,0 +1,168 @@ +#pragma once +NAMESPACE_BEGIN(Grid); +/////////////////////////////////////////////////////////////////// +// Base class which can be used by traits to pick up behaviour +/////////////////////////////////////////////////////////////////// +class LatticeBase {}; + +///////////////////////////////////////////////////////////////////////////////////////// +// Conformable checks; same instance of Grid required +///////////////////////////////////////////////////////////////////////////////////////// +void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) +{ + assert(lhs == rhs); +} + +//////////////////////////////////////////////////////////////////////////// +// Minimal base class containing only data valid to access from accelerator +// _odata will be a managed pointer in CUDA +//////////////////////////////////////////////////////////////////////////// +// Force access to lattice through a view object. +// prevents writing of code that will not offload to GPU, but perhaps annoyingly +// strict since host could could in principle direct access through the lattice object +// Need to decide programming model. +#define LATTICE_VIEW_STRICT +template class LatticeAccelerator : public LatticeBase +{ +protected: + //public: + GridBase *_grid; + int checkerboard; + vobj *_odata; // A managed pointer + uint64_t _odata_size; + ViewAdvise advise; +public: + accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr), advise(AdviseDefault) { }; + accelerator_inline uint64_t oSites(void) const { return _odata_size; }; + accelerator_inline int Checkerboard(void) const { return checkerboard; }; + accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view + accelerator_inline ViewAdvise Advise(void) const { return advise; }; + accelerator_inline ViewAdvise &Advise(void) { return this->advise; }; // can assign advise on a container, not a view + accelerator_inline void Conformable(GridBase * &grid) const + { + if (grid) conformable(grid, _grid); + else grid = _grid; + }; + // Host only + GridBase * getGrid(void) const { return _grid; }; +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// A View class which provides accessor to the data. +// This will be safe to call from accelerator_for and is trivially copy constructible +// The copy constructor for this will need to be used by device lambda functions +///////////////////////////////////////////////////////////////////////////////////////// +template +class LatticeView : public LatticeAccelerator +{ +public: + // Rvalue + ViewMode mode; + void * cpu_ptr; +#ifdef GRID_SIMT + accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { + return coalescedRead(this->_odata[i]); + } +#else + accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } +#endif + + accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; + + accelerator_inline uint64_t begin(void) const { return 0;}; + accelerator_inline uint64_t end(void) const { return this->_odata_size; }; + accelerator_inline uint64_t size(void) const { return this->_odata_size; }; + + LatticeView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me){} + LatticeView(const LatticeView &refer_to_me) = default; // Trivially copyable + LatticeView(const LatticeAccelerator &refer_to_me,ViewMode mode) : LatticeAccelerator (refer_to_me) + { + this->ViewOpen(mode); + } + + // Host functions + void ViewOpen(ViewMode mode) + { // Translate the pointer, could save a copy. Could use a "Handle" and not save _odata originally in base + // std::cout << "View Open"<_odata<cpu_ptr = (void *)this->_odata; + this->mode = mode; + this->_odata =(vobj *) + MemoryManager::ViewOpen(this->cpu_ptr, + this->_odata_size*sizeof(vobj), + mode, + this->advise); + } + void ViewClose(void) + { // Inform the manager + // std::cout << "View Close"<cpu_ptr<cpu_ptr,this->mode); + } + +}; +// Little autoscope assister +template +class ViewCloser +{ + View v; // Take a copy of view and call view close when I go out of scope automatically + public: + ViewCloser(View &_v) : v(_v) {}; + ~ViewCloser() { v.ViewClose(); } +}; + +#define autoView(l_v,l,mode) \ + auto l_v = l.View(mode); \ + ViewCloser _autoView##l_v(l_v); + +///////////////////////////////////////////////////////////////////////////////////////// +// Lattice expression types used by ET to assemble the AST +// +// Need to be able to detect code paths according to the whether a lattice object or not +// so introduce some trait type things +///////////////////////////////////////////////////////////////////////////////////////// + +class LatticeExpressionBase {}; + +template using is_lattice = std::is_base_of; +template using is_lattice_expr = std::is_base_of; + +template struct ViewMapBase { typedef T Type; }; +template struct ViewMapBase { typedef LatticeView Type; }; +template using ViewMap = ViewMapBase::value >; + +template +class LatticeUnaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + Op op; + T1 arg1; + LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {}; +}; + +template +class LatticeBinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + Op op; + T1 arg1; + T2 arg2; + LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {}; +}; + +template +class LatticeTrinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + typedef typename ViewMap<_T3>::Type T3; + Op op; + T1 arg1; + T2 arg2; + T3 arg3; + LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {}; +}; +NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h index f90c34a9..1f11add9 100644 --- a/Grid/parallelIO/BinaryIO.h +++ b/Grid/parallelIO/BinaryIO.h @@ -341,7 +341,7 @@ class BinaryIO { int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); assert(ieee64||ieee32|ieee64big||ieee32big); assert((ieee64+ieee32+ieee64big+ieee32big)==1); ////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index 2e211838..4c1cfbdb 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger { }; }; +template +struct GaugeDoubleStoredMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + +template +struct GaugeDoubleStoredUnmunger { + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h index d3b62d1f..5522ba91 100644 --- a/Grid/parallelIO/NerscIO.h +++ b/Grid/parallelIO/NerscIO.h @@ -146,7 +146,7 @@ public: int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; diff --git a/Grid/parallelIO/OpenQcdIO.h b/Grid/parallelIO/OpenQcdIO.h new file mode 100644 index 00000000..00911595 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIO.h @@ -0,0 +1,224 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIO.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +struct OpenQcdHeader : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader, + int, Nt, + int, Nx, + int, Ny, + int, Nz, + double, plaq); +}; + +class OpenQcdIO : public BinaryIO { +public: + static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6 + + static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) { + OpenQcdHeader header; + + { + std::ifstream fin(file, std::ios::in | std::ios::binary); + fin.read(reinterpret_cast(&header), sizeof(OpenQcdHeader)); + assert(!fin.fail()); + field.data_start = fin.tellg(); + fin.close(); + } + + header.plaq /= normalisationFactor; + + // sanity check (should trigger on endian issues) + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + std::cout << GridLogDebug << "header: " << header << std::endl; + std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl; + std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl; + + assert(grid->_ndimension == Nd); + for(int d = 0; d < Nd; d++) + assert(grid->_fdimensions[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + return field.data_start; + } + + template + static inline void readConfiguration(Lattice>& Umu, + FieldMetaData& header, + std::string file) { + typedef Lattice> DoubleStoredGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = dynamic_cast(Umu.Grid()); + assert(grid != nullptr); assert(grid->_ndimension == Nd); + + uint64_t offset = readHeader(file, Umu.Grid(), header); + + FieldMetaData clone(header); + + std::string format("IEEE64"); // they always store little endian double precsision + uint32_t nersc_csum, scidac_csuma, scidac_csumb; + + GridCartesian* grid_openqcd = createOpenQcdGrid(grid); + GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); + + typedef DoubleStoredColourMatrixD fobj; + typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj; + typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word; + + word w = 0; + + std::vector iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here + std::vector scalardata(grid->lSites()); + + IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC, + nersc_csum, scidac_csuma, scidac_csumb); + + GridStopWatch timer; + timer.Start(); + + DoubleStoredGaugeField Umu_ds(grid); + + auto munge = GaugeDoubleStoredMunger(); + + Coordinate ldim = grid->LocalDimensions(); + thread_for(idx_g, grid->lSites(), { + Coordinate coor; + grid->LocalIndexToLocalCoor(idx_g, coor); + + bool isOdd = grid_rb->CheckerBoard(coor) == Odd; + + if(!isOdd) continue; + + int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Ydir] * ldim[Zdir] + + coor[Zdir])/2; + + munge(iodata[idx_o], scalardata[idx_g]); + }); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + vectorizeFromLexOrdArray(scalardata, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + undoDoubleStore(Umu, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl; + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + + template + static inline void writeConfiguration(Lattice>& Umu, + std::string file) { + std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl; + exit(EXIT_FAILURE); + } + +private: + static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) { + // exploit GridCartesian to be able to still use IOobject + Coordinate gdim = grid->GlobalDimensions(); + Coordinate ldim = grid->LocalDimensions(); + Coordinate pcoor = grid->ThisProcessorCoor(); + + // openqcd does rb on the z direction + gdim[Zdir] /= 2; + ldim[Zdir] /= 2; + + // and has the order T X Y Z (from slowest to fastest) + std::swap(gdim[Xdir], gdim[Zdir]); + std::swap(ldim[Xdir], ldim[Zdir]); + std::swap(pcoor[Xdir], pcoor[Zdir]); + + GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid()); + ret->_ldimensions = ldim; + ret->_processor_coor = pcoor; + return ret; + } + + template + static inline void undoDoubleStore(Lattice>& Umu, + Lattice> const& Umu_ds) { + conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + // they store T+, T-, X+, X-, Y+, Y-, Z+, Z- + for(int mu_g = 0; mu_g < Nd; ++mu_g) { + int mu_o = (mu_g + 1) % Nd; + U = PeekIndex(Umu_ds, 2 * mu_o) + + Cshift(PeekIndex(Umu_ds, 2 * mu_o + 1), mu_g, +1); + PokeIndex(Umu, U, mu_g); + } + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/OpenQcdIOChromaReference.h b/Grid/parallelIO/OpenQcdIOChromaReference.h new file mode 100644 index 00000000..bab54fe8 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIOChromaReference.h @@ -0,0 +1,281 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;} +#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; } +// #undef CHECK +// #define CHECK + +NAMESPACE_BEGIN(Grid); + +class ParRdr { +private: + bool const swap; + + MPI_Status status; + MPI_File fp; + + int err; + + MPI_Datatype oddSiteType; + MPI_Datatype fileViewType; + + GridBase* grid; + +public: + ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr) + : swap(false) + , grid(gridPtr) { + err = MPI_File_open(comm, const_cast(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp); + assert(err == MPI_SUCCESS); + } + + virtual ~ParRdr() { MPI_File_close(&fp); } + + inline void errInfo(int const err, std::string const& func) { + static char estring[MPI_MAX_ERROR_STRING]; + int eclass = -1, len = 0; + MPI_Error_class(err, &eclass); + MPI_Error_string(err, estring, &len); + std::cerr << func << " - Error " << eclass << ": " << estring << std::endl; + } + + int readHeader(FieldMetaData& field) { + assert((grid->_ndimension == Nd) && (Nd == 4)); + assert(Nc == 3); + + OpenQcdHeader header; + + readBlock(reinterpret_cast(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR); + + header.plaq /= 3.; // TODO change this into normalizationfactor + + // sanity check (should trigger on endian issues) TODO remove? + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + for(int d = 0; d < Nd; d++) + assert(grid->FullDimensions()[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + field.data_start = sizeof(OpenQcdHeader); + + return field.data_start; + } + + void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) { + err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status); + errInfo(err, "MPI_File_read_at_all"); + // CHECK_VAR(err) + + int read = -1; + MPI_Get_count(&status, datatype, &read); + // CHECK_VAR(read) + assert(nbytes == (uint64_t)read); + assert(err == MPI_SUCCESS); + } + + void createTypes() { + constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd + + err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS); + + Coordinate const L = grid->GlobalDimensions(); + Coordinate const l = grid->LocalDimensions(); + Coordinate const i = grid->ThisProcessorCoor(); + + Coordinate sizes({L[2] / 2, L[1], L[0], L[3]}); + Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]}); + Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]}); + + err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS); + } + + void freeTypes() { + err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS); + } + + bool readGauge(std::vector& domain_buff, FieldMetaData& meta) { + auto hdr_offset = readHeader(meta); + CHECK + createTypes(); + err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS); + CHECK + int const domainSites = grid->lSites(); + domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd + + // the actual READ + constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex + constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd + constexpr uint64_t max_elems = std::numeric_limits::max(); // int adressable elems: floor is fine + uint64_t const n_os = domainSites / 2; + + for(uint64_t os_idx = 0; os_idx < n_os;) { + uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx; + uint64_t const cm = os_idx * Nd * 2; + readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType); + os_idx += read_os; + } + + CHECK + err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + errInfo(err, "MPI_File_set_view1"); + assert(err == MPI_SUCCESS); + freeTypes(); + + std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl; + return true; + } +}; + +class OpenQcdIOChromaReference : public BinaryIO { +public: + template + static inline void readConfiguration(Lattice>& Umu, + Grid::FieldMetaData& header, + std::string file) { + typedef Lattice> DoubledGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = Umu.Grid(); + + typedef ColourMatrixD fobj; + + std::vector iodata( + Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file + + { + ParRdr rdr(MPI_COMM_WORLD, file, grid); + rdr.readGauge(iodata, header); + } // equivalent to using binaryio + + std::vector> Umu_ds_scalar(grid->lSites()); + + copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging + + DoubledGaugeField Umu_ds(grid); + + vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds); + + redistribute(Umu, Umu_ds); // equivalent to undoDoublestore + + FieldMetaData clone(header); + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + +private: + template + static inline void redistribute(Lattice>& Umu, + Lattice> const& Umu_ds) { + Grid::conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + U = PeekIndex(Umu_ds, 2) + Cshift(PeekIndex(Umu_ds, 3), 0, +1); PokeIndex(Umu, U, 0); + U = PeekIndex(Umu_ds, 4) + Cshift(PeekIndex(Umu_ds, 5), 1, +1); PokeIndex(Umu, U, 1); + U = PeekIndex(Umu_ds, 6) + Cshift(PeekIndex(Umu_ds, 7), 2, +1); PokeIndex(Umu, U, 2); + U = PeekIndex(Umu_ds, 0) + Cshift(PeekIndex(Umu_ds, 1), 3, +1); PokeIndex(Umu, U, 3); + } + + static inline void copyToLatticeObject(std::vector& u_fb, + std::vector const& node_buff, + GridBase* grid) { + assert(node_buff.size() == Nd * grid->lSites()); + + Coordinate const& l = grid->LocalDimensions(); + + Coordinate coord(Nd); + int& x = coord[0]; + int& y = coord[1]; + int& z = coord[2]; + int& t = coord[3]; + + int buff_idx = 0; + for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering + for(x = 0; x < l[0]; ++x) + for(y = 0; y < l[1]; ++y) + for(z = 0; z < l[2]; ++z) { + if((t + z + y + x) % 2 == 0) continue; + + int local_idx; + Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions()); + for(int mu = 0; mu < 2 * Nd; ++mu) + for(int c1 = 0; c1 < Nc; ++c1) { + for(int c2 = 0; c2 < Nc; ++c2) { + u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2); + } + } + buff_idx += 2 * Nd; + } + + assert(node_buff.size() == buff_idx); + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index b8229eec..dd25b41e 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -44,7 +44,7 @@ Author: paboyle #include #endif #ifdef __x86_64__ -#ifdef GRID_NVCC +#ifdef GRID_CUDA accelerator_inline uint64_t __rdtsc(void) { return 0; } accelerator_inline uint64_t __rdpmc(int ) { return 0; } #else @@ -95,7 +95,8 @@ inline uint64_t cyclecount(void){ } #elif defined __x86_64__ inline uint64_t cyclecount(void){ - return __rdtsc(); + uint64_t ret = __rdtsc(); + return (uint64_t)ret; } #else @@ -111,7 +112,6 @@ class PerformanceCounter { private: typedef struct { - public: uint32_t type; uint64_t config; const char *name; diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h index 88b4e1cc..2a44faee 100644 --- a/Grid/perfmon/Timer.h +++ b/Grid/perfmon/Timer.h @@ -110,15 +110,15 @@ public: #endif accumulator = std::chrono::duration_cast(start-start); } - GridTime Elapsed(void) { + GridTime Elapsed(void) const { assert(running == false); return std::chrono::duration_cast( accumulator ); } - uint64_t useconds(void){ + uint64_t useconds(void) const { assert(running == false); return (uint64_t) accumulator.count(); } - bool isRunning(void){ + bool isRunning(void) const { return running; } }; diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index e7b395ad..45e6496a 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -12773,7 +12773,7 @@ namespace pugi #undef PUGI__THROW_ERROR #undef PUGI__CHECK_ERROR -#ifdef GRID_NVCC +#ifdef GRID_CUDA #pragma pop #endif diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h index 2c8e60da..faacac63 100644 --- a/Grid/qcd/QCD.h +++ b/Grid/qcd/QCD.h @@ -133,23 +133,23 @@ typedef iSpinColourMatrix vSpinColourMatrix; typedef iSpinColourMatrix vSpinColourMatrixF; typedef iSpinColourMatrix vSpinColourMatrixD; - // SpinColourSpinColour matrix - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixF; - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; +// SpinColourSpinColour matrix +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixF; +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; - // SpinColourSpinColour matrix - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixF; - typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; +// SpinColourSpinColour matrix +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixF; +typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixD; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; - typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrix; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; +typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; // LorentzColour typedef iLorentzColourMatrix LorentzColourMatrix; @@ -443,16 +443,16 @@ template void pokeLorentz(vobj &lhs,const decltype(peekIndex propagator assignements ////////////////////////////////////////////// - //template - template - void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) +//template +template +void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) { for(int j = 0; j < Ns; ++j) { auto pjs = peekSpin(p, j, s); auto fj = peekSpin(f, j); - for(int i = 0; i < Fimpl::Dimension; ++i) + for(int i = 0; i < Fimpl::Dimension; ++i) { pokeColour(pjs, peekColour(fj, i), i, c); } @@ -460,16 +460,16 @@ template void pokeLorentz(vobj &lhs,const decltype(peekIndex - template - void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) +//template +template +void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) { for(int j = 0; j < Ns; ++j) { auto pjs = peekSpin(p, j, s); auto fj = peekSpin(f, j); - for(int i = 0; i < Fimpl::Dimension; ++i) + for(int i = 0; i < Fimpl::Dimension; ++i) { pokeColour(fj, peekColour(pjs, i, c), i); } diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index c2ccb98b..c7d68d73 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -40,8 +40,8 @@ public: public: // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operations virtual void Meooe (const FermionField &in, FermionField &out); @@ -141,7 +141,33 @@ public: Vector > MatpInvDag; Vector > MatmInvDag; + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + + // Virtual can't template + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu); + + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx); + + void ContractJ5q(PropagatorField &q_in,ComplexField &J5q); + void ContractJ5q(FermionField &q_in,ComplexField &J5q); + + /////////////////////////////////////////////////////////////// // Constructors + /////////////////////////////////////////////////////////////// CayleyFermion5D(GaugeField &_Umu, GridCartesian &FiveDimGrid, GridRedBlackCartesian &FiveDimRedBlackGrid, diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h index 5aa7bfbd..2300afd3 100644 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -41,8 +41,8 @@ public: public: // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h index a2d0e733..bcc97176 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h @@ -53,8 +53,8 @@ public: virtual void DtildeInv (const FermionField& in, FermionField& out); // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + virtual void M (const FermionField& in, FermionField& out); + virtual void Mdag (const FermionField& in, FermionField& out); // half checkerboard operations virtual void Mooee (const FermionField& in, FermionField& out); diff --git a/Grid/qcd/action/fermion/DomainWallVec5dImpl.h b/Grid/qcd/action/fermion/DomainWallVec5dImpl.h index 890c680b..0c8a0930 100644 --- a/Grid/qcd/action/fermion/DomainWallVec5dImpl.h +++ b/Grid/qcd/action/fermion/DomainWallVec5dImpl.h @@ -114,19 +114,22 @@ public: U = adj(Cshift(U, mu, -1)); PokeIndex(Uadj, U, mu); } - - for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + + autoView(Umu_v,Umu,CpuRead); + autoView(Uadj_v,Uadj,CpuRead); + autoView(Uds_v,Uds,CpuWrite); + thread_for( lidx, GaugeGrid->lSites(), { Coordinate lcoor; GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - peekLocalSite(ScalarUmu, Umu, lcoor); + peekLocalSite(ScalarUmu, Umu_v, lcoor); for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); - peekLocalSite(ScalarUmu, Uadj, lcoor); + peekLocalSite(ScalarUmu, Uadj_v, lcoor); for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); - pokeLocalSite(ScalarUds, Uds, lcoor); - } + pokeLocalSite(ScalarUds, Uds_v, lcoor); + }); } inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index fb6f18bb..16252340 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -57,6 +57,7 @@ NAMESPACE_CHECK(WilsonClover); #include // 5d base used by all 5d overlap types NAMESPACE_CHECK(Wilson5D); +#include #include #include NAMESPACE_CHECK(Staggered); @@ -282,11 +283,15 @@ typedef ImprovedStaggeredFermion ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; +typedef NaiveStaggeredFermion NaiveStaggeredFermionR; +typedef NaiveStaggeredFermion NaiveStaggeredFermionF; +typedef NaiveStaggeredFermion NaiveStaggeredFermionD; + typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; -#ifndef GRID_NVCC +#ifndef GRID_CUDA typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dD; diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index cbc6ca63..570e350d 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -58,8 +58,8 @@ public: virtual GridBase *GaugeRedBlackGrid(void) =0; // override multiply - virtual RealD M (const FermionField &in, FermionField &out)=0; - virtual RealD Mdag (const FermionField &in, FermionField &out)=0; + virtual void M (const FermionField &in, FermionField &out)=0; + virtual void Mdag (const FermionField &in, FermionField &out)=0; // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out)=0; @@ -86,15 +86,14 @@ public: virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; - virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void MdirAll(const FermionField &in, std::vector &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac - virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; + virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; - virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) + virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) { FFT theFFT((GridCartesian *) in.Grid()); @@ -148,15 +147,19 @@ public: virtual void ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &phys_src, Current curr_type, - unsigned int mu)=0; + unsigned int mu) + {assert(0);}; virtual void SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &phys_src, Current curr_type, unsigned int mu, unsigned int tmin, unsigned int tmax, - ComplexField &lattice_cmplx)=0; + ComplexField &lattice_cmplx) + {assert(0);}; // Only reimplemented in Wilson5D // Default to just a zero correlation function diff --git a/Grid/qcd/action/fermion/GparityWilsonImpl.h b/Grid/qcd/action/fermion/GparityWilsonImpl.h index f87d2260..0b726db9 100644 --- a/Grid/qcd/action/fermion/GparityWilsonImpl.h +++ b/Grid/qcd/action/fermion/GparityWilsonImpl.h @@ -38,6 +38,7 @@ public: static const bool isFundamental = Representation::isFundamental; static const int Nhcs = Options::Nhcs; static const bool LsVectorised=false; + static const bool isGparity=true; typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); @@ -46,7 +47,7 @@ public: typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; template using iImplSpinor = iVector, Ns>, Ngp>; - template using iImplPropagator = iVector, Ns>, Ngp>; + template using iImplPropagator = iMatrix, Ns>, Ngp>; template using iImplHalfSpinor = iVector, Nhs>, Ngp>; template using iImplHalfCommSpinor = iVector, Nhcs>, Ngp>; template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; @@ -80,6 +81,7 @@ public: { assert(0); } + template static accelerator_inline void multLink(_Spinor &phi, const SiteDoubledGaugeField &U, @@ -94,11 +96,11 @@ public: int sl = St._simd_layout[direction]; Coordinate icoor; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT _Spinor tmp; const int Nsimd =SiteDoubledGaugeField::Nsimd(); - int s = SIMTlane(Nsimd); + int s = acceleratorSIMTlane(Nsimd); St.iCoorFromIindex(icoor,s); int mmu = mu % Nd; @@ -191,6 +193,16 @@ public: #endif } + + template + inline void multLinkField(_SpinorField & out, + const DoubledGaugeField &Umu, + const _SpinorField & phi, + int mu) + { + assert(0); + } + template static accelerator_inline void loadLinkElement(Simd ®, ref &memory) { @@ -220,15 +232,17 @@ public: if ( Params.twists[mu] ) { Uconj = where(coor==neglink,-Uconj,Uconj); } - - auto U_v = U.View(); - auto Uds_v = Uds.View(); - auto Uconj_v = Uconj.View(); - auto Utmp_v= Utmp.View(); - thread_foreach(ss,U_v,{ - Uds_v[ss](0)(mu) = U_v[ss](); - Uds_v[ss](1)(mu) = Uconj_v[ss](); - }); + + { + autoView( U_v , U, CpuRead); + autoView( Uconj_v , Uconj, CpuRead); + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,U_v,{ + Uds_v[ss](0)(mu) = U_v[ss](); + Uds_v[ss](1)(mu) = Uconj_v[ss](); + }); + } U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary Uconj = adj(Cshift(Uconj,mu,-1)); @@ -238,19 +252,25 @@ public: Utmp = where(coor==0,Uconj,Utmp); } - thread_foreach(ss,Utmp_v,{ - Uds_v[ss](0)(mu+4) = Utmp_v[ss](); - }); - + { + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](0)(mu+4) = Utmp_v[ss](); + }); + } Utmp = Uconj; if ( Params.twists[mu] ) { Utmp = where(coor==0,U,Utmp); } - - thread_foreach(ss,Utmp_v,{ - Uds_v[ss](1)(mu+4) = Utmp_v[ss](); - }); - + + { + autoView( Uds_v , Uds, CpuWrite); + autoView( Utmp_v, Utmp, CpuWrite); + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](1)(mu+4) = Utmp_v[ss](); + }); + } } } @@ -260,11 +280,14 @@ public: GaugeLinkField link(mat.Grid()); // use lorentz for flavour as hack. auto tmp = TraceIndex(outerProduct(Btilde, A)); - auto link_v = link.View(); - auto tmp_v = tmp.View(); - thread_foreach(ss,tmp_v,{ - link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); - }); + + { + autoView( link_v , link, CpuWrite); + autoView( tmp_v , tmp, CpuRead); + thread_foreach(ss,tmp_v,{ + link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); + }); + } PokeIndex(mat, link, mu); return; } @@ -294,16 +317,18 @@ public: GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(); - auto Atilde_v = Atilde.View(); - auto Btilde_v = Btilde.View(); - thread_for(ss,tmp.Grid()->oSites(),{ - for (int s = 0; s < Ls; s++) { - int sF = s + Ls * ss; - auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); - tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); - } - }); + { + autoView( tmp_v , tmp, CpuWrite); + autoView( Atilde_v , Atilde, CpuRead); + autoView( Btilde_v , Btilde, CpuRead); + thread_for(ss,tmp.Grid()->oSites(),{ + for (int s = 0; s < Ls; s++) { + int sF = s + Ls * ss; + auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); + tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + } + }); + } PokeIndex(mat, tmp, mu); return; } diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h index 5cb95ca6..ecf44ed7 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -71,8 +71,8 @@ public: // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + void M(const FermionField &in, FermionField &out); + void Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations @@ -185,10 +185,12 @@ public: void ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu); void SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &srct, Current curr_type, unsigned int mu, unsigned int tmin, diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 8e3d4be5..625eda63 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -62,8 +61,8 @@ public: double DhopCalls; double DhopCommTime; double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; + double DhopComputeTime2; + double DhopFaceTime; /////////////////////////////////////////////////////////////// // Implement the abstract base @@ -74,8 +73,8 @@ public: GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} // full checkerboard operations; leave unimplemented as abstract for now - RealD M (const FermionField &in, FermionField &out); - RealD Mdag (const FermionField &in, FermionField &out); + void M (const FermionField &in, FermionField &out); + void Mdag (const FermionField &in, FermionField &out); // half checkerboard operations void Meooe (const FermionField &in, FermionField &out); @@ -217,15 +216,17 @@ public: void ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu); void SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu, unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx); + unsigned int tmax, + ComplexField &lattice_cmplx); }; NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermion.h b/Grid/qcd/action/fermion/MobiusEOFAFermion.h index 6b214233..6e4f79eb 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermion.h +++ b/Grid/qcd/action/fermion/MobiusEOFAFermion.h @@ -56,8 +56,8 @@ public: virtual void DtildeInv (const FermionField& in, FermionField& out); // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + virtual void M (const FermionField& in, FermionField& out); + virtual void Mdag (const FermionField& in, FermionField& out); // half checkerboard operations virtual void Mooee (const FermionField& in, FermionField& out); diff --git a/Grid/qcd/action/fermion/MobiusFermion.h b/Grid/qcd/action/fermion/MobiusFermion.h index 1cbb6609..1e948092 100644 --- a/Grid/qcd/action/fermion/MobiusFermion.h +++ b/Grid/qcd/action/fermion/MobiusFermion.h @@ -59,7 +59,7 @@ public: { RealD eps = 1.0; - std::cout<Ls);// eps is ignored for higham assert(zdata->n==this->Ls); diff --git a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h new file mode 100644 index 00000000..ca38a64f --- /dev/null +++ b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h @@ -0,0 +1,194 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggered.h + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_NAIVE_STAG_FERMION_H +#define GRID_QCD_NAIVE_STAG_FERMION_H + +NAMESPACE_BEGIN(Grid); + +class NaiveStaggeredFermionStatic { +public: + static const std::vector directions; + static const std::vector displacements; + static const int npoint = 8; +}; + +template +class NaiveStaggeredFermion : public StaggeredKernels, public NaiveStaggeredFermionStatic { +public: + INHERIT_IMPL_TYPES(Impl); + typedef StaggeredKernels Kernels; + + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } + + //////////////////////////////////////// + // Performance monitoring + //////////////////////////////////////// + void Report(void); + void ZeroCounters(void); + double DhopTotalTime; + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + double DhopComputeTime2; + double DhopFaceTime; + + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } + + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + void M(const FermionField &in, FermionField &out); + void Mdag(const FermionField &in, FermionField &out); + + ///////////////////////////////////////////////////////// + // half checkerboard operations + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out); + void MeooeDag(const FermionField &in, FermionField &out); + void Mooee(const FermionField &in, FermionField &out); + void MooeeDag(const FermionField &in, FermionField &out); + void MooeeInv(const FermionField &in, FermionField &out); + void MooeeInvDag(const FermionField &in, FermionField &out); + + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv (GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + void DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag); + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + /////////////////////////////////////////////////////////////// + void Dhop (const FermionField &in, FermionField &out, int dag); + void DhopOE(const FermionField &in, FermionField &out, int dag); + void DhopEO(const FermionField &in, FermionField &out, int dag); + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; force term uses too + /////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void MdirAll(const FermionField &in, std::vector &out); + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); + + /////////////////////////////////////////////////////////////// + // Extra methods added by derived + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl &st, + DoubledGaugeField &U, + GaugeField &mat, + const FermionField &A, const FermionField &B, int dag); + + void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + + ////////////////////////////////////////////////////////////////////////// + // Grid own interface Constructor + ////////////////////////////////////////////////////////////////////////// + NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p = ImplParams()); + NaiveStaggeredFermion(GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p = ImplParams()); + + // DoubleStore impl dependent + void ImportGauge (const GaugeField &_U ); + DoubledGaugeField &GetU(void) { return Umu ; } ; + void CopyGaugeCheckerboards(void); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + + // protected: +public: + // any other parameters of action ??? + virtual int isTrivialEE(void) { return 1; }; + virtual RealD Mass(void) { return mass; } + RealD mass; + RealD u0; + RealD c1; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; + + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &srct, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx); +}; + +typedef NaiveStaggeredFermion NaiveStaggeredFermionF; +typedef NaiveStaggeredFermion NaiveStaggeredFermionD; + +NAMESPACE_END(Grid); + +#endif diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index 928abd3f..54f8547f 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -47,8 +47,8 @@ public: void M_internal(const FermionField &in, FermionField &out,int dag); // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/StaggeredKernels.h b/Grid/qcd/action/fermion/StaggeredKernels.h index 6ef0ab9d..30deee06 100644 --- a/Grid/qcd/action/fermion/StaggeredKernels.h +++ b/Grid/qcd/action/fermion/StaggeredKernels.h @@ -47,23 +47,34 @@ template class StaggeredKernels : public FermionOperator , pub INHERIT_IMPL_TYPES(Impl); typedef FermionOperator Base; -public: - - void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, - int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); + public: + + void DhopImproved(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, DoubledGaugeField &UUU, + const FermionField &in, FermionField &out, int dag, int interior,int exterior); + void DhopNaive(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag, int interior,int exterior); + + void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); + protected: /////////////////////////////////////////////////////////////////////////////////////// // Generic Nc kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteGeneric(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteGenericInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteGenericExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); @@ -71,15 +82,18 @@ public: /////////////////////////////////////////////////////////////////////////////////////// // Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, + template accelerator_inline + void DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); @@ -87,27 +101,10 @@ public: /////////////////////////////////////////////////////////////////////////////////////// // Asm Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// - void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, + void DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, const FermionFieldView &in, FermionFieldView &out,int dag); - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Generic interface; fan out to right routine - /////////////////////////////////////////////////////////////////////////////////////////////////// - void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); - - void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); - - void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor * buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior); public: diff --git a/Grid/qcd/action/fermion/StaggeredVec5dImpl.h b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h index 2d4de18e..18fe993c 100644 --- a/Grid/qcd/action/fermion/StaggeredVec5dImpl.h +++ b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h @@ -113,20 +113,7 @@ public: inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu) { - GridBase *GaugeGrid = U_ds.Grid(); - thread_for(lidx, GaugeGrid->lSites(),{ - - SiteScalarGaugeLink ScalarU; - SiteDoubledGaugeField ScalarUds; - - Coordinate lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - peekLocalSite(ScalarUds, U_ds, lcoor); - - peekLocalSite(ScalarU, U, lcoor); - ScalarUds(mu) = ScalarU(); - - }); + assert(0); } inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &UUUds, // for Naik term diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index 3847b0d9..91ad6d6d 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -109,9 +109,8 @@ public: ImportGauge(_Umu); } - virtual RealD M(const FermionField &in, FermionField &out); - virtual RealD Mdag(const FermionField &in, FermionField &out); - + virtual void M(const FermionField &in, FermionField &out); + virtual void Mdag(const FermionField &in, FermionField &out); virtual void Mooee(const FermionField &in, FermionField &out); virtual void MooeeDag(const FermionField &in, FermionField &out); virtual void MooeeInv(const FermionField &in, FermionField &out); @@ -258,15 +257,16 @@ private: CloverFieldType CloverTermDagEven, CloverTermDagOdd; // Clover term Dag EO CloverFieldType CloverTermInvDagEven, CloverTermInvDagOdd; // Clover term Inv Dag EO + public: // eventually these can be compressed into 6x6 blocks instead of the 12x12 // using the DeGrand-Rossi basis for the gamma matrices CloverFieldType fillCloverYZ(const GaugeLinkField &F) { CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v,T,AcceleratorWrite); + autoView(F_v,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = timesMinusI(F_v[i]()()); T_v[i]()(1, 0) = timesMinusI(F_v[i]()()); @@ -282,9 +282,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v, T,AcceleratorWrite); + autoView(F_v, F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = -F_v[i]()(); T_v[i]()(1, 0) = F_v[i]()(); @@ -300,9 +300,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView(T_v,T,AcceleratorWrite); + autoView(F_v,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 0) = timesMinusI(F_v[i]()()); T_v[i]()(1, 1) = timesI(F_v[i]()()); @@ -318,9 +318,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v , T, AcceleratorWrite); + autoView( F_v , F, AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = timesI(F_v[i]()()); T_v[i]()(1, 0) = timesI(F_v[i]()()); @@ -336,9 +336,9 @@ private: CloverFieldType T(F.Grid()); T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v ,T,AcceleratorWrite); + autoView( F_v ,F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 1) = -(F_v[i]()()); T_v[i]()(1, 0) = (F_v[i]()()); @@ -355,9 +355,9 @@ private: T = Zero(); - auto T_v = T.View(); - auto F_v = F.View(); - thread_for(i, CloverTerm.Grid()->oSites(), + autoView( T_v , T,AcceleratorWrite); + autoView( F_v , F,AcceleratorRead); + accelerator_for(i, CloverTerm.Grid()->oSites(),1, { T_v[i]()(0, 0) = timesI(F_v[i]()()); T_v[i]()(1, 1) = timesMinusI(F_v[i]()()); diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index eb9efa41..bf8926d0 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -92,8 +92,8 @@ public: // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - virtual RealD M(const FermionField &in, FermionField &out); - virtual RealD Mdag(const FermionField &in, FermionField &out); + virtual void M(const FermionField &in, FermionField &out); + virtual void Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations @@ -193,15 +193,17 @@ public: void ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &phys_src, Current curr_type, unsigned int mu); void SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &phys_src, Current curr_type, unsigned int mu, unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx); + unsigned int tmax, + ComplexField &lattice_cmplx); }; typedef WilsonFermion WilsonFermionF; diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 58b54421..804b1d10 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -99,8 +98,8 @@ public: GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} // full checkerboard operations; leave unimplemented as abstract for now - virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;}; - virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;}; + virtual void M (const FermionField &in, FermionField &out){assert(0);}; + virtual void Mdag (const FermionField &in, FermionField &out){assert(0);}; // half checkerboard operations; leave unimplemented as abstract for now virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; @@ -217,25 +216,7 @@ public: // Comms buffer std::vector > comm_buf; - - /////////////////////////////////////////////////////////////// - // Conserved current utilities - /////////////////////////////////////////////////////////////// - void ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu); - void SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx); - void ContractJ5q(PropagatorField &q_in,ComplexField &J5q); - void ContractJ5q(FermionField &q_in,ComplexField &J5q); }; diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h index 47160730..52e1ee00 100644 --- a/Grid/qcd/action/fermion/WilsonImpl.h +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -41,6 +41,7 @@ public: static const int Dimension = Representation::Dimension; static const bool isFundamental = Representation::isFundamental; static const bool LsVectorised=false; + static const bool isGparity=false; static const int Nhcs = Options::Nhcs; typedef PeriodicGaugeImpl > Gimpl; @@ -98,8 +99,21 @@ public: { multLink(phi,U,chi,mu); } - - + + template + inline void multLinkField(_SpinorField & out, + const DoubledGaugeField &Umu, + const _SpinorField & phi, + int mu) + { + autoView( out_v, out, AcceleratorWrite); + autoView( phi_v, phi, AcceleratorRead); + autoView( Umu_v, Umu, AcceleratorRead); + accelerator_for(sss,out.Grid()->oSites(),1,{ + multLink(out_v[sss],Umu_v[sss],phi_v[sss],mu); + }); + } + template static accelerator_inline void loadLinkElement(Simd ®, ref &memory) { @@ -177,18 +191,19 @@ public: int Ls=Btilde.Grid()->_fdimensions[0]; GaugeLinkField tmp(mat.Grid()); tmp = Zero(); - auto tmp_v = tmp.View(); - auto Btilde_v = Btilde.View(); - auto Atilde_v = Atilde.View(); - thread_for(sss,tmp.Grid()->oSites(),{ - int sU=sss; - for(int s=0;s(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here - } - }); + { + autoView( tmp_v , tmp, AcceleratorWrite); + autoView( Btilde_v , Btilde, AcceleratorRead); + autoView( Atilde_v , Atilde, AcceleratorRead); + accelerator_for(sss,tmp.Grid()->oSites(),1,{ + int sU=sss; + for(int s=0;s(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here + } + }); + } PokeIndex(mat,tmp,mu); - } }; diff --git a/Grid/qcd/action/fermion/WilsonKernels.h b/Grid/qcd/action/fermion/WilsonKernels.h index 7348a463..1bac9211 100644 --- a/Grid/qcd/action/fermion/WilsonKernels.h +++ b/Grid/qcd/action/fermion/WilsonKernels.h @@ -66,41 +66,6 @@ public: static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); - ////////////////////////////////////////////////////////////////////////////// - // Utilities for inserting Wilson conserved current. - ////////////////////////////////////////////////////////////////////////////// - static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign = false); - - static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign = false); - - static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - vPredicate t_mask, - bool switch_sign = false); - - static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - vPredicate t_mask, - bool switch_sign = false); - private: static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, diff --git a/Grid/qcd/action/fermion/WilsonTMFermion5D.h b/Grid/qcd/action/fermion/WilsonTMFermion5D.h index 71acf763..982e722a 100644 --- a/Grid/qcd/action/fermion/WilsonTMFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonTMFermion5D.h @@ -120,7 +120,8 @@ class WilsonTMFermion5D : public WilsonFermion5D } } - virtual RealD M(const FermionField &in, FermionField &out) { + virtual void M(const FermionField &in, FermionField &out) + { out.Checkerboard() = in.Checkerboard(); this->Dhop(in, out, DaggerNo); FermionField tmp(out.Grid()); @@ -129,11 +130,12 @@ class WilsonTMFermion5D : public WilsonFermion5D ComplexD b(0.0,this->mu[s]); axpbg5y_ssp(tmp,a,in,b,in,s,s); } - return axpy_norm(out, 1.0, tmp, out); + axpy(out, 1.0, tmp, out); } // needed for fast PV - void update(const std::vector& _mass, const std::vector& _mu) { + void update(const std::vector& _mass, const std::vector& _mu) + { assert(_mass.size() == _mu.size()); assert(_mass.size() == this->FermionGrid()->_fdimensions[0]); this->mass = _mass; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index c3c14ae9..e79b64dc 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -180,7 +180,7 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl; std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; -#ifdef GRID_NVCC +#ifdef GRID_CUDA RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; @@ -323,7 +323,7 @@ void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField } template -RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) +void CayleyFermion5D::M (const FermionField &psi, FermionField &chi) { FermionField Din(psi.Grid()); @@ -335,11 +335,10 @@ RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) axpby(chi,1.0,1.0,chi,psi); M5D(psi,chi); - return(norm2(chi)); } template -RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) +void CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) { // Under adjoint //D1+ D1- P- -> D1+^dag P+ D2-^dag @@ -354,7 +353,6 @@ RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) M5Ddag(psi,chi); // ((b D_W + D_w hop terms +1) on s-diag axpby (chi,1.0,1.0,chi,psi); - return norm2(chi); } // half checkerboard operations @@ -588,6 +586,356 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,VectorMooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); } + +template +void CayleyFermion5D::ContractJ5q(FermionField &q_in,ComplexField &J5q) +{ + conformable(this->GaugeGrid(), J5q.Grid()); + conformable(q_in.Grid(), this->FermionGrid()); + Gamma G5(Gamma::Algebra::Gamma5); + // 4d field + int Ls = this->Ls; + FermionField psi(this->GaugeGrid()); + FermionField p_plus (this->GaugeGrid()); + FermionField p_minus(this->GaugeGrid()); + FermionField p(this->GaugeGrid()); + + ExtractSlice(p_plus , q_in, Ls/2-1 , 0); + ExtractSlice(p_minus, q_in, Ls/2 , 0); + p_plus = p_plus + G5*p_plus; + p_minus= p_minus - G5*p_minus; + p=0.5*(p_plus+p_minus); + J5q = localInnerProduct(p,p); +} + +template +void CayleyFermion5D::ContractJ5q(PropagatorField &q_in,ComplexField &J5q) +{ + conformable(this->GaugeGrid(), J5q.Grid()); + conformable(q_in.Grid(), this->FermionGrid()); + Gamma G5(Gamma::Algebra::Gamma5); + // 4d field + int Ls = this->Ls; + PropagatorField psi(this->GaugeGrid()); + PropagatorField p_plus (this->GaugeGrid()); + PropagatorField p_minus(this->GaugeGrid()); + PropagatorField p(this->GaugeGrid()); + + ExtractSlice(p_plus , q_in, Ls/2-1 , 0); + ExtractSlice(p_minus, q_in, Ls/2 , 0); + p_plus = p_plus + G5*p_plus; + p_minus= p_minus - G5*p_minus; + p=0.5*(p_plus+p_minus); + J5q = localInnerProduct(p,p); +} + +#define Pp(Q) (0.5*(Q+g5*Q)) +#define Pm(Q) (0.5*(Q-g5*Q)) +#define Q_4d(Q) (Pm((Q)[0]) + Pp((Q)[Ls-1])) +#define TopRowWithSource(Q) (phys_src + (1.0-mass)*Q_4d(Q)) + +template +void CayleyFermion5D::ContractConservedCurrent( PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu) +{ +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + + auto UGrid= this->GaugeGrid(); + auto FGrid= this->FermionGrid(); + RealD sgn=1.0; + if ( curr_type == Current::Axial ) sgn = -1.0; + + int Ls = this->Ls; + + std::vector L_Q(Ls,UGrid); + std::vector R_Q(Ls,UGrid); + for(int s=0;s R_TmLsGq(Ls,UGrid); + std::vector L_TmLsGq(Ls,UGrid); + for(int s=0;sbs[s]; + auto c=this->cs[s]; + auto bpc = 1.0/(b+c); // -0.5 factor in gauge links + if (s == 0) { + p5d =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp )); + tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1])); + } else if (s == Ls-1) { + p5d =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1])); + tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp )); + } else { + p5d =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr])+ b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp])); + tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp])); + } + tmp = Cshift(tmp,mu,1); + Impl::multLinkField(us_p5d,this->Umu,tmp,mu); + + gp5d=g5*p5d*g5; + gus_p5d=gmu*us_p5d; + + C = bpc*(adj(gp5d)*us_p5d); + C-= bpc*(adj(gp5d)*gus_p5d); + + if (s == 0) { + p5d =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1])); + tmp =(b*Pm(L_TmLsGq[Ls-1])+ c*Pp(L_TmLsGq[Ls-1]) + b*Pp(L_TmLsTmp) + c*Pm(L_TmLsTmp )); + } else if (s == Ls-1) { + p5d =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp )); + tmp =(b*Pm(L_TmLsGq0) + c*Pp(L_TmLsGq0 ) + b*Pp(L_TmLsGq[1]) + c*Pm(L_TmLsGq[1])); + } else { + p5d =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp])); + tmp =(b*Pm(L_TmLsGq[sr]) + c*Pp(L_TmLsGq[sr]) + b*Pp(L_TmLsGq[srp])+ c*Pm(L_TmLsGq[srp])); + } + tmp = Cshift(tmp,mu,1); + Impl::multLinkField(us_p5d,this->Umu,tmp,mu); + + gp5d=gmu*p5d; + gus_p5d=g5*us_p5d*g5; + + C-= bpc*(adj(gus_p5d)*gp5d); + C-= bpc*(adj(gus_p5d)*p5d); + + if (s < Ls/2) q_out += sgn*C; + else q_out += C; + + } +#endif +} + +template +void CayleyFermion5D::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &phys_src, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &ph)// Complex phase factor +{ + assert(mu>=0); + assert(muLs; + auto UGrid= this->GaugeGrid(); + auto FGrid= this->FermionGrid(); + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + Gamma gmu=Gamma(Gmu[mu]); + + PropagatorField L_Q(UGrid); + PropagatorField R_Q(UGrid); + + PropagatorField tmp(UGrid); + PropagatorField Utmp(UGrid); + LatticeInteger zz (UGrid); zz=0.0; + LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); + for (int s=0;sUmu,tmp,mu); + tmp = G_s*( Utmp*ph - gmu*Utmp*ph ); // Forward hop + tmp = where((lcoor>=tmin),tmp,zz); // Mask the time + tmp = where((lcoor<=tmax),tmp,zz); + L_Q = tmp; + + tmp = R_Q*ph; + tmp = Cshift(tmp,mu,-1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd);// Adjoint link + tmp = -G_s*( Utmp + gmu*Utmp ); + tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time + tmp = where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated + L_Q= L_Q+tmp; + + InsertSlice(L_Q, q_out, s , 0); + } +#endif + +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) + int tshift = (mu == Nd-1) ? 1 : 0; + //////////////////////////////////////////////// + // GENERAL CAYLEY CASE + //////////////////////////////////////////////// + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + Gamma gmu=Gamma(Gmu[mu]); + Gamma g5(Gamma::Algebra::Gamma5); + + int Ls = this->Ls; + auto UGrid= this->GaugeGrid(); + auto FGrid= this->FermionGrid(); + + std::vector R_Q(Ls,UGrid); + PropagatorField L_Q(UGrid); + PropagatorField tmp(UGrid); + PropagatorField Utmp(UGrid); + + LatticeInteger zz (UGrid); zz=0.0; + LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); + + for(int s=0;s R_TmLsGq(Ls,UGrid); + for(int s=0;s G_s(Ls,1.0); + if ( curr_type == Current::Axial ) { + for(int s=0;sbs[s]; + auto c=this->cs[s]; + // auto bpc = G_s[s]*1.0/(b+c); // -0.5 factor in gauge links + + if (s == 0) { + tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1])); + } else if (s == Ls-1) { + tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp )); + } else { + tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp ])+ c*Pm(R_TmLsGq[sp])); + } + + tmp = Cshift(tmp,mu,1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu); + tmp = G_s[s]*( Utmp*ph - gmu*Utmp*ph ); // Forward hop + tmp = where((lcoor>=tmin),tmp,zz); // Mask the time + L_Q = where((lcoor<=tmax),tmp,zz); // Position of current complicated + + if (s == 0) { + tmp =(b*Pm(R_TmLsGq0) + c*Pp(R_TmLsGq0 ) + b*Pp(R_TmLsGq[1]) + c*Pm(R_TmLsGq[1])); + } else if (s == Ls-1) { + tmp =(b*Pm(R_TmLsGq[Ls-1])+ c*Pp(R_TmLsGq[Ls-1]) + b*Pp(R_TmLsTmp) + c*Pm(R_TmLsTmp )); + } else { + tmp =(b*Pm(R_TmLsGq[s]) + c*Pp(R_TmLsGq[s]) + b*Pp(R_TmLsGq[sp])+ c*Pm(R_TmLsGq[sp])); + } + tmp = tmp *ph; + tmp = Cshift(tmp,mu,-1); + Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link + tmp = -G_s[s]*( Utmp + gmu*Utmp ); + tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time + L_Q += where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated + + InsertSlice(L_Q, q_out, s , 0); + } +#endif +} +#undef Pp +#undef Pm +#undef Q_4d +#undef TopRowWithSource + + + #if 0 template void CayleyFermion5D::MooeeInternalCompute(int dag, int inv, diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index dbdf134b..d2537ccf 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -50,9 +50,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,AcceleratorRead); + autoView(phi , phi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -93,9 +93,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,AcceleratorRead); + autoView(phi , phi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; @@ -131,8 +131,8 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); int Ls=this->Ls; @@ -193,8 +193,8 @@ CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi GridBase *grid=psi_i.Grid(); int Ls=this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,AcceleratorRead); + autoView(chi , chi_i,AcceleratorWrite); auto plee = & lee [0]; auto pdee = & dee [0]; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h index 034ce642..b54f63ad 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h @@ -65,9 +65,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi, psi_i,CpuRead); + autoView(phi, phi_i,CpuRead); + autoView(chi, chi_i,CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; const int nsimd= Simd::Nsimd(); @@ -213,9 +213,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, EnableIf sfinae=0; chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); - auto psi=psi_i.View(); - auto phi=phi_i.View(); - auto chi=chi_i.View(); + autoView(psi,psi_i,CpuRead); + autoView(phi,phi_i,CpuRead); + autoView(chi,chi_i,CpuWrite); int Ls = this->Ls; int LLs = grid->_rdimensions[0]; int nsimd= Simd::Nsimd(); @@ -357,8 +357,8 @@ CayleyFermion5D::MooeeInternalAsm(const FermionField &psi_i, FermionField Vector > &Matm) { EnableIf sfinae=0; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); #ifndef AVX512 { SiteHalfSpinor BcastP; @@ -535,8 +535,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField EnableIf sfinae=0; #ifndef AVX512 { - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); SiteHalfSpinor BcastP; SiteHalfSpinor BcastM; @@ -586,8 +586,8 @@ CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField } #else { - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i,CpuRead); + autoView(chi , chi_i,CpuWrite); // pointers // MASK_REGS; #define Chi_00 %zmm0 diff --git a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h index beeb3e00..6687800e 100644 --- a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h @@ -94,7 +94,7 @@ void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Ap template -RealD ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) +void ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) { int Ls = this->Ls; @@ -116,15 +116,14 @@ RealD ContinuedFractionFermion5D::M (const FermionField &psi, F } sign=-sign; } - return norm2(chi); } template -RealD ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) +void ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) { // This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag // The rest of matrix is symmetric. // Can ignore "dag" - return M(psi,chi); + M(psi,chi); } template void ContinuedFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h index 46d3fa1f..9a8454ef 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -46,9 +46,9 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; GridBase* grid = psi_i.Grid(); - auto phi = phi_i.View(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView( phi , phi_i, AcceleratorRead); + autoView( psi , psi_i, AcceleratorRead); + autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -82,9 +82,9 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio GridBase* grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView( psi , psi_i, AcceleratorRead); + autoView( phi , phi_i, AcceleratorRead); + autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); auto pdiag = &diag[0]; auto pupper = &upper[0]; @@ -116,8 +116,8 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto psi=psi_i.View(); - auto chi=chi_i.View(); + autoView( psi, psi_i, AcceleratorRead); + autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; @@ -172,8 +172,8 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView( psi, psi_i, AcceleratorRead); + autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; auto plee = & this->lee[0]; diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h index 3684fd6c..64ee4033 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h @@ -89,7 +89,7 @@ void DomainWallEOFAFermion::DtildeInv(const FermionField& psi, FermionFiel /*****************************************************************************************************/ template -RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -97,11 +97,10 @@ RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) this->DW(Din, chi, DaggerNo); axpby(chi, 1.0, 1.0, chi, psi); this->M5D(psi, chi); - return(norm2(chi)); } template -RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -109,7 +108,6 @@ RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& c this->MeooeDag5D(Din, chi); this->M5Ddag(psi, chi); axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); } /******************************************************************** diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index fdaa2f71..888691c4 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -221,10 +221,10 @@ void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionFi Compressor compressor; Stencil.HaloExchange(in,compressor); - auto Umu_v = Umu.View(); - auto UUUmu_v = UUUmu.View(); - auto in_v = in.View(); - auto out_v = out.View(); + autoView( Umu_v , Umu, CpuRead); + autoView( UUUmu_v , UUUmu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); thread_for( ss,Umu.Grid()->oSites(),{ for(int s=0;s::DhopInternal(StencilImpl & st, LebesgueOr DoubledGaugeField & U,DoubledGaugeField & UUU, const FermionField &in, FermionField &out,int dag) { -#ifdef GRID_OMP if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag); else -#endif DhopInternalSerialComms(st,lo,U,UUU,in,out,dag); } @@ -294,9 +292,7 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DoubledGaugeField & U,DoubledGaugeField & UUU, const FermionField &in, FermionField &out,int dag) { -#ifdef GRID_OMP // assert((dag==DaggerNo) ||(dag==DaggerYes)); - Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; @@ -305,99 +301,42 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DhopFaceTime-=usecond(); st.Prepare(); st.HaloGather(in,compressor); + DhopFaceTime+=usecond(); + + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + // st.HaloExchangeOptGather(in,compressor); // Wilson compressor + DhopFaceTime-=usecond(); st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms DhopFaceTime+=usecond(); - double ctime=0; - double ptime=0; - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Ugly explicit thread mapping introduced for OPA reasons. + // Remove explicit thread mapping introduced for OPA reasons. ////////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma omp parallel reduction(max:ctime) reduction(max:ptime) + DhopComputeTime-=usecond(); { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - if (tid >= ncomms) { - double start = usecond(); - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = U.Grid()->oSites(); // 4d vol - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } - - // do the compute - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); - - if (dag == DaggerYes) { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<--------- - } - } else { - for (int ss = myblock; ss < myblock+myn; ++ss) { - // Interior = 1; Exterior = 0; - int sU = ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------ - } - } - ptime = usecond() - start; - } else { - double start = usecond(); - st.CommunicateThreaded(); - ctime = usecond() - start; - } + int interior=1; + int exterior=0; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } - DhopCommTime += ctime; - DhopComputeTime+=ptime; - - // First to enter, last to leave timing - st.CollateThreads(); + DhopComputeTime+=usecond(); DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); - DhopComputeTime2-=usecond(); + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); - if (dag == DaggerYes) { - int sz=st.surface_list.size(); - thread_for( ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<---------- - }); - } else { - int sz=st.surface_list.size(); - thread_for( ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<---------- - }); + DhopComputeTime2-=usecond(); + { + int interior=0; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime2+=usecond(); -#else - assert(0); -#endif - } template @@ -408,8 +347,6 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, Compressor compressor; int LLs = in.Grid()->_rdimensions[0]; - - //double t1=usecond(); DhopTotalTime -= usecond(); DhopCommTime -= usecond(); @@ -418,28 +355,13 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); - if (dag == DaggerYes) { - thread_for( ss,U.Grid()->oSites(),{ - int sU=ss; - Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v); - }); - } else { - thread_for( ss,U.Grid()->oSites(),{ - int sU=ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v); - }); + { + int interior=1; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); - //double t2=usecond(); - //std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl; - //std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl; - //std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl; - //std::cout << __FILE__ << " " << __func__ << " Compute Time " << DhopComputeTime << std::endl; } /*CHANGE END*/ @@ -548,21 +470,24 @@ void ImprovedStaggeredFermion5D::MdirAll(const FermionField &in, std::vect assert(0); } template -RealD ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -RealD ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -570,7 +495,8 @@ void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionFiel } } template -void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -579,27 +505,30 @@ void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionF } template -void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template -void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template -void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, - FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in,FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } @@ -611,6 +540,7 @@ template void ImprovedStaggeredFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu) { @@ -620,11 +550,12 @@ void ImprovedStaggeredFermion5D::ContractConservedCurrent(PropagatorField template void ImprovedStaggeredFermion5D::SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu, unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx) + unsigned int tmax, + ComplexField &lattice_cmplx) { assert(0); diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 0b723c47..05d9a17e 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -171,21 +171,24 @@ void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const ///////////////////////////// template -RealD ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -RealD ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -193,7 +196,8 @@ void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField } } template -void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -202,27 +206,30 @@ void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionFie } template -void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template -void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template -void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, - FermionField &out) { +void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in,FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } @@ -234,7 +241,8 @@ void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, template void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, GaugeField & mat, - const FermionField &A, const FermionField &B, int dag) { + const FermionField &A, const FermionField &B, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor; @@ -250,10 +258,10 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge //////////////////////// // Call the single hop //////////////////////// - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto B_v = B.View(); - auto Btilde_v = Btilde.View(); + autoView( U_v , U, CpuRead); + autoView( UUU_v , UUU, CpuRead); + autoView( B_v , B, CpuWrite); + autoView( Btilde_v , Btilde, CpuWrite); thread_for(sss,B.Grid()->oSites(),{ Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); }); @@ -284,8 +292,8 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge } template -void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _grid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -296,8 +304,8 @@ void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionFie } template -void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -310,8 +318,8 @@ void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionF } template -void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -378,10 +386,10 @@ void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionFiel Compressor compressor; Stencil.HaloExchange(in, compressor); - auto Umu_v = Umu.View(); - auto UUUmu_v = UUUmu.View(); - auto in_v = in.View(); - auto out_v = out.View(); + autoView( Umu_v , Umu, CpuRead); + autoView( UUUmu_v , UUUmu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); thread_for( sss, in.Grid()->oSites(),{ Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); }); @@ -395,11 +403,9 @@ void ImprovedStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder const FermionField &in, FermionField &out, int dag) { -#ifdef GRID_OMP if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag); else -#endif DhopInternalSerialComms(st,lo,U,UUU,in,out,dag); } template @@ -409,7 +415,6 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st const FermionField &in, FermionField &out, int dag) { -#ifdef GRID_OMP Compressor compressor; int len = U.Grid()->oSites(); @@ -418,60 +423,30 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopFaceTime -= usecond(); st.Prepare(); st.HaloGather(in,compressor); - st.CommsMergeSHM(compressor); DhopFaceTime += usecond(); + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + + DhopFaceTime-=usecond(); + st.CommsMergeSHM(compressor); + DhopFaceTime+= usecond(); + ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Ugly explicit thread mapping introduced for OPA reasons. + // Removed explicit thread comms ////////////////////////////////////////////////////////////////////////////////////////////////////// DhopComputeTime -= usecond(); -#pragma omp parallel { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - - if (tid >= ncomms) { - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = len; - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } - - // do the compute - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); - if (dag == DaggerYes) { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); - } - } else { - for (int ss = myblock; ss < myblock+myn; ++ss) { - // Interior = 1; Exterior = 0; - int sU = ss; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); - } - } - } else { - st.CommunicateThreaded(); - } + int interior=1; + int exterior=0; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); + // First to enter, last to leave timing DhopFaceTime -= usecond(); st.CommsMerge(compressor); @@ -479,28 +454,11 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopComputeTime2 -= usecond(); { - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); - if (dag == DaggerYes) { - int sz=st.surface_list.size(); - thread_for(ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); - }); - } else { - int sz=st.surface_list.size(); - thread_for(ss,sz,{ - int sU = st.surface_list[ss]; - Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); - }); - } + int interior=0; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime2 += usecond(); -#else - assert(0); -#endif } @@ -520,19 +478,11 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le st.HaloExchange(in, compressor); DhopCommTime += usecond(); - auto U_v = U.View(); - auto UUU_v = UUU.View(); - auto in_v = in.View(); - auto out_v = out.View(); DhopComputeTime -= usecond(); - if (dag == DaggerYes) { - thread_for(sss, in.Grid()->oSites(),{ - Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); - }); - } else { - thread_for(sss, in.Grid()->oSites(),{ - Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); - }); + { + int interior=1; + int exterior=1; + Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); @@ -600,6 +550,7 @@ template void ImprovedStaggeredFermion::ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu) { @@ -609,6 +560,7 @@ void ImprovedStaggeredFermion::ContractConservedCurrent(PropagatorField &q template void ImprovedStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu, unsigned int tmin, diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h index f74c7a51..41b9170d 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -44,9 +44,9 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -84,9 +84,9 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto pm = this->pm; int shift_s = (pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator @@ -132,9 +132,9 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -174,9 +174,9 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm GridBase *grid = psi_i.Grid(); int Ls = this->Ls; int shift_s = (this->pm == 1) ? (Ls-1) : 0; // s-component modified by shift operator - auto psi = psi_i.View(); - auto phi = phi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(phi , phi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); @@ -226,8 +226,8 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -286,8 +286,8 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto pm = this->pm; auto plee = & this->lee [0]; @@ -354,8 +354,8 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionFiel chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); int Ls = this->Ls; - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); auto plee = & this->lee [0]; auto pdee = & this->dee [0]; @@ -410,8 +410,8 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); - auto psi = psi_i.View(); - auto chi = chi_i.View(); + autoView(psi , psi_i, AcceleratorRead); + autoView(chi , chi_i, AcceleratorWrite); int Ls = this->Ls; auto pm = this->pm; diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h index 256423e6..9b9db178 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h @@ -166,7 +166,7 @@ void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& c /*****************************************************************************************************/ template -RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) +void MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -174,11 +174,10 @@ RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) this->DW(Din, chi, DaggerNo); axpby(chi, 1.0, 1.0, chi, psi); this->M5D(psi, chi); - return(norm2(chi)); } template -RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +void MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -186,7 +185,6 @@ RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) this->MeooeDag5D(Din, chi); this->M5Ddag(psi, chi); axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); } /******************************************************************** diff --git a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h new file mode 100644 index 00000000..788e02cf --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h @@ -0,0 +1,499 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +#pragma once + +NAMESPACE_BEGIN(Grid); + +///////////////////////////////// +// Constructor and gauge import +///////////////////////////////// + +template +NaiveStaggeredFermion::NaiveStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, + RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements,p), + StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid), + _tmp(&Hgrid) +{ + int vol4; + int LLs=1; + c1=_c1; + u0=_u0; + vol4= _grid->oSites(); + Stencil.BuildSurfaceList(LLs,vol4); + vol4= _cbgrid->oSites(); + StencilEven.BuildSurfaceList(LLs,vol4); + StencilOdd.BuildSurfaceList(LLs,vol4); +} + +template +NaiveStaggeredFermion::NaiveStaggeredFermion(GaugeField &_U, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + RealD _c1, RealD _u0, + const ImplParams &p) + : NaiveStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_u0,p) +{ + ImportGauge(_U); +} + +//////////////////////////////////////////////////////////// +// Momentum space propagator should be +// https://arxiv.org/pdf/hep-lat/9712010.pdf +// +// mom space action. +// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m +// +// must track through staggered flavour/spin reduction in literature to +// turn to free propagator for the one component chi field, a la page 4/5 +// of above link to implmement fourier based solver. +//////////////////////////////////////////////////////////// + +template +void NaiveStaggeredFermion::CopyGaugeCheckerboards(void) +{ + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd , Umu); +} +template +void NaiveStaggeredFermion::ImportGauge(const GaugeField &_U) +{ + GaugeLinkField U(GaugeGrid()); + DoubledGaugeField _UUU(GaugeGrid()); + //////////////////////////////////////////////////////// + // Double Store should take two fields for Naik and one hop separately. + // Discard teh Naik as Naive + //////////////////////////////////////////////////////// + Impl::DoubleStore(GaugeGrid(), _UUU, Umu, _U, _U ); + + //////////////////////////////////////////////////////// + // Apply scale factors to get the right fermion Kinetic term + // Could pass coeffs into the double store to save work. + // 0.5 ( U p(x+mu) - Udag(x-mu) p(x-mu) ) + //////////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + U = PeekIndex(Umu, mu); + PokeIndex(Umu, U*( 0.5*c1/u0), mu ); + + U = PeekIndex(Umu, mu+4); + PokeIndex(Umu, U*(-0.5*c1/u0), mu+4); + + } + + CopyGaugeCheckerboards(); +} + +///////////////////////////// +// Implement the interface +///////////////////////////// + +template +void NaiveStaggeredFermion::M(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Dhop(in, out, DaggerNo); + axpy(out, mass, in, out); +} + +template +void NaiveStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Dhop(in, out, DaggerYes); + axpy(out, mass, in, out); +} + +template +void NaiveStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); + } +} +template +void NaiveStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); + } +} + +template +void NaiveStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + typename FermionField::scalar_type scal(mass); + out = scal * in; +} + +template +void NaiveStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + Mooee(in, out); +} + +template +void NaiveStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + out = (1.0 / (mass)) * in; +} + +template +void NaiveStaggeredFermion::MooeeInvDag(const FermionField &in, FermionField &out) +{ + out.Checkerboard() = in.Checkerboard(); + MooeeInv(in, out); +} + +/////////////////////////////////// +// Internal +/////////////////////////////////// + +template +void NaiveStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, + GaugeField & mat, + const FermionField &A, const FermionField &B, int dag) +{ + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor; + + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); + Atilde = A; + + st.HaloExchange(B, compressor); + + for (int mu = 0; mu < Nd; mu++) { + + //////////////////////// + // Call the single hop + //////////////////////// + autoView( U_v , U, CpuRead); + autoView( B_v , B, CpuWrite); + autoView( Btilde_v , Btilde, CpuWrite); + thread_for(sss,B.Grid()->oSites(),{ + Kernels::DhopDirKernel(st, U_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); + }); + + assert(0);// need to figure out the force interface with a blasted three link term. + + } +} + +template +void NaiveStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _grid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + mat.Checkerboard() = U.Checkerboard(); + + DerivInternal(Stencil, Umu, mat, U, V, dag); +} + +template +void NaiveStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + assert(V.Checkerboard() == Even); + assert(U.Checkerboard() == Odd); + mat.Checkerboard() = Odd; + + DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); +} + +template +void NaiveStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { + + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); + + assert(V.Checkerboard() == Odd); + assert(U.Checkerboard() == Even); + mat.Checkerboard() = Even; + + DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); +} + +template +void NaiveStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=2; + conformable(in.Grid(), _grid); // verifies full grid + conformable(in.Grid(), out.Grid()); + + out.Checkerboard() = in.Checkerboard(); + + DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); +} + +template +void NaiveStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=1; + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Even); + out.Checkerboard() = Odd; + + DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); +} + +template +void NaiveStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) +{ + DhopCalls+=1; + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check + + assert(in.Checkerboard() == Odd); + out.Checkerboard() = Even; + + DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); +} + +template +void NaiveStaggeredFermion::Mdir(const FermionField &in, FermionField &out, int dir, int disp) +{ + DhopDir(in, out, dir, disp); +} +template +void NaiveStaggeredFermion::MdirAll(const FermionField &in, std::vector &out) +{ + assert(0); // Not implemented yet +} + +template +void NaiveStaggeredFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) +{ + + Compressor compressor; + Stencil.HaloExchange(in, compressor); + autoView( Umu_v , Umu, CpuRead); + autoView( in_v , in, CpuRead); + autoView( out_v , out, CpuWrite); + // thread_for( sss, in.Grid()->oSites(),{ + // Kernels::DhopDirKernel(Stencil, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); + // }); + assert(0); +}; + + +template +void NaiveStaggeredFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute ) + DhopInternalOverlappedComms(st,lo,U,in,out,dag); + else + DhopInternalSerialComms(st,lo,U,in,out,dag); +} +template +void NaiveStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + Compressor compressor; + int len = U.Grid()->oSites(); + + DhopTotalTime -= usecond(); + + DhopFaceTime -= usecond(); + st.Prepare(); + st.HaloGather(in,compressor); + DhopFaceTime += usecond(); + + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); + + DhopFaceTime-=usecond(); + st.CommsMergeSHM(compressor); + DhopFaceTime+= usecond(); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // Removed explicit thread comms + ////////////////////////////////////////////////////////////////////////////////////////////////////// + DhopComputeTime -= usecond(); + { + int interior=1; + int exterior=0; + Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime += usecond(); + + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); + + // First to enter, last to leave timing + DhopFaceTime -= usecond(); + st.CommsMerge(compressor); + DhopFaceTime -= usecond(); + + DhopComputeTime2 -= usecond(); + { + int interior=0; + int exterior=1; + Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime2 += usecond(); +} + +template +void NaiveStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) +{ + assert((dag == DaggerNo) || (dag == DaggerYes)); + + DhopTotalTime -= usecond(); + + DhopCommTime -= usecond(); + Compressor compressor; + st.HaloExchange(in, compressor); + DhopCommTime += usecond(); + + DhopComputeTime -= usecond(); + { + int interior=1; + int exterior=1; + Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior); + } + DhopComputeTime += usecond(); + DhopTotalTime += usecond(); +}; + + //////////////////////////////////////////////////////////////// + // Reporting + //////////////////////////////////////////////////////////////// +template +void NaiveStaggeredFermion::Report(void) +{ + Coordinate latt = _grid->GlobalDimensions(); + RealD volume = 1; for(int mu=0;mu_Nprocessors; + RealD NN = _grid->NodeCount(); + + std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; + + std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : " + << DhopCalls << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : " + << DhopTotalTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : " + << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : " + << DhopComputeTime / DhopCalls << " us" << std::endl; + + // Average the compute time + _grid->GlobalSum(DhopComputeTime); + DhopComputeTime/=NP; + + RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl; + + RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl; + + std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" < +void NaiveStaggeredFermion::ZeroCounters(void) +{ + DhopCalls = 0; + DhopTotalTime = 0; + DhopCommTime = 0; + DhopComputeTime = 0; + DhopFaceTime = 0; + + Stencil.ZeroCounters(); + StencilEven.ZeroCounters(); + StencilOdd.ZeroCounters(); +} + + +//////////////////////////////////////////////////////// +// Conserved current - not yet implemented. +//////////////////////////////////////////////////////// +template +void NaiveStaggeredFermion::ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu) +{ + assert(0); +} + +template +void NaiveStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + PropagatorField &src, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx) +{ + assert(0); + +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h index edc674cc..0206828b 100644 --- a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h @@ -269,16 +269,14 @@ void PartialFractionFermion5D::M_internal(const FermionField &psi, Fermi } template -RealD PartialFractionFermion5D::M (const FermionField &in, FermionField &out) +void PartialFractionFermion5D::M (const FermionField &in, FermionField &out) { M_internal(in,out,DaggerNo); - return norm2(out); } template -RealD PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) +void PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) { M_internal(in,out,DaggerYes); - return norm2(out); } template diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h index 1a13e73a..63fd2a2f 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h @@ -618,10 +618,10 @@ Author: paboyle NAMESPACE_BEGIN(Grid); template -void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { assert(0); @@ -680,12 +680,13 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, gauge2 =(uint64_t)&UU[sU]( Z ); \ gauge3 =(uint64_t)&UU[sU]( T ); + // This is the single precision 5th direction vectorised kernel #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -702,9 +703,10 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl } #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dag) { #ifdef AVX512 @@ -756,8 +758,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl // This is the single precision 5th direction vectorised kernel #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -841,9 +844,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s void StaggeredKernels::DhopSiteAsm(StencilImpl &st, } #include -template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, +template <> void StaggeredKernels::DhopSiteAsm(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 @@ -910,9 +913,9 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, StencilEntry *SE2; StencilEntry *SE3; - for(int s=0;s -void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHand(StencilView &st, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -181,8 +182,9 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG (U,Ym,2,skew,odd); HAND_STENCIL_LEG (U,Zm,1,skew,even); HAND_STENCIL_LEG (U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG(UUU,Xp,3,skew,even); HAND_STENCIL_LEG(UUU,Yp,2,skew,odd); @@ -202,7 +205,7 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG(UUU,Zm,1,skew,even); HAND_STENCIL_LEG(UUU,Tm,0,skew,odd); - + } if ( dag ) { result()()(0) = - even_0 - odd_0; result()()(1) = - even_1 - odd_1; @@ -218,9 +221,10 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, template -void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHandInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -253,8 +257,9 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd); HAND_STENCIL_LEG_INT(U,Zm,1,skew,even); HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even); HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd); @@ -277,7 +283,7 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even); HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd); - + } // Assume every site must be connected to at least one interior point. No 1^4 subvols. if ( dag ) { result()()(0) = - even_0 - odd_0; @@ -294,9 +300,10 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, template -void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteHandExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; @@ -329,8 +336,9 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, StencilEntry *SE; int skew; - for(int s=0;s::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd); HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even); HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd); + if (Naik) { skew = 8; HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even); HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd); @@ -353,7 +362,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd); HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even); HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd); - + } // Add sum of all exterior connected stencil legs if ( nmu ) { if ( dag ) { @@ -370,6 +379,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, } } +/* #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ @@ -385,7 +395,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, int sU, \ const FermionFieldView &in, FermionFieldView &out, int dag); \ - +*/ #undef LOAD_CHI NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h index d301556c..141725a7 100644 --- a/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h @@ -37,9 +37,9 @@ NAMESPACE_BEGIN(Grid); if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else { \ chi_p = &buf[SE->_offset]; \ @@ -51,15 +51,15 @@ NAMESPACE_BEGIN(Grid); if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else if ( st.same_node[Dir] ) { \ chi_p = &buf[SE->_offset]; \ } \ if (SE->_is_local || st.same_node[Dir] ) { \ - multLink(Uchi, U[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \ @@ -67,7 +67,7 @@ NAMESPACE_BEGIN(Grid); if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ nmu++; \ chi_p = &buf[SE->_offset]; \ - multLink(Uchi, U[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } template @@ -78,10 +78,12 @@ StaggeredKernels::StaggeredKernels(const ImplParams &p) : Base(p){}; // Int, Ext, Int+Ext cases for comms overlap //////////////////////////////////////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGeneric(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, int dag) { + SiteSpinor *buf, int sF, int sU, + const FermionFieldView &in, FermionFieldView &out, int dag) +{ const SiteSpinor *chi_p; SiteSpinor chi; SiteSpinor Uchi; @@ -89,8 +91,10 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, int ptype; int skew; - for(int s=0;s::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd); @@ -109,6 +114,7 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd); + } if ( dag ) { Uchi = - Uchi; } @@ -120,9 +126,10 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, // Only contributions from interior of our node /////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGenericInt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; SiteSpinor chi; @@ -131,8 +138,9 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & int ptype; int skew ; - for(int s=0;s::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd); @@ -152,6 +161,7 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd); + } if ( dag ) { Uchi = - Uchi; } @@ -164,9 +174,10 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & // Only contributions from exterior of our node /////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, +template +void StaggeredKernels::DhopSiteGenericExt(StencilView &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, + SiteSpinor *buf, int sF, int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; // SiteSpinor chi; @@ -176,8 +187,9 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & int nmu=0; int skew ; - for(int s=0;s::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd); + if ( Naik ) { skew=8; GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd); @@ -197,7 +210,7 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd); GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd); - + } if ( nmu ) { if ( dag ) { out[sF] = out[sF] - Uchi; @@ -211,72 +224,9 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & //////////////////////////////////////////////////////////////////////////////////// // Driving / wrapping routine to select right kernel //////////////////////////////////////////////////////////////////////////////////// - template -void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, - int interior,int exterior) -{ - int dag=1; - DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior); -}; - -template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionFieldView &in, FermionFieldView &out, - int interior,int exterior) -{ - int dag=0; - DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior); -}; - -template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, - SiteSpinor *buf, int LLs, - int sU, const FermionFieldView &in, FermionFieldView &out, - int dag,int interior,int exterior) -{ - switch(Opt) { -#ifdef AVX512 - case OptInlineAsm: - if ( interior && exterior ) { - DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag); - } else { - std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldVi assert(0); } +#define KERNEL_CALLNB(A,improved) \ + const uint64_t NN = Nsite*Ls; \ + accelerator_forNB( ss, NN, Simd::Nsimd(), { \ + int sF = ss; \ + int sU = ss/Ls; \ + ThisKernel:: template A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \ + }); + +#define KERNEL_CALL(A,improved) KERNEL_CALLNB(A,improved); accelerator_barrier(); + +#define ASM_CALL(A) \ + const uint64_t NN = Nsite*Ls; \ + thread_for( ss, NN, { \ + int sF = ss; \ + int sU = ss/Ls; \ + ThisKernel::A(st_v,U_v,UUU_v,buf,sF,sU,in_v,out_v,dag); \ + }); + +template +void StaggeredKernels::DhopImproved(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, DoubledGaugeField &UUU, + const FermionField &in, FermionField &out, int dag, int interior,int exterior) +{ + GridBase *FGrid=in.Grid(); + GridBase *UGrid=U.Grid(); + typedef StaggeredKernels ThisKernel; + autoView( UUU_v , UUU, AcceleratorRead); + autoView( U_v , U, AcceleratorRead); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( st_v , st, AcceleratorRead); + SiteSpinor * buf = st.CommBuf(); + + int Ls=1; + if(FGrid->Nd()==UGrid->Nd()+1){ + Ls = FGrid->_rdimensions[0]; + } + int Nsite = UGrid->oSites(); + + if( interior && exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;} + if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;} +#endif + } else if( interior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;} +#endif + } else if( exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;} +#endif + } + assert(0 && " Kernel optimisation case not covered "); +} +template +void StaggeredKernels::DhopNaive(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag, int interior,int exterior) +{ + GridBase *FGrid=in.Grid(); + GridBase *UGrid=U.Grid(); + typedef StaggeredKernels ThisKernel; + autoView( UUU_v , U, AcceleratorRead); + autoView( U_v , U, AcceleratorRead); + autoView( in_v , in, AcceleratorRead); + autoView( out_v , out, AcceleratorWrite); + autoView( st_v , st, AcceleratorRead); + SiteSpinor * buf = st.CommBuf(); + + int Ls=1; + if(FGrid->Nd()==UGrid->Nd()+1){ + Ls = FGrid->_rdimensions[0]; + } + int Nsite = UGrid->oSites(); + + if( interior && exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;} +#endif + } else if( interior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;} +#endif + } else if( exterior ) { + if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;} +#ifndef GRID_CUDA + if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;} +#endif + } +} + + +#undef KERNEL_CALLNB +#undef KERNEL_CALL +#undef ASM_CALL + NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 5744d3bb..df1bce7c 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid); // *NOT* EO template -RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) +void WilsonCloverFermion::M(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -47,11 +47,10 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) Mooee(in, temp); out += temp; - return norm2(out); } template -RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -63,7 +62,6 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) MooeeDag(in, temp); out += temp; - return norm2(out); } template @@ -100,46 +98,49 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) Coordinate lcoor; typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero(); - for (int site = 0; site < lvol; site++) { - grid->LocalIndexToLocalCoor(site, lcoor); - EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - peekLocalSite(Qx, CloverTerm, lcoor); - Qxinv = Zero(); - //if (csw!=0){ - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++){ - auto zz = Qx()(j, k)(a, b); - EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); - } - // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; - - EigenInvCloverOp = EigenCloverOp.inverse(); - //std::cout << EigenInvCloverOp << std::endl; - for (int j = 0; j < Ns; j++) - for (int k = 0; k < Ns; k++) - for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); - // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; - // } - pokeLocalSite(Qxinv, CloverTermInv, lcoor); + autoView(CTv,CloverTerm,CpuRead); + autoView(CTIv,CloverTermInv,CpuWrite); + for (int site = 0; site < lvol; site++) { + grid->LocalIndexToLocalCoor(site, lcoor); + EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); + peekLocalSite(Qx, CTv, lcoor); + Qxinv = Zero(); + //if (csw!=0){ + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } + // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; + + EigenInvCloverOp = EigenCloverOp.inverse(); + //std::cout << EigenInvCloverOp << std::endl; + for (int j = 0; j < Ns; j++) + for (int k = 0; k < Ns; k++) + for (int a = 0; a < DimRep; a++) + for (int b = 0; b < DimRep; b++) + Qxinv()(j, k)(a, b) = EigenInvCloverOp(a + j * DimRep, b + k * DimRep); + // if (site==0) std::cout << "site =" << site << "\n" << EigenInvCloverOp << std::endl; + // } + pokeLocalSite(Qxinv, CTIv, lcoor); + } } // Separate the even and odd parts pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); - pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); - pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); + pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm))); + pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm))); pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); - pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); - pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); + pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv))); + pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv))); } template diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index 613eaa7b..2cc308cc 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -580,16 +580,21 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const cosha = (one + W*W + sk) / (abs(W)*2.0); // FIXME Need a Lattice acosh - for(int idx=0;idx<_grid->lSites();idx++){ - Coordinate lcoor(Nd); - Tcomplex cc; - // RealD sgn; - _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha,lcoor); - assert((double)real(cc)>=1.0); - assert(fabs((double)imag(cc))<=1.0e-15); - cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a,lcoor); + + { + autoView(cosha_v,cosha,CpuRead); + autoView(a_v,a,CpuWrite); + for(int idx=0;idx<_grid->lSites();idx++){ + Coordinate lcoor(Nd); + Tcomplex cc; + // RealD sgn; + _grid->LocalIndexToLocalCoor(idx,lcoor); + peekLocalSite(cc,cosha_v,lcoor); + assert((double)real(cc)>=1.0); + assert(fabs((double)imag(cc))<=1.0e-15); + cc = ScalComplex(::acosh(real(cc)),0.0); + pokeLocalSite(cc,a_v,lcoor); + } } Wea = ( exp( a) * abs(W) ); @@ -775,17 +780,20 @@ void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const Fe cosha = (one + W*W + sk) / (abs(W)*2.0); // FIXME Need a Lattice acosh + { + autoView(cosha_v,cosha,CpuRead); + autoView(a_v,a,CpuWrite); for(int idx=0;idx<_grid->lSites();idx++){ Coordinate lcoor(Nd); Tcomplex cc; // RealD sgn; _grid->LocalIndexToLocalCoor(idx,lcoor); - peekLocalSite(cc,cosha,lcoor); + peekLocalSite(cc,cosha_v,lcoor); assert((double)real(cc)>=1.0); assert(fabs((double)imag(cc))<=1.0e-15); cc = ScalComplex(::acosh(real(cc)),0.0); - pokeLocalSite(cc,a,lcoor); - } + pokeLocalSite(cc,a_v,lcoor); + }} Wea = ( exp( a) * abs(W) ); Wema= ( exp(-a) * abs(W) ); @@ -861,7 +869,6 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe * Conserved current utilities for Wilson fermions, for contracting propagators * to make a conserved current sink or inserting the conserved current * sequentially. - ******************************************************************************/ // Helper macro to reverse Simd vector. Fixme: slow, generic implementation. #define REVERSE_LS(qSite, qSiteRev, Nsimd) \ @@ -877,220 +884,10 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe merge(qSiteRev, qSiteVec); \ } -// psi = chiralProjectPlus(Result_s[Ls/2-1]); -// psi+= chiralProjectMinus(Result_s[Ls/2]); -// PJ5q+=localInnerProduct(psi,psi); - -template -Lattice spProj5p(const Lattice & in) -{ - GridBase *grid=in.Grid(); - Gamma G5(Gamma::Algebra::Gamma5); - Lattice ret(grid); - auto ret_v = ret.View(); - auto in_v = in.View(); - thread_for(ss,grid->oSites(),{ - ret_v[ss] = in_v[ss] + G5*in_v[ss]; - }); - return ret; -} -template -Lattice spProj5m(const Lattice & in) -{ - Gamma G5(Gamma::Algebra::Gamma5); - GridBase *grid=in.Grid(); - Lattice ret(grid); - auto ret_v = ret.View(); - auto in_v = in.View(); - thread_for(ss,grid->oSites(),{ - ret_v[ss] = in_v[ss] - G5*in_v[ss]; - }); - return ret; -} - -template -void WilsonFermion5D::ContractJ5q(FermionField &q_in,ComplexField &J5q) -{ - conformable(GaugeGrid(), J5q.Grid()); - conformable(q_in.Grid(), FermionGrid()); - - // 4d field - int Ls = this->Ls; - FermionField psi(GaugeGrid()); - FermionField p_plus (GaugeGrid()); - FermionField p_minus(GaugeGrid()); - FermionField p(GaugeGrid()); - - ExtractSlice(p_plus , q_in, Ls/2 , 0); - ExtractSlice(p_minus, q_in, Ls/2-1 , 0); - p_plus = spProj5p(p_plus ); - p_minus= spProj5m(p_minus); - p=p_plus+p_minus; - J5q = localInnerProduct(p,p); -} - -template -void WilsonFermion5D::ContractJ5q(PropagatorField &q_in,ComplexField &J5q) -{ - conformable(GaugeGrid(), J5q.Grid()); - conformable(q_in.Grid(), FermionGrid()); - - // 4d field - int Ls = this->Ls; - PropagatorField psi(GaugeGrid()); - PropagatorField p_plus (GaugeGrid()); - PropagatorField p_minus(GaugeGrid()); - PropagatorField p(GaugeGrid()); - - ExtractSlice(p_plus , q_in, Ls/2 , 0); - ExtractSlice(p_minus, q_in, Ls/2-1 , 0); - p_plus = spProj5p(p_plus ); - p_minus= spProj5m(p_minus); - p=p_plus+p_minus; - J5q = localInnerProduct(p,p); -} - -template -void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu) -{ - conformable(q_in_1.Grid(), FermionGrid()); - conformable(q_in_1.Grid(), q_in_2.Grid()); - conformable(_FourDimGrid, q_out.Grid()); - - PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid()); - unsigned int LLs = q_in_1.Grid()->_rdimensions[0]; - q_out = Zero(); - - // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), - // q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one. - tmp1 = Cshift(q_in_1, mu + 1, 1); - tmp2 = Cshift(q_in_2, mu + 1, 1); - auto q_in_1_v = q_in_1.View(); - auto q_in_2_v = q_in_2.View(); - auto tmp1_v = tmp1.View(); - auto tmp2_v = tmp2.View(); - auto q_out_v = q_out.View(); - auto Umu_v = Umu.View(); - thread_for(sU, Umu.Grid()->oSites(),{ - - unsigned int sF1 = sU * LLs; - unsigned int sF2 = (sU + 1) * LLs - 1; - - for (unsigned int s = 0; s < LLs; ++s) - { - bool axial_sign = ((curr_type == Current::Axial) && \ - (s < (LLs / 2))); - SitePropagator qSite2, qmuSite2; - - // If vectorised in 5th dimension, reverse q2 vector to match up - // sites correctly. - if (Impl::LsVectorised) - { - REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs); - REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs); - } - else - { - qSite2 = q_in_2_v[sF2]; - qmuSite2 = tmp2_v[sF2]; - } - Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1], - qSite2, - q_out_v[sU], - Umu_v, sU, mu, axial_sign); - Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1], - qmuSite2, - q_out_v[sU], - Umu_v, sU, mu, axial_sign); - sF1++; - sF2--; - } - }); -} + ******************************************************************************/ -template -void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx) -{ - conformable(q_in.Grid(), FermionGrid()); - conformable(q_in.Grid(), q_out.Grid()); - PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid()); - unsigned int tshift = (mu == Tp) ? 1 : 0; - unsigned int LLs = q_in.Grid()->_rdimensions[0]; - unsigned int LLt = GridDefaultLatt()[Tp]; - q_out = Zero(); - LatticeInteger coords(_FourDimGrid); - LatticeCoordinate(coords, Tp); - - auto q_out_v = q_out.View(); - auto tmp2_v = tmp2.View(); - auto coords_v= coords.View(); - auto Umu_v = Umu.View(); - for (unsigned int s = 0; s < LLs; ++s) - { - bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); - bool tadpole_sign = (curr_type == Current::Tadpole); - bool switch_sgn = tadpole_sign || axial_sign; - - - //forward direction: Need q(x + mu, s)*A(x) - ExtractSlice(tmp2, q_in, s, 0); //q(x,s) - tmp = Cshift(tmp2, mu, 1); //q(x+mu,s) - tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x) - - thread_for(sU, Umu.Grid()->oSites(),{ - // Compute the sequential conserved current insertion only if our simd - // object contains a timeslice we need. - vPredicate t_mask; - t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax)); - Integer timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) - { - unsigned int sF = sU * LLs + s; - Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU], - q_out_v[sF], Umu_v, sU, - mu, t_mask, switch_sgn); - } - - }); - - //backward direction: Need q(x - mu, s)*A(x-mu) - ExtractSlice(tmp2, q_in, s, 0); //q(x,s) - tmp = lattice_cmplx*tmp2; //q(x,s)*A(x) - tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s) - - thread_for(sU, Umu.Grid()->oSites(), - { - vPredicate t_mask; - t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift))); - - //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) - unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 )); - - Integer timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) { - unsigned int sF = sU * LLs + s; - Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU], - q_out_v[sF], Umu_v, sU, - mu, t_mask, axial_sign); - } - }); - } -} NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index cb852a63..4977ea68 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -67,7 +67,12 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, diag_mass = 4.0 + mass; } - + int vol4; + vol4=Fgrid.oSites(); + Stencil.BuildSurfaceList(1,vol4); + vol4=Hgrid.oSites(); + StencilEven.BuildSurfaceList(1,vol4); + StencilOdd.BuildSurfaceList(1,vol4); } template @@ -187,21 +192,24 @@ void WilsonFermion::ImportGauge(const GaugeField &_Umu) ///////////////////////////// template -RealD WilsonFermion::M(const FermionField &in, FermionField &out) { +void WilsonFermion::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, diag_mass, in, out); + axpy(out, diag_mass, in, out); } template -RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { +void WilsonFermion::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, diag_mass, in, out); + axpy(out, diag_mass, in, out); } template -void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { +void WilsonFermion::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -210,7 +218,8 @@ void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { } template -void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -219,26 +228,30 @@ void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { } template -void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { +void WilsonFermion::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(diag_mass); out = scal * in; } template -void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0/(diag_mass))*in; } template -void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in,out); } @@ -341,7 +354,8 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, } template -void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _grid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -352,7 +366,8 @@ void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, cons } template -void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido) @@ -366,7 +381,8 @@ void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, co } template -void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); @@ -379,8 +395,8 @@ void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, co } template -void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=2; +void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) +{ conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); @@ -390,8 +406,8 @@ void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int da } template -void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { - DhopCalls+=1; +void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) +{ conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -402,8 +418,8 @@ void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int } template -void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { - DhopCalls+=1; +void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) +{ conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -482,7 +498,8 @@ template void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, - FermionField &out, int dag) { + FermionField &out, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); @@ -547,7 +564,8 @@ template void WilsonFermion::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, - FermionField &out, int dag) { + FermionField &out, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); DhopCommTime-=usecond(); @@ -574,6 +592,7 @@ template void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, PropagatorField &q_in_2, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu) { @@ -581,35 +600,14 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, conformable(_grid, q_in_1.Grid()); conformable(_grid, q_in_2.Grid()); conformable(_grid, q_out.Grid()); - PropagatorField tmp1(_grid), tmp2(_grid); - q_out = Zero(); - - // Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu). - // Inefficient comms method but not performance critical. - tmp1 = Cshift(q_in_1, mu, 1); - tmp2 = Cshift(q_in_2, mu, 1); - auto tmp1_v = tmp1.View(); - auto tmp2_v = tmp2.View(); - auto q_in_1_v=q_in_1.View(); - auto q_in_2_v=q_in_2.View(); - auto q_out_v = q_out.View(); - auto Umu_v = Umu.View(); - thread_for(sU, Umu.Grid()->oSites(),{ - Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], - q_in_2_v[sU], - q_out_v[sU], - Umu_v, sU, mu); - Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU], - tmp2_v[sU], - q_out_v[sU], - Umu_v, sU, mu); - }); + assert(0); } template void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, + PropagatorField &src, Current curr_type, unsigned int mu, unsigned int tmin, @@ -618,59 +616,7 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, { conformable(_grid, q_in.Grid()); conformable(_grid, q_out.Grid()); - - // Lattice> ph(_grid), coor(_grid); - Complex i(0.0,1.0); - PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); - unsigned int tshift = (mu == Tp) ? 1 : 0; - unsigned int LLt = GridDefaultLatt()[Tp]; - - q_out = Zero(); - LatticeInteger coords(_grid); - LatticeCoordinate(coords, Tp); - - // Need q(x + mu) and q(x - mu). - tmp = Cshift(q_in, mu, 1); - tmpFwd = tmp*lattice_cmplx; - tmp = lattice_cmplx*q_in; - tmpBwd = Cshift(tmp, mu, -1); - - auto coords_v = coords.View(); - auto tmpFwd_v = tmpFwd.View(); - auto tmpBwd_v = tmpBwd.View(); - auto Umu_v = Umu.View(); - auto q_out_v = q_out.View(); - - thread_for(sU, Umu.Grid()->oSites(), { - - // Compute the sequential conserved current insertion only if our simd - // object contains a timeslice we need. - vPredicate t_mask; - t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax)); - Integer timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU], - q_out_v[sU], - Umu_v, sU, mu, t_mask); - } - - // Repeat for backward direction. - t_mask() = ((coords_v[sU] >= (tmin + tshift)) && - (coords_v[sU] <= (tmax + tshift))); - - //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) - unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 )); - - timeSlices = Reduce(t_mask()); - - if (timeSlices > 0) { - Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU], - q_out_v[sU], - Umu_v, sU, mu, t_mask); - } - }); + assert(0); } NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index 81216e03..c2b62416 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -39,19 +39,21 @@ NAMESPACE_BEGIN(Grid); // Generic implementation; move to different file? //////////////////////////////////////////// +/* accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) { -#ifdef __CUDA_ARCH__ - static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size"); +#ifdef GRID_SIMT + static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size"); uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads uint4 * chip_pun = (uint4 *)&chip; * chip_pun = * mem_pun; -#else +#else chip = *mem; #endif return; } - +*/ + #define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ if (SE->_is_local) { \ @@ -61,10 +63,10 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); - + #define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ if (SE->_is_local) { \ @@ -74,12 +76,12 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else if ( st.same_node[Dir] ) { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ if (SE->_is_local || st.same_node[Dir] ) { \ Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ Recon(result, Uchi); \ } \ - synchronise(); + acceleratorSynchronise(); #define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ SE = st.GetEntry(ptype, Dir, sF); \ @@ -89,7 +91,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) Recon(result, Uchi); \ nmu++; \ } \ - synchronise(); + acceleratorSynchronise(); #define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon) \ if (SE->_is_local ) { \ @@ -99,9 +101,9 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) } else { \ chi = coalescedRead(buf[SE->_offset],lane); \ } \ - synchronise(); \ + acceleratorSynchronise(); \ Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \ - Recon(result, Uchi); + Recon(result, Uchi); #define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \ if (gamma == Dir) { \ @@ -126,7 +128,7 @@ void WilsonKernels::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp); GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp); GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp); @@ -141,7 +143,7 @@ void WilsonKernels::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV template void WilsonKernels::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionFieldView &in, FermionFieldView &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; typedef decltype(coalescedRead(in[0])) calcSpinor; @@ -153,7 +155,7 @@ void WilsonKernels::GenericDhopSite(StencilView &st, DoubledGaugeFieldView int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp); GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp); GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp); @@ -181,7 +183,7 @@ void WilsonKernels::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp); @@ -198,12 +200,12 @@ void WilsonKernels::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi template void WilsonKernels::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionFieldView &in, FermionFieldView &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; typedef decltype(coalescedRead(in[0])) calcSpinor; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); calcHalfSpinor chi; // calcHalfSpinor *chi_p; @@ -239,7 +241,7 @@ void WilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi int ptype; int nmu=0; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp); GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp); @@ -249,7 +251,7 @@ void WilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm); GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm); GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm); - if ( nmu ) { + if ( nmu ) { auto out_t = coalescedRead(out[sF],lane); out_t = out_t + result; coalescedWrite(out[sF],out_t,lane); @@ -259,7 +261,7 @@ void WilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi template void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, int sF, - int sU, const FermionFieldView &in, FermionFieldView &out) + int sU, const FermionFieldView &in, FermionFieldView &out) { typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; typedef decltype(coalescedRead(in[0])) calcSpinor; @@ -270,7 +272,7 @@ void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeField int ptype; int nmu=0; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); result=Zero(); GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp); GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp); @@ -280,7 +282,7 @@ void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeField GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm); GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm); GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm); - if ( nmu ) { + if ( nmu ) { auto out_t = coalescedRead(out[sF],lane); out_t = out_t + result; coalescedWrite(out[sF],out_t,lane); @@ -300,12 +302,12 @@ void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeField StencilEntry *SE; \ int ptype; \ const int Nsimd = SiteHalfSpinor::Nsimd(); \ - const int lane=SIMTlane(Nsimd); \ + const int lane=acceleratorSIMTlane(Nsimd); \ \ SE = st.GetEntry(ptype, dir, sF); \ GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon); \ coalescedWrite(out[sF], result,lane); \ - } + } DhopDirMacro(Xp,spProjXp,spReconXp); DhopDirMacro(Yp,spProjYp,spReconYp); @@ -316,9 +318,9 @@ DhopDirMacro(Ym,spProjYm,spReconYm); DhopDirMacro(Zm,spProjZm,spReconZm); DhopDirMacro(Tm,spProjTm,spReconTm); -template +template void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, - int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) + int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) { typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; typedef decltype(coalescedRead(in[0])) calcSpinor; @@ -328,7 +330,7 @@ void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si StencilEntry *SE; int ptype; const int Nsimd = SiteHalfSpinor::Nsimd(); - const int lane=SIMTlane(Nsimd); + const int lane=acceleratorSIMTlane(Nsimd); SE = st.GetEntry(ptype, dir, sF); GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp); @@ -344,54 +346,55 @@ void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si template void WilsonKernels::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, - int Nsite, const FermionField &in, std::vector &out) + int Nsite, const FermionField &in, std::vector &out) { - auto U_v = U.View(); - auto in_v = in.View(); - auto st_v = st.View(); + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(st_v ,st,AcceleratorRead); - auto out_Xm = out[0].View(); - auto out_Ym = out[1].View(); - auto out_Zm = out[2].View(); - auto out_Tm = out[3].View(); - auto out_Xp = out[4].View(); - auto out_Yp = out[5].View(); - auto out_Zp = out[6].View(); - auto out_Tp = out[7].View(); - - accelerator_forNB(sss,Nsite*Ls,Simd::Nsimd(),{ - int sU=sss/Ls; - int sF =sss; - DhopDirXm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xm,0); - DhopDirYm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Ym,1); - DhopDirZm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zm,2); - DhopDirTm(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tm,3); - DhopDirXp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Xp,4); - DhopDirYp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Yp,5); - DhopDirZp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Zp,6); - DhopDirTp(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_Tp,7); + autoView(out_Xm,out[0],AcceleratorWrite); + autoView(out_Ym,out[1],AcceleratorWrite); + autoView(out_Zm,out[2],AcceleratorWrite); + autoView(out_Tm,out[3],AcceleratorWrite); + autoView(out_Xp,out[4],AcceleratorWrite); + autoView(out_Yp,out[5],AcceleratorWrite); + autoView(out_Zp,out[6],AcceleratorWrite); + autoView(out_Tp,out[7],AcceleratorWrite); + auto CBp=st.CommBuf(); + accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{ + int sU=sss/Ls; + int sF =sss; + DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0); + DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1); + DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2); + DhopDirTm(st_v,U_v,CBp,sF,sU,in_v,out_Tm,3); + DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,4); + DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,5); + DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,6); + DhopDirTp(st_v,U_v,CBp,sF,sU,in_v,out_Tp,7); }); } template void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, - int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) + int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) { assert(dirdisp<=7); assert(dirdisp>=0); - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + autoView(U_v ,U ,AcceleratorRead); + autoView(in_v ,in ,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st ,AcceleratorRead); + auto CBp=st.CommBuf(); #define LoopBody(Dir) \ - case Dir : \ - accelerator_forNB(ss,Nsite,Simd::Nsimd(),{ \ + case Dir : \ + accelerator_for(ss,Nsite,Simd::Nsimd(),{ \ for(int s=0;s::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S break; } #undef LoopBody -} +} #define KERNEL_CALLNB(A) \ const uint64_t NN = Nsite*Ls; \ @@ -421,7 +424,7 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S WilsonKernels::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ }); -#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); +#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); #define ASM_CALL(A) \ thread_for( ss, Nsite, { \ @@ -433,30 +436,30 @@ void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S template void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, - int interior,int exterior) + int interior,int exterior) { - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + autoView(U_v , U,AcceleratorRead); + autoView(in_v , in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v , st,AcceleratorRead); - if( interior && exterior ) { + if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); /* printf("."); */ return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;} #endif } else if( interior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); /* printf("-"); */ return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;} #endif - } else if( exterior ) { + } else if( exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); /* printf("+"); */ return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;} #endif } assert(0 && " Kernel optimisation case not covered "); @@ -464,28 +467,28 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField template void WilsonKernels::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, int Ls, int Nsite, const FermionField &in, FermionField &out, - int interior,int exterior) + int interior,int exterior) { - auto U_v = U.View(); - auto in_v = in.View(); - auto out_v = out.View(); - auto st_v = st.View(); + autoView(U_v ,U,AcceleratorRead); + autoView(in_v ,in,AcceleratorRead); + autoView(out_v,out,AcceleratorWrite); + autoView(st_v ,st,AcceleratorRead); - if( interior && exterior ) { + if( interior && exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;} #endif } else if( interior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} #endif - } else if( exterior ) { + } else if( exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} -#ifndef GRID_NVCC +#ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} #endif @@ -493,131 +496,8 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField assert(0 && " Kernel optimisation case not covered "); } -/******************************************************************************* - * Conserved current utilities for Wilson fermions, for contracting propagators - * to make a conserved current sink or inserting the conserved current - * sequentially. Common to both 4D and 5D. - ******************************************************************************/ -// N.B. Functions below assume a -1/2 factor within U. -#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr)) -#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr)) - -/******************************************************************************* - * Name: ContractConservedCurrentSiteFwd - * Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in_1 shifted in +ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - - Impl::multLink(tmp, U[sU], q_in_1, mu); - - result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); - - if (switch_sign) { - q_out -= result; - } else { - q_out += result; - } -} - -/******************************************************************************* - * Name: ContractConservedCurrentSiteBwd - * Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in_2 shifted in +ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - - Impl::multLink(tmp, U[sU], q_in_1, mu + Nd); - - result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); - if (switch_sign) { - q_out += result; - } else { - q_out -= result; - } -} - -/******************************************************************************* - * Name: SeqConservedCurrentSiteFwd - * Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in shifted in +ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - vPredicate t_mask, - bool switch_sign) -{ - SitePropagator result; - - Impl::multLink(result, U[sU], q_in, mu); - result = WilsonCurrentFwd(result, mu); - - // Zero any unwanted timeslice entries. - result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) { - q_out -= result; - } else { - q_out += result; - } -} - -/******************************************************************************* - * Name: SeqConservedCurrentSiteFwd - * Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in shifted in -ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - vPredicate t_mask, - bool switch_sign) -{ - SitePropagator result; - Impl::multLink(result, U[sU], q_in, mu + Nd); - result = WilsonCurrentBwd(result, mu); - - // Zero any unwanted timeslice entries. - result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) { - q_out += result; - } else { - q_out -= result; - } -} - +#undef KERNEL_CALLNB +#undef KERNEL_CALL +#undef ASM_CALL NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc deleted file mode 100644 index 75f143cb..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -// Move these - -#include "impl.h" - -// G-parity requires more specialised implementation. -template <> -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - assert(0); -} -template <> -void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int mu, - unsigned int sU, - bool switch_sign) -{ - assert(0); -} - -HAND_SPECIALISE_GPARITY(IMPLEMENTATION); - - -template class WilsonKernels; - - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..87adea48 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiationGparity.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index 75f143cb..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -// Move these - -#include "impl.h" - -// G-parity requires more specialised implementation. -template <> -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - assert(0); -} -template <> -void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int mu, - unsigned int sU, - bool switch_sign) -{ - assert(0); -} - -HAND_SPECIALISE_GPARITY(IMPLEMENTATION); - - -template class WilsonKernels; - - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..87adea48 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiationGparity.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc deleted file mode 100644 index 75f143cb..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -// Move these - -#include "impl.h" - -// G-parity requires more specialised implementation. -template <> -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - assert(0); -} -template <> -void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int mu, - unsigned int sU, - bool switch_sign) -{ - assert(0); -} - -HAND_SPECIALISE_GPARITY(IMPLEMENTATION); - - -template class WilsonKernels; - - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..87adea48 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiationGparity.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index 75f143cb..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -// Move these - -#include "impl.h" - -// G-parity requires more specialised implementation. -template <> -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - assert(0); -} -template <> -void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int mu, - unsigned int sU, - bool switch_sign) -{ - assert(0); -} - -HAND_SPECIALISE_GPARITY(IMPLEMENTATION); - - -template class WilsonKernels; - - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..87adea48 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonKernelsInstantiationGparity.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc new file mode 100644 index 00000000..c424cb2d --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc @@ -0,0 +1,36 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +const std::vector NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1}); + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master new file mode 100644 index 00000000..75b75678 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/NaiveStaggeredFermionInstantiation.cc.master @@ -0,0 +1,37 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/NaiveStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class NaiveStaggeredFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc deleted file mode 100644 index 572b375c..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc - - Copyright (C) 2015 - -Author: Azusa Yamaguchi -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include - -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ImprovedStaggeredFermion5D; - -NAMESPACE_END(Grid); - - - - diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc new file mode 120000 index 00000000..a8082840 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc @@ -0,0 +1 @@ +../ImprovedStaggeredFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc deleted file mode 100644 index d35b7349..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc - -Copyright (C) 2015 - -Author: Azusa Yamaguchi, Peter Boyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ - /* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ImprovedStaggeredFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc new file mode 120000 index 00000000..e239b39a --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc @@ -0,0 +1 @@ +../ImprovedStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc new file mode 120000 index 00000000..42057f56 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/NaiveStaggeredFermionInstantiationStaggeredImplD.cc @@ -0,0 +1 @@ +../NaiveStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc deleted file mode 100644 index c3acf963..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Azusa Yamaguchi, Peter Boyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class StaggeredKernels; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc new file mode 120000 index 00000000..a4d1cbc9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc @@ -0,0 +1 @@ +../StaggeredKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc deleted file mode 100644 index 572b375c..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc - - Copyright (C) 2015 - -Author: Azusa Yamaguchi -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include - -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ImprovedStaggeredFermion5D; - -NAMESPACE_END(Grid); - - - - diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc new file mode 120000 index 00000000..a8082840 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc @@ -0,0 +1 @@ +../ImprovedStaggeredFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc deleted file mode 100644 index d35b7349..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc - -Copyright (C) 2015 - -Author: Azusa Yamaguchi, Peter Boyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ - /* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ImprovedStaggeredFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc new file mode 120000 index 00000000..e239b39a --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc @@ -0,0 +1 @@ +../ImprovedStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc new file mode 120000 index 00000000..42057f56 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/NaiveStaggeredFermionInstantiationStaggeredImplF.cc @@ -0,0 +1 @@ +../NaiveStaggeredFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc deleted file mode 100644 index c3acf963..00000000 --- a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Azusa Yamaguchi, Peter Boyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class StaggeredKernels; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc new file mode 120000 index 00000000..a4d1cbc9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc @@ -0,0 +1 @@ +../StaggeredKernelsInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master index 75f143cb..1aaf929b 100644 --- a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master @@ -40,33 +40,8 @@ NAMESPACE_BEGIN(Grid); #include "impl.h" -// G-parity requires more specialised implementation. -template <> -void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - assert(0); -} -template <> -void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeFieldView &U, - unsigned int mu, - unsigned int sU, - bool switch_sign) -{ - assert(0); -} - HAND_SPECIALISE_GPARITY(IMPLEMENTATION); - template class WilsonKernels; diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index af99dfb6..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc - - Copyright (C) 2017 - - Author: paboyle - Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonCloverFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..9cc05107 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonCloverFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index 6fd9c5ca..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..5f6ab65e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc deleted file mode 100644 index adfa310c..00000000 --- a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonTMFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 120000 index 00000000..d5789bcf --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1 @@ +../WilsonTMFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc deleted file mode 100644 index 5130db9c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - - //#include - //#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class CayleyFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..cb1db625 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../CayleyFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc deleted file mode 100644 index ca0d6cea..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class ContinuedFractionFermion5D; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..c2d4b8fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../ContinuedFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc deleted file mode 100644 index f7198131..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class DomainWallEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..2f550a2b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../DomainWallEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc deleted file mode 100644 index ce7eaac9..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ - /* END LEGAL */ - -#include -#include -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class MobiusEOFAFermion; - -NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..7a8f1172 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../MobiusEOFAFermionInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc deleted file mode 100644 index 757719b6..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class PartialFractionFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..7f4cea71 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../PartialFractionFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc deleted file mode 100644 index 0dac989c..00000000 --- a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc +++ /dev/null @@ -1,40 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include -#include - -NAMESPACE_BEGIN(Grid); - -#include "impl.h" -template class WilsonFermion5D; - -NAMESPACE_END(Grid); - diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc new file mode 120000 index 00000000..804d0884 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1 @@ +../WilsonFermion5DInstantiation.cc.master \ No newline at end of file diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh index 330dcfa8..72a9eaf9 100755 --- a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -88,6 +88,7 @@ done CC_LIST=" \ ImprovedStaggeredFermion5DInstantiation \ ImprovedStaggeredFermionInstantiation \ + NaiveStaggeredFermionInstantiation \ StaggeredKernelsInstantiation " for impl in $STAG_IMPL_LIST diff --git a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h index b9a5296d..9b7d5a60 100644 --- a/Grid/qcd/action/gauge/GaugeImplTypes.h +++ b/Grid/qcd/action/gauge/GaugeImplTypes.h @@ -86,9 +86,9 @@ public: // Move this elsewhere? FIXME static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W - auto U_v = U.View(); - auto W_v = W.View(); - thread_for( ss, U.Grid()->oSites(), { + autoView(U_v,U,AcceleratorWrite); + autoView(W_v,W,AcceleratorRead); + accelerator_for( ss, U.Grid()->oSites(), 1, { U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); }); } @@ -131,15 +131,14 @@ public: //static std::chrono::duration diff; //auto start = std::chrono::high_resolution_clock::now(); - auto U_v = U.View(); - auto P_v = P.View(); - thread_for(ss, P.Grid()->oSites(),{ + autoView(U_v,U,AcceleratorWrite); + autoView(P_v,P,AcceleratorRead); + accelerator_for(ss, P.Grid()->oSites(),1,{ for (int mu = 0; mu < Nd; mu++) { U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu)); } }); - - //auto end = std::chrono::high_resolution_clock::now(); + //auto end = std::chrono::high_resolution_clock::now(); // diff += end - start; // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; } diff --git a/Grid/qcd/action/gauge/GaugeImplementations.h b/Grid/qcd/action/gauge/GaugeImplementations.h index a14aec1b..19bc5aa6 100644 --- a/Grid/qcd/action/gauge/GaugeImplementations.h +++ b/Grid/qcd/action/gauge/GaugeImplementations.h @@ -59,7 +59,7 @@ public: } static inline GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { - return Cshift(adj(Link), mu, -1); + return Cshift(closure(adj(Link)), mu, -1); } static inline GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { diff --git a/Grid/qcd/action/scalar/ScalarInteractionAction.h b/Grid/qcd/action/scalar/ScalarInteractionAction.h index 3be84480..5a5f9251 100644 --- a/Grid/qcd/action/scalar/ScalarInteractionAction.h +++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h @@ -89,8 +89,8 @@ public: action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared; - auto p_v = p.View(); - auto action_v = action.View(); + autoView( p_v , p, CpuRead); + autoView( action_v , action, CpuWrite); for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils @@ -146,8 +146,8 @@ public: for (int point = 0; point < npoint; point++) { - auto p_v = p.View(); - auto force_v = force.View(); + autoView( p_v , p, CpuRead); + autoView( force_v , force, CpuWrite); int permute_type; StencilEntry *SE; diff --git a/Grid/qcd/hmc/HMC_aggregate.h b/Grid/qcd/hmc/HMC_aggregate.h index e4d2ce83..cb510953 100644 --- a/Grid/qcd/hmc/HMC_aggregate.h +++ b/Grid/qcd/hmc/HMC_aggregate.h @@ -39,6 +39,10 @@ directory #include #include #include +#include +#if !defined(GRID_COMMS_NONE) +#include +#endif NAMESPACE_CHECK(Ildg); #include diff --git a/Grid/qcd/modules/Registration.h b/Grid/qcd/modules/Registration.h index ec28f020..28a9fdae 100644 --- a/Grid/qcd/modules/Registration.h +++ b/Grid/qcd/modules/Registration.h @@ -80,8 +80,9 @@ static Registrar, static Registrar< ConjugateGradientModule, HMC_SolverModuleFactory > __CGWFmodXMLInit("ConjugateGradient"); + static Registrar< BiCGSTABModule, - HMC_SolverModuleFactory > __CGWFmodXMLInit("BiCGSTAB"); + HMC_SolverModuleFactory > __BiCGWFmodXMLInit("BiCGSTAB"); static Registrar< ConjugateResidualModule, HMC_SolverModuleFactory > __CRWFmodXMLInit("ConjugateResidual"); diff --git a/Grid/qcd/smearing/GaugeConfiguration.h b/Grid/qcd/smearing/GaugeConfiguration.h index f4d00c72..0ff7fc25 100644 --- a/Grid/qcd/smearing/GaugeConfiguration.h +++ b/Grid/qcd/smearing/GaugeConfiguration.h @@ -49,7 +49,7 @@ public: private: const unsigned int smearingLevels; - Smear_Stout StoutSmearing; + Smear_Stout *StoutSmearing; std::vector SmearedSet; // Member functions @@ -72,7 +72,7 @@ private: previous_u = *ThinLinks; for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) { - StoutSmearing.smear(SmearedSet[smearLvl], previous_u); + StoutSmearing->smear(SmearedSet[smearLvl], previous_u); previous_u = SmearedSet[smearLvl]; // For debug purposes @@ -93,7 +93,7 @@ private: GaugeLinkField SigmaKPrime_mu(grid); GaugeLinkField GaugeKmu(grid), Cmu(grid); - StoutSmearing.BaseSmear(C, GaugeK); + StoutSmearing->BaseSmear(C, GaugeK); SigmaK = Zero(); iLambda = Zero(); @@ -107,7 +107,7 @@ private: pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu); pokeLorentz(iLambda, iLambda_mu, mu); } - StoutSmearing.derivative(SigmaK, iLambda, + StoutSmearing->derivative(SigmaK, iLambda, GaugeK); // derivative of SmearBase return SigmaK; } @@ -144,14 +144,14 @@ private: // Exponential iQ2 = iQ * iQ; iQ3 = iQ * iQ2; - StoutSmearing.set_uw(u, w, iQ2, iQ3); - StoutSmearing.set_fj(f0, f1, f2, u, w); + StoutSmearing->set_uw(u, w, iQ2, iQ3); + StoutSmearing->set_fj(f0, f1, f2, u, w); e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2; // Getting B1, B2, Gamma and Lambda // simplify this part, reduntant calculations in set_fj - xi0 = StoutSmearing.func_xi0(w); - xi1 = StoutSmearing.func_xi1(w); + xi0 = StoutSmearing->func_xi0(w); + xi1 = StoutSmearing->func_xi1(w); u2 = u * u; w2 = w * w; cosw = cos(w); @@ -219,7 +219,7 @@ public: /* Standard constructor */ SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear, Smear_Stout& Stout) - : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) + : smearingLevels(Nsmear), StoutSmearing(&Stout), ThinLinks(NULL) { for (unsigned int i = 0; i < smearingLevels; ++i) SmearedSet.push_back(*(new GaugeField(UGrid))); @@ -227,7 +227,7 @@ public: /*! For just thin links */ SmearedConfiguration() - : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} + : smearingLevels(0), StoutSmearing(nullptr), SmearedSet(), ThinLinks(NULL) {} // attach the smeared routines to the thin links U and fill the smeared set void set_Field(GaugeField &U) diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index c7c7d329..b63d8571 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -185,13 +185,14 @@ void A2Autils::MesonField(TensorType &mat, for(int i=0;i::MesonField(TensorType &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::PionFieldXX(Eigen::Tensor &mat, for(int i=0;i::PionFieldXX(Eigen::Tensor &mat, } for(int j=0;j::PionFieldWVmom(Eigen::Tensor &mat, for(int i=0;i::PionFieldWVmom(Eigen::Tensor &mat, int base = Nmom*i+Nmom*Lblock*j+Nmom*Lblock*Rblock*r; for ( int m=0;m::AslashField(TensorType &mat, for(int i=0;i::AslashField(TensorType &mat, for ( int m=0;m::ContractWWVV(std::vector &WWVV, for(int d_o=0;d_o::ContractWWVV(std::vector &WWVV, thread_for(ss,grid->oSites(),{ for(int d_o=0;d_o::OuterProductWWVV(PropagatorField &WWVV, const vobj &rhs, const int Ns, const int ss) { - auto WWVV_v = WWVV.View(); + autoView(WWVV_v,WWVV,CpuWrite); for (int s1 = 0; s1 < Ns; s1++){ for (int s2 = 0; s2 < Ns; s2++){ WWVV_v[ss]()(s1,s2)(0, 0) += lhs()(s1)(0) * rhs()(s2)(0); @@ -1122,10 +1121,10 @@ void A2Autils::ContractFourQuarkColourDiagonal(const PropagatorField &WWV GridBase *grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(); - auto WWVV1_v = WWVV1.View(); - auto O_trtr_v= O_trtr.View(); - auto O_fig8_v= O_fig8.View(); + autoView(WWVV0_v , WWVV0,CpuRead); + autoView(WWVV1_v , WWVV1,CpuRead); + autoView(O_trtr_v, O_trtr,CpuWrite); + autoView(O_fig8_v, O_fig8,CpuWrite); thread_for(ss,grid->oSites(),{ typedef typename ComplexField::vector_object vobj; @@ -1166,10 +1165,10 @@ void A2Autils::ContractFourQuarkColourMix(const PropagatorField &WWVV0, GridBase *grid = WWVV0.Grid(); - auto WWVV0_v = WWVV0.View(); - auto WWVV1_v = WWVV1.View(); - auto O_trtr_v= O_trtr.View(); - auto O_fig8_v= O_fig8.View(); + autoView( WWVV0_v , WWVV0,CpuRead); + autoView( WWVV1_v , WWVV1,CpuRead); + autoView( O_trtr_v, O_trtr,CpuWrite); + autoView( O_fig8_v, O_fig8,CpuWrite); thread_for(ss,grid->oSites(),{ diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index d65b9176..b268b684 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -7,6 +7,7 @@ Copyright (C) 2019 Author: Felix Erben + Author: Raoul Hodgson This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,7 +47,7 @@ public: typedef typename SpinMatrixField::vector_object sobj; static const int epsilon[6][3] ; - static const Complex epsilon_sgn[6]; + static const Real epsilon_sgn[6]; private: template @@ -58,9 +59,12 @@ public: const Gamma GammaA_right, const Gamma GammaB_right, const int parity, - const int * wick_contractions, + const bool * wick_contractions, robj &result); public: + static void Wick_Contractions(std::string qi, + std::string qf, + bool* wick_contractions); static void ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, const PropagatorField &q3_left, @@ -68,8 +72,7 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr); template @@ -80,10 +83,59 @@ public: const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, + const int nt, robj &result); + private: + template + static void Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + + template + static void Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result); + public: + template + static void Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr); private: template static void Sigma_to_Nucleon_Q1_Eye_site(const mobj &Dq_loop, @@ -151,101 +203,152 @@ public: template const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; -template +/*template const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), Complex(1), Complex(1), Complex(-1), Complex(-1), Complex(-1)}; +*/ +template +const Real BaryonUtils::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.}; +//This is the old version template template void BaryonUtils::baryon_site(const mobj &D1, - const mobj &D2, - const mobj &D3, - const Gamma GammaA_left, - const Gamma GammaB_left, - const Gamma GammaA_right, - const Gamma GammaB_right, - const int parity, - const int * wick_contraction, - robj &result) + const mobj &D2, + const mobj &D3, + const Gamma GammaA_i, + const Gamma GammaB_i, + const Gamma GammaA_f, + const Gamma GammaB_f, + const int parity, + const bool * wick_contraction, + robj &result) { - Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) + + auto D1_GAi = D1 * GammaA_i; + auto D1_GAi_g4 = D1_GAi * g4; + auto D1_GAi_P = 0.5*(D1_GAi + (Real)parity * D1_GAi_g4); + auto GAf_D1_GAi_P = GammaA_f * D1_GAi_P; + auto GBf_D1_GAi_P = GammaB_f * D1_GAi_P; - auto gD1a = GammaA_left * GammaA_right * D1; - auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (double)parity * gD1b); - auto gD3 = GammaB_right * D3; + auto D2_GBi = D2 * GammaB_i; + auto GBf_D2_GBi = GammaB_f * D2_GBi; + auto GAf_D2_GBi = GammaA_f * D2_GBi; - for (int ie_left=0; ie_left < 6 ; ie_left++){ - int a_left = epsilon[ie_left][0]; //a - int b_left = epsilon[ie_left][1]; //b - int c_left = epsilon[ie_left][2]; //c - for (int ie_right=0; ie_right < 6 ; ie_right++){ - int a_right = epsilon[ie_right][0]; //a' - int b_right = epsilon[ie_right][1]; //b' - int c_right = epsilon[ie_right][2]; //c' + auto GBf_D3 = GammaB_f * D3; + auto GAf_D3 = GammaA_f * D3; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + int a_f = epsilon[ie_f][0]; //a + int b_f = epsilon[ie_f][1]; //b + int c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + int a_i = epsilon[ie_i][0]; //a' + int b_i = epsilon[ie_i][1]; //b' + int c_i = epsilon[ie_i][2]; //c' + + Real ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; //This is the \delta_{456}^{123} part - if (wick_contraction[0]){ - auto D2g = D2 * GammaB_left; - for (int alpha_right=0; alpha_right +void BaryonUtils::Wick_Contractions(std::string qi, std::string qf, bool* wick_contractions) { + const int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; + for (int ie=0; ie < 6 ; ie++) { + wick_contractions[ie] = (qi.size() == 3 && qf.size() == 3 + && qi[0] == qf[epsilon[ie][0]] + && qi[1] == qf[epsilon[ie][1]] + && qi[2] == qf[epsilon[ie][2]]); } } +/* The array wick_contractions must be of length 6. The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can be computed from the quark flavours using the * + * Wick_Contractions function above */ template void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const PropagatorField &q2_left, @@ -254,43 +357,61 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, ComplexField &baryon_corr) { - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; - std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; - std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; - std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; - std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + + std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; + std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; + std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; + std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); GridBase *grid = q1_left.Grid(); + + autoView(vbaryon_corr, baryon_corr,CpuWrite); + autoView( v1 , q1_left, CpuRead); + autoView( v2 , q2_left, CpuRead); + autoView( v3 , q3_left, CpuRead); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - - auto vbaryon_corr= baryon_corr.View(); - auto v1 = q1_left.View(); - auto v2 = q2_left.View(); - auto v3 = q3_left.View(); - - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ - //for(int ss=0; ss < grid->oSites(); ss++){ + Real bytes =0.; + bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real)); + for (int ie=0; ie < 6 ; ie++){ + if(ie==0 or ie==3){ + bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contractions[ie]; + } + else{ + bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contractions[ie]; + } + } + Real t=0.; + t =-usecond(); + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto D1 = v1[ss]; auto D2 = v2[ss]; auto D3 = v3[ss]; - vobj result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contractions,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites + + t += usecond(); + + std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl; + } + +/* The array wick_contractions must be of length 6. The order * + * corresponds to the to that shown in the Hadrons documentation * + * at https://aportelli.github.io/Hadrons-doc/#/mcontraction * + * This can also be computed from the quark flavours using the * + * Wick_Contractions function above */ template template void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, @@ -300,30 +421,363 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const Gamma GammaB_left, const Gamma GammaA_right, const Gamma GammaB_right, - const char * quarks_left, - const char * quarks_right, + const bool* wick_contractions, const int parity, + const int nt, robj &result) { - std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; - std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; - std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; - std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; - std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + + std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; + std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; + std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; + std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); - int wick_contraction[6]; - for (int ie=0; ie < 6 ; ie++) - wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - - result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + for (int t=0; t +template +void BaryonUtils::Baryon_Gamma_3pt_Group1_Site( + const mobj &Dq1_ti, + const mobj2 &Dq2_spec, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D1 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq1_ti; + auto Gf_adjD4_g_D1 = GammaBf * adjD4_g_D1; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Real ee; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group2_Site( + const mobj2 &Dq1_spec, + const mobj &Dq2_ti, + const mobj2 &Dq3_spec, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D2_Gi = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq2_ti * GammaBi; + auto Gf_adjD4_g_D2_Gi = GammaBf * adjD4_g_D2_Gi; + auto Gf_D1 = GammaBf * Dq1_spec; + auto Gf_D3 = GammaBf * Dq3_spec; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Real ee; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt_Group3_Site( + const mobj2 &Dq1_spec, + const mobj2 &Dq2_spec, + const mobj &Dq3_ti, + const mobj &Dq4_tf, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + int wick_contraction, + robj &result) +{ + Gamma g5(Gamma::Algebra::Gamma5); + + auto adjD4_g_D3 = g5 * adj(Dq4_tf) * g5 * GammaJ * Dq3_ti; + auto Gf_adjD4_g_D3 = GammaBf * adjD4_g_D3; + auto Gf_D1 = GammaBf * Dq1_spec; + auto D2_Gi = Dq2_spec * GammaBi; + auto Gf_D2_Gi = GammaBf * D2_Gi; + + int a_f, b_f, c_f; + int a_i, b_i, c_i; + + Real ee; + + for (int ie_f=0; ie_f < 6 ; ie_f++){ + a_f = epsilon[ie_f][0]; //a + b_f = epsilon[ie_f][1]; //b + c_f = epsilon[ie_f][2]; //c + for (int ie_i=0; ie_i < 6 ; ie_i++){ + a_i = epsilon[ie_i][0]; //a' + b_i = epsilon[ie_i][1]; //b' + c_i = epsilon[ie_i][2]; //c' + + ee = epsilon_sgn[ie_f] * epsilon_sgn[ie_i]; + + for (int alpha_f=0; alpha_f +template +void BaryonUtils::Baryon_Gamma_3pt( + const PropagatorField &q_ti, + const mobj &Dq_spec1, + const mobj &Dq_spec2, + const PropagatorField &q_tf, + int group, + int wick_contraction, + const Gamma GammaJ, + const Gamma GammaBi, + const Gamma GammaBf, + SpinMatrixField &stn_corr) +{ + GridBase *grid = q_tf.Grid(); + + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_ti , q_ti, CpuRead); + autoView( vq_tf , q_tf, CpuRead); + + if (group == 1) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group1_Site(Dq_ti,Dq_spec1,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 2) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group2_Site(Dq_spec1,Dq_ti,Dq_spec2,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + vcorr[ss] += result; + });//end loop over lattice sites + } else if (group == 3) { + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { + auto Dq_ti = vq_ti[ss]; + auto Dq_tf = vq_tf[ss]; + sobj result=Zero(); + Baryon_Gamma_3pt_Group3_Site(Dq_spec1,Dq_spec2,Dq_ti,Dq_tf,GammaJ,GammaBi,GammaBf,wick_contraction,result); + + vcorr[ss] += result; + });//end loop over lattice sites + } +} + + +/*********************************************************************** + * End of BaryonGamma3pt-function code. * + * * * The following code is for Sigma -> N rare hypeon decays * **********************************************************************/ @@ -558,15 +1012,18 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); - auto vcorr= stn_corr.View(); - auto vq_loop = qq_loop.View(); - auto vd_tf = qd_tf.View(); - auto vs_ti = qs_ti.View(); + autoView( vcorr, stn_corr, CpuWrite); + autoView( vq_loop , qq_loop, CpuRead); + autoView( vd_tf , qd_tf, CpuRead); + autoView( vs_ti , qs_ti, CpuRead); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_loop = vq_loop[ss]; auto Dd_tf = vd_tf[ss]; auto Ds_ti = vs_ti[ss]; @@ -595,14 +1052,17 @@ void BaryonUtils::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); - auto vcorr= stn_corr.View(); - auto vq_ti = qq_ti.View(); - auto vq_tf = qq_tf.View(); - auto vd_tf = qd_tf.View(); - auto vs_ti = qs_ti.View(); - + autoView( vcorr , stn_corr, CpuWrite); + autoView( vq_ti , qq_ti, CpuRead); + autoView( vq_tf , qq_tf, CpuRead); + autoView( vd_tf , qd_tf, CpuRead); + autoView( vs_ti , qs_ti, CpuRead); // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { thread_for(ss,grid->oSites(),{ auto Dq_ti = vq_ti[ss]; diff --git a/Grid/qcd/utils/CovariantCshift.h b/Grid/qcd/utils/CovariantCshift.h index ed96f3bf..6ac69150 100644 --- a/Grid/qcd/utils/CovariantCshift.h +++ b/Grid/qcd/utils/CovariantCshift.h @@ -52,6 +52,26 @@ namespace PeriodicBC { tmp = adj(Link)*field; return Cshift(tmp,mu,-1);// moves towards positive mu } + + template auto + CovShiftForward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + template auto + CovShiftBackward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + } @@ -122,6 +142,26 @@ namespace ConjugateBC { return Cshift(tmp,mu,-1);// moves towards positive mu } + template auto + CovShiftForward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + template auto + CovShiftBackward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + + } diff --git a/Grid/qcd/utils/LinalgUtils.h b/Grid/qcd/utils/LinalgUtils.h index 56f8f164..1e016e4e 100644 --- a/Grid/qcd/utils/LinalgUtils.h +++ b/Grid/qcd/utils/LinalgUtils.h @@ -47,8 +47,8 @@ void axpibg5x(Lattice &z,const Lattice &x,Coeff a,Coeff b) GridBase *grid=x.Grid(); Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto z_v = z.View(); + autoView(x_v, x, AcceleratorRead); + autoView(z_v, z, AcceleratorWrite); accelerator_for( ss, x_v.size(),vobj::Nsimd(), { auto tmp = a*x_v(ss) + G5*(b*timesI(x_v(ss))); coalescedWrite(z_v[ss],tmp); @@ -63,9 +63,9 @@ void axpby_ssp(Lattice &z, Coeff a,const Lattice &x,Coeff b,const La conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); // FIXME -- need a new class of accelerator_loop to implement this // uint64_t nloop = grid->oSites()/Ls; @@ -85,9 +85,9 @@ void ag5xpby_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -104,9 +104,9 @@ void axpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const L conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -125,9 +125,9 @@ void ag5xpbg5y_ssp(Lattice &z,Coeff a,const Lattice &x,Coeff b,const GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); Gamma G5(Gamma::Algebra::Gamma5); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ @@ -147,9 +147,9 @@ void axpby_ssp_pminus(Lattice &z,Coeff a,const Lattice &x,Coeff b,co GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -168,9 +168,9 @@ void axpby_ssp_pplus(Lattice &z,Coeff a,const Lattice &x,Coeff b,con conformable(x,z); GridBase *grid=x.Grid(); int Ls = grid->_rdimensions[0]; - auto x_v = x.View(); - auto y_v = y.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( y_v, y, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -189,8 +189,8 @@ void G5R5(Lattice &z,const Lattice &x) conformable(x,z); int Ls = grid->_rdimensions[0]; Gamma G5(Gamma::Algebra::Gamma5); - auto x_v = x.View(); - auto z_v = z.View(); + autoView( x_v, x, AcceleratorRead); + autoView( z_v, z, AcceleratorWrite); uint64_t nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,vobj::Nsimd(),{ uint64_t ss = sss*Ls; @@ -222,8 +222,8 @@ void G5C(Lattice> &z, const LatticeoSites(),CComplex::Nsimd(), { for(int n = 0; n < nb; ++n) { diff --git a/Grid/qcd/utils/SUn.h b/Grid/qcd/utils/SUn.h index 7ad80d00..0cc0cc1a 100644 --- a/Grid/qcd/utils/SUn.h +++ b/Grid/qcd/utils/SUn.h @@ -222,11 +222,11 @@ public: conformable(subgroup, Determinant); int i0, i1; su2SubGroupIndex(i0, i1, su2_index); - auto subgroup_v = subgroup.View(); - auto source_v = source.View(); - auto Determinant_v = Determinant.View(); - thread_for(ss, grid->oSites(), { + autoView( subgroup_v , subgroup,AcceleratorWrite); + autoView( source_v , source,AcceleratorRead); + autoView( Determinant_v , Determinant,AcceleratorWrite); + accelerator_for(ss, grid->oSites(), 1, { subgroup_v[ss]()()(0, 0) = source_v[ss]()()(i0, i0); subgroup_v[ss]()()(0, 1) = source_v[ss]()()(i0, i1); @@ -257,15 +257,16 @@ public: su2SubGroupIndex(i0, i1, su2_index); dest = 1.0; // start out with identity - auto dest_v = dest.View(); - auto subgroup_v = subgroup.View(); - thread_for(ss, grid->oSites(), + autoView( dest_v , dest, AcceleratorWrite); + autoView( subgroup_v, subgroup, AcceleratorRead); + accelerator_for(ss, grid->oSites(),1, { dest_v[ss]()()(i0, i0) = subgroup_v[ss]()()(0, 0); dest_v[ss]()()(i0, i1) = subgroup_v[ss]()()(0, 1); dest_v[ss]()()(i1, i0) = subgroup_v[ss]()()(1, 0); dest_v[ss]()()(i1, i1) = subgroup_v[ss]()()(1, 1); }); + } /////////////////////////////////////////////// @@ -608,8 +609,8 @@ public: // reunitarise?? template - static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, - double scale = 1.0) { + static void LieRandomize(GridParallelRNG &pRNG, LatticeMatrixType &out, double scale = 1.0) + { GridBase *grid = out.Grid(); typedef typename LatticeMatrixType::vector_type vector_type; @@ -618,8 +619,7 @@ public: typedef iSinglet vTComplexType; typedef Lattice LatticeComplexType; - typedef typename GridTypeMapper< - typename LatticeMatrixType::vector_object>::scalar_object MatrixType; + typedef typename GridTypeMapper::scalar_object MatrixType; LatticeComplexType ca(grid); LatticeMatrixType lie(grid); @@ -629,6 +629,7 @@ public: MatrixType ta; lie = Zero(); + for (int a = 0; a < AdjointDimension; a++) { random(pRNG, ca); @@ -640,6 +641,7 @@ public: la = ci * ca * ta; lie = lie + la; // e^{i la ta} + } taExp(lie, out); } diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 0367c9fa..fdd53698 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -485,7 +485,7 @@ public: // Up staple ___ ___ // | | - tmp = Cshift(adj(U[nu]), nu, -1); + tmp = Cshift(closure(adj(U[nu])), nu, -1); tmp = adj(U2[mu]) * tmp; tmp = Cshift(tmp, mu, -2); @@ -519,7 +519,7 @@ public: // // | | - tmp = Cshift(adj(U2[nu]), nu, -2); + tmp = Cshift(closure(adj(U2[nu])), nu, -2); tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp); tmp = U2[nu] * Cshift(tmp, nu, 2); Stap += Cshift(tmp, mu, 1); diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h index bf424fc7..49406201 100644 --- a/Grid/serialisation/BaseIO.h +++ b/Grid/serialisation/BaseIO.h @@ -87,11 +87,7 @@ namespace Grid { template struct is_tensor_fixed> : public std::true_type {}; - template class MapPointer_> - struct is_tensor_fixed, MapOptions_, MapPointer_>> - : public std::true_type {}; + template struct is_tensor_fixed> : public std::true_type {}; // Is this a variable-size Eigen tensor template struct is_tensor_variable : public std::false_type {}; diff --git a/Grid/serialisation/MacroMagic.h b/Grid/serialisation/MacroMagic.h index 7866327e..0495b91e 100644 --- a/Grid/serialisation/MacroMagic.h +++ b/Grid/serialisation/MacroMagic.h @@ -114,7 +114,8 @@ THE SOFTWARE. #define GRID_MACRO_WRITE_MEMBER(A,B) ::Grid::write(WR,#B,obj. B); #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\ - std::string SerialisableClassName(void) const {return std::string(#cname);} \ +static inline std::string SerialisableClassName(void) {return std::string(#cname);} \ +static constexpr bool isEnum = false; \ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ template \ static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ @@ -162,6 +163,8 @@ public:\ public:\ accelerator name(void) : value_(undefname) {}; \ accelerator name(int value): value_(value) {}; \ + static inline std::string SerialisableClassName(void) {return std::string(#name);}\ + static constexpr bool isEnum = true; \ template \ static inline void write(::Grid::Writer &WR,const std::string &s, const name &obj) \ {\ diff --git a/Grid/serialisation/Serialisation.h b/Grid/serialisation/Serialisation.h index c95226b1..e14120af 100644 --- a/Grid/serialisation/Serialisation.h +++ b/Grid/serialisation/Serialisation.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include "BinaryIO.h" #include "TextIO.h" #include "XmlIO.h" -#ifndef GRID_NVCC +#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP)) #include "JSON_IO.h" #endif diff --git a/Grid/serialisation/VectorUtils.h b/Grid/serialisation/VectorUtils.h index a5a73992..dd5ff0b8 100644 --- a/Grid/serialisation/VectorUtils.h +++ b/Grid/serialisation/VectorUtils.h @@ -432,12 +432,10 @@ namespace Grid { std::vector strToVec(const std::string s) { std::istringstream sstr(s); - T buf; std::vector v; - while(!sstr.eof()) + for(T buf; sstr >> buf;) { - sstr >> buf; v.push_back(buf); } diff --git a/Grid/simd/Grid_gpu_vec.h b/Grid/simd/Grid_gpu_vec.h index 4584fb36..b9c6a81b 100644 --- a/Grid/simd/Grid_gpu_vec.h +++ b/Grid/simd/Grid_gpu_vec.h @@ -32,7 +32,12 @@ Author: Peter Boyle */ //---------------------------------------------------------------------- +#ifdef GRID_CUDA #include +#endif +#ifdef GRID_HIP +#include +#endif namespace Grid { @@ -142,7 +147,7 @@ typedef GpuVector GpuVectorI; accelerator_inline float half2float(half h) { float f; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT f = __half2float(h); #else //f = __half2float(h); @@ -156,7 +161,7 @@ accelerator_inline float half2float(half h) accelerator_inline half float2half(float f) { half h; -#ifdef __CUDA_ARCH__ +#ifdef GRID_SIMT h = __float2half(f); #else Grid_half hh = sfw_float_to_half(f); diff --git a/Grid/simd/Simd.h b/Grid/simd/Simd.h index bc8cd2fd..37aee2ed 100644 --- a/Grid/simd/Simd.h +++ b/Grid/simd/Simd.h @@ -31,7 +31,7 @@ directory #ifndef GRID_SIMD_H #define GRID_SIMD_H -#ifdef GRID_NVCC +#if defined(GRID_CUDA) || defined(GRID_HIP) #include #endif @@ -65,7 +65,7 @@ typedef RealD Real; typedef RealF Real; #endif -#ifdef GRID_NVCC +#if defined(GRID_CUDA) || defined(GRID_HIP) typedef thrust::complex ComplexF; typedef thrust::complex ComplexD; typedef thrust::complex Complex; diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index a56d256d..1e198972 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -67,7 +67,8 @@ void Gather_plane_simple_table (Vector >& table,const Lattice { int num=table.size(); std::pair *table_v = & table[0]; - auto rhs_v = rhs.View(); + + auto rhs_v = rhs.View(AcceleratorRead); accelerator_forNB( i,num, vobj::Nsimd(), { typedef decltype(coalescedRead(buffer[0])) compressed_t; compressed_t tmp_c; @@ -75,6 +76,7 @@ void Gather_plane_simple_table (Vector >& table,const Lattice compress.Compress(&tmp_c,0,rhs_v(so+table_v[i].second)); coalescedWrite(buffer[off+o],tmp_c); }); + rhs_v.ViewClose(); // Further optimisatoin: i) software prefetch the first element of the next table entry, prefetch the table } @@ -94,7 +96,7 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic int num=table.size()/2; int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - auto rhs_v = rhs.View(); + auto rhs_v = rhs.View(AcceleratorRead); auto p0=&pointers[0][0]; auto p1=&pointers[1][0]; auto tp=&table[0]; @@ -104,10 +106,11 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic so+tp[2*j+1].second, type); }); + rhs_v.ViewClose(); } struct StencilEntry { -#ifdef GRID_NVCC +#ifdef GRID_CUDA uint64_t _byte_offset; // 8 bytes uint32_t _offset; // 4 bytes #else @@ -122,7 +125,7 @@ struct StencilEntry { // Could pack to 8 + 4 + 4 = 128 bit and use template -class CartesianStencilView { +class CartesianStencilAccelerator { public: typedef AcceleratorVector StencilVector; @@ -130,14 +133,15 @@ class CartesianStencilView { //////////////////////////////////////// // Basic Grid and stencil info //////////////////////////////////////// - int _checkerboard; - int _npoints; // Move to template param? + int _checkerboard; + int _npoints; // Move to template param? + int _osites; StencilVector _directions; StencilVector _distances; StencilVector _comm_buf_size; StencilVector _permute_type; StencilVector same_node; - Coordinate _simd_layout; + Coordinate _simd_layout; Parameters parameters; StencilEntry* _entries_p; cobj* u_recv_buf_p; @@ -175,13 +179,43 @@ class CartesianStencilView { { Lexicographic::CoorFromIndex(coor,lane,this->_simd_layout); } +}; + +template +class CartesianStencilView : public CartesianStencilAccelerator +{ + private: + int *closed; + StencilEntry *cpu_ptr; + ViewMode mode; + public: + // default copy constructor + CartesianStencilView (const CartesianStencilView &refer_to_me) = default; + + CartesianStencilView (const CartesianStencilAccelerator &refer_to_me,ViewMode _mode) + : CartesianStencilAccelerator(refer_to_me), + cpu_ptr(this->_entries_p), + mode(_mode) + { + this->_entries_p =(StencilEntry *) + MemoryManager::ViewOpen(this->_entries_p, + this->_npoints*this->_osites*sizeof(StencilEntry), + mode, + AdviseDefault); + } + + void ViewClose(void) + { + MemoryManager::ViewClose(this->cpu_ptr,this->mode); + } }; + //////////////////////////////////////// // The Stencil Class itself //////////////////////////////////////// template -class CartesianStencil : public CartesianStencilView { // Stencil runs along coordinate axes only; NO diagonal fill in. +class CartesianStencil : public CartesianStencilAccelerator { // Stencil runs along coordinate axes only; NO diagonal fill in. public: typedef typename cobj::vector_type vector_type; @@ -226,8 +260,8 @@ public: // Generalise as required later if needed //////////////////////////////////////////////////////////////////////// - View_type View(void) const { - View_type accessor(*( (View_type *) this)); + View_type View(ViewMode mode) const { + View_type accessor(*( (View_type *) this),mode); return accessor; } @@ -662,9 +696,9 @@ public: _unified_buffer_size=0; surface_list.resize(0); - int osites = _grid->oSites(); + this->_osites = _grid->oSites(); - _entries.resize(this->_npoints* osites); + _entries.resize(this->_npoints* this->_osites); this->_entries_p = &_entries[0]; for(int ii=0;ii NAMESPACE_BEGIN(Grid); -//accelerator_inline void SIMTsynchronise(void) -accelerator_inline void synchronise(void) -{ -#ifdef __CUDA_ARCH__ -// __syncthreads(); - __syncwarp(); -#endif - return; -} -#ifndef __CUDA_ARCH__ +#ifndef GRID_SIMT ////////////////////////////////////////// // Trivial mapping of vectors on host ////////////////////////////////////////// -accelerator_inline int SIMTlane(int Nsimd) { return 0; } // CUDA specific - template accelerator_inline vobj coalescedRead(const vobj & __restrict__ vec,int lane=0) { @@ -66,7 +55,6 @@ vobj coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int template accelerator_inline void coalescedWrite(vobj & __restrict__ vec,const vobj & __restrict__ extracted,int lane=0) { - // vstream(vec, extracted); vec = extracted; } template accelerator_inline @@ -75,25 +63,24 @@ void coalescedWriteNonTemporal(vobj & __restrict__ vec,const vobj & __restrict__ vstream(vec, extracted); } #else -accelerator_inline int SIMTlane(int Nsimd) { return threadIdx.y; } // CUDA specific ////////////////////////////////////////// // Extract and insert slices on the GPU ////////////////////////////////////////// template accelerator_inline -typename vobj::scalar_object coalescedRead(const vobj & __restrict__ vec,int lane=SIMTlane(vobj::Nsimd())) +typename vobj::scalar_object coalescedRead(const vobj & __restrict__ vec,int lane=acceleratorSIMTlane(vobj::Nsimd())) { return extractLane(lane,vec); } template accelerator_inline -typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int lane=SIMTlane(vobj::Nsimd())) +typename vobj::scalar_object coalescedReadPermute(const vobj & __restrict__ vec,int ptype,int doperm,int lane=acceleratorSIMTlane(vobj::Nsimd())) { int mask = vobj::Nsimd() >> (ptype + 1); int plane= doperm ? lane ^ mask : lane; return extractLane(plane,vec); } template accelerator_inline -void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=SIMTlane(vobj::Nsimd())) +void coalescedWrite(vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted,int lane=acceleratorSIMTlane(vobj::Nsimd())) { insertLane(lane,vec,extracted); } diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index 75e42721..36becc49 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -6,6 +6,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,8 +56,23 @@ class GridTensorBase {}; using Complexified = typename Traits::Complexified; \ using Realified = typename Traits::Realified; \ using DoublePrecision = typename Traits::DoublePrecision; \ + using DoublePrecision2= typename Traits::DoublePrecision2; \ static constexpr int TensorLevel = Traits::TensorLevel +/////////////////////////////////////////////////////////// +// Allows to turn scalar>>> back to double. +/////////////////////////////////////////////////////////// +template +accelerator_inline typename std::enable_if::value, T>::type +TensorRemove(T arg) { + return arg; +} +template +accelerator_inline auto TensorRemove(iScalar arg) + -> decltype(TensorRemove(arg._internal)) { + return TensorRemove(arg._internal); +} + template class iScalar { public: @@ -133,9 +149,10 @@ public: operator ComplexD() const { return (TensorRemove(_internal)); } + // instantiation of "Grid::iScalar::operator Grid::RealD() const [with vtype=Grid::Real, U=Grid::Real, V=Grid::RealD, =0, =0U]" template = 0,IfNotSimd = 0> accelerator_inline operator RealD() const { - return TensorRemove(_internal); + return (RealD) TensorRemove(_internal); } template = 0, IfNotSimd = 0> accelerator_inline operator Integer() const { @@ -167,20 +184,6 @@ public: strong_inline scalar_type * end() { return begin() + Traits::count; } }; -/////////////////////////////////////////////////////////// -// Allows to turn scalar>>> back to double. -/////////////////////////////////////////////////////////// -template -accelerator_inline typename std::enable_if::value, T>::type -TensorRemove(T arg) { - return arg; -} -template -accelerator_inline auto TensorRemove(iScalar arg) - -> decltype(TensorRemove(arg._internal)) { - return TensorRemove(arg._internal); -} - template class iVector { public: diff --git a/Grid/tensors/Tensor_exp.h b/Grid/tensors/Tensor_exp.h index 11d37f9c..0a1d6389 100644 --- a/Grid/tensors/Tensor_exp.h +++ b/Grid/tensors/Tensor_exp.h @@ -55,7 +55,7 @@ template accelerator_inline iVector Exponentiate(c // Specialisation: Cayley-Hamilton exponential for SU(3) -#ifndef GRID_NVCC +#ifndef GRID_CUDA template::TensorLevel == 0>::type * =nullptr> accelerator_inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) { diff --git a/Grid/tensors/Tensor_inner.h b/Grid/tensors/Tensor_inner.h index 03f72966..fd651cae 100644 --- a/Grid/tensors/Tensor_inner.h +++ b/Grid/tensors/Tensor_inner.h @@ -8,6 +8,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -194,6 +195,79 @@ auto innerProductD (const iScalar& lhs,const iScalar& rhs) -> iScalar accelerator_inline + auto innerProductD2 (const iVector& lhs,const iVector& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t; + iScalar ret; + zeroit(ret); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iMatrix& lhs,const iMatrix& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t; + iScalar ret; + ret=Zero(); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iScalar& lhs,const iScalar& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t; + iScalar ret; + ret._internal = innerProductD2(lhs._internal,rhs._internal); + return ret; +} + ////////////////////// // Keep same precison ////////////////////// diff --git a/Grid/tensors/Tensor_traits.h b/Grid/tensors/Tensor_traits.h index 9067d43d..04d7343e 100644 --- a/Grid/tensors/Tensor_traits.h +++ b/Grid/tensors/Tensor_traits.h @@ -6,6 +6,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: Christopher Kelly Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -37,6 +38,60 @@ NAMESPACE_BEGIN(Grid); template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; + // Traits to identify scalars + template struct isGridScalar : public std::false_type { static constexpr bool notvalue = true; }; + template struct isGridScalar> : public std::true_type { static constexpr bool notvalue = false; }; + + // Store double-precision data in single-precision grids for precision promoted localInnerProductD + template + class TypePair { + public: + T _internal[2]; + TypePair& operator=(const Grid::Zero& o) { + _internal[0] = Zero(); + _internal[1] = Zero(); + return *this; + } + + TypePair operator+(const TypePair& o) const { + TypePair r; + r._internal[0] = _internal[0] + o._internal[0]; + r._internal[1] = _internal[1] + o._internal[1]; + return r; + } + + TypePair& operator+=(const TypePair& o) { + _internal[0] += o._internal[0]; + _internal[1] += o._internal[1]; + return *this; + } + + friend accelerator_inline void add(TypePair* ret, const TypePair* a, const TypePair* b) { + add(&ret->_internal[0],&a->_internal[0],&b->_internal[0]); + add(&ret->_internal[1],&a->_internal[1],&b->_internal[1]); + } + }; + typedef TypePair ComplexD2; + typedef TypePair RealD2; + typedef TypePair vComplexD2; + typedef TypePair vRealD2; + + // Traits to identify fundamental data types + template struct isGridFundamental : public std::false_type { static constexpr bool notvalue = true; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + + ////////////////////////////////////////////////////////////////////////////////// // Want to recurse: GridTypeMapper >::scalar_type == ComplexD. // Use of a helper class like this allows us to template specialise and "dress" @@ -81,6 +136,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef RealD DoublePrecision; + typedef RealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -93,6 +149,20 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef RealD DoublePrecision; + typedef RealD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef RealD2 vector_type; + typedef RealD2 vector_typeD; + typedef RealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef RealD2 DoublePrecision; + typedef RealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -105,6 +175,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef ComplexD DoublePrecision; + typedef ComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -117,6 +188,20 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef ComplexD DoublePrecision; + typedef ComplexD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef ComplexD2 vector_type; + typedef ComplexD2 vector_typeD; + typedef ComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef ComplexD2 DoublePrecision; + typedef ComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -129,6 +214,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { @@ -142,6 +228,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vRealD DoublePrecision; + typedef vRealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -154,6 +241,20 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef vRealD2 vector_type; + typedef vRealD2 vector_typeD; + typedef vRealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vRealD2 DoublePrecision; + typedef vRealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -167,6 +268,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -180,6 +282,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -192,6 +295,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vComplexD DoublePrecision; + typedef vComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -204,6 +308,20 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef vComplexD2 vector_type; + typedef vComplexD2 vector_typeD; + typedef vComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vComplexD2 DoublePrecision; + typedef vComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -216,6 +334,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; #define GridTypeMapper_RepeatedTypes \ @@ -234,6 +353,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iScalar; using Realified = iScalar; using DoublePrecision = iScalar; + using DoublePrecision2= iScalar; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count; static constexpr int Dimension(int dim) { @@ -248,6 +368,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iVector; using Realified = iVector; using DoublePrecision = iVector; + using DoublePrecision2= iVector; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count * N; static constexpr int Dimension(int dim) { @@ -262,6 +383,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iMatrix; using Realified = iMatrix; using DoublePrecision = iMatrix; + using DoublePrecision2= iMatrix; static constexpr int Rank = BaseTraits::Rank + 2; static constexpr std::size_t count = BaseTraits::count * N * N; static constexpr int Dimension(int dim) { diff --git a/Grid/threads/Accelerator.cc b/Grid/threads/Accelerator.cc new file mode 100644 index 00000000..2c4ad9df --- /dev/null +++ b/Grid/threads/Accelerator.cc @@ -0,0 +1,207 @@ +#include + +NAMESPACE_BEGIN(Grid); +uint32_t accelerator_threads=2; +uint32_t acceleratorThreads(void) {return accelerator_threads;}; +void acceleratorThreads(uint32_t t) {accelerator_threads = t;}; + +#ifdef GRID_CUDA +cudaDeviceProp *gpu_props; +void acceleratorInit(void) +{ + int nDevices = 1; + cudaGetDeviceCount(&nDevices); + gpu_props = new cudaDeviceProp[nDevices]; + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + size_t totalDeviceMem=0; + for (int i = 0; i < nDevices; i++) { + +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorCudaInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + cudaGetDeviceProperties(&gpu_props[i], i); + cudaDeviceProp prop; + prop = gpu_props[i]; + totalDeviceMem = prop.totalGlobalMem; + if ( world_rank == 0) { + printf("AcceleratorCudaInit: ========================\n"); + printf("AcceleratorCudaInit: Device Number : %d\n", i); + printf("AcceleratorCudaInit: ========================\n"); + printf("AcceleratorCudaInit: Device identifier: %s\n", prop.name); + + GPU_PROP_FMT(totalGlobalMem,"%lld"); + GPU_PROP(managedMemory); + GPU_PROP(isMultiGpuBoard); + GPU_PROP(warpSize); + // GPU_PROP(unifiedAddressing); + // GPU_PROP(l2CacheSize); + // GPU_PROP(singleToDoublePrecisionPerfRatio); + } + } + MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours +#undef GPU_PROP_FMT +#undef GPU_PROP + +#ifdef GRID_IBM_SUMMIT + // IBM Jsrun makes cuda Device numbering screwy and not match rank + if ( world_rank == 0 ) printf("AcceleratorCudaInit: IBM Summit or similar - NOT setting device to node rank\n"); +#else + if ( world_rank == 0 ) printf("AcceleratorCudaInit: setting device to node rank\n"); + cudaSetDevice(rank); +#endif + if ( world_rank == 0 ) printf("AcceleratorCudaInit: ================================================\n"); +} +#endif + +#ifdef GRID_HIP +hipDeviceProp_t *gpu_props; +void acceleratorInit(void) +{ + int nDevices = 1; + hipGetDeviceCount(&nDevices); + gpu_props = new hipDeviceProp_t[nDevices]; + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + for (int i = 0; i < nDevices; i++) { + +#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); +#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); + + hipGetDeviceProperties(&gpu_props[i], i); + if ( world_rank == 0) { + hipDeviceProp_t prop; + prop = gpu_props[i]; + printf("AcceleratorHipInit: ========================\n"); + printf("AcceleratorHipInit: Device Number : %d\n", i); + printf("AcceleratorHipInit: ========================\n"); + printf("AcceleratorHipInit: Device identifier: %s\n", prop.name); + + // GPU_PROP(managedMemory); + GPU_PROP(isMultiGpuBoard); + GPU_PROP(warpSize); + // GPU_PROP(unifiedAddressing); + // GPU_PROP(l2CacheSize); + // GPU_PROP(singleToDoublePrecisionPerfRatio); + } + } +#undef GPU_PROP_FMT +#undef GPU_PROP +#ifdef GRID_IBM_SUMMIT + // IBM Jsrun makes cuda Device numbering screwy and not match rank + if ( world_rank == 0 ) printf("AcceleratorHipInit: IBM Summit or similar - NOT setting device to node rank\n"); +#else + if ( world_rank == 0 ) printf("AcceleratorHipInit: setting device to node rank\n"); + hipSetDevice(rank); +#endif + if ( world_rank == 0 ) printf("AcceleratorHipInit: ================================================\n"); +} +#endif + + +#ifdef GRID_SYCL + +cl::sycl::queue *theGridAccelerator; + +void acceleratorInit(void) +{ + int nDevices = 1; + cl::sycl::gpu_selector selector; + cl::sycl::device selectedDevice { selector }; + theGridAccelerator = new sycl::queue (selectedDevice); + + char * localRankStr = NULL; + int rank = 0, world_rank=0; +#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK" +#define ENV_LOCAL_RANK_MVAPICH "MV2_COMM_WORLD_LOCAL_RANK" +#define ENV_RANK_OMPI "OMPI_COMM_WORLD_RANK" +#define ENV_RANK_MVAPICH "MV2_COMM_WORLD_RANK" + // We extract the local rank initialization using an environment variable + if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_LOCAL_RANK_MVAPICH)) != NULL) + { + rank = atoi(localRankStr); + } + if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} + if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} + + auto devices = cl::sycl::device::get_devices(); + for(int d = 0;d().c_str()); + +#define GPU_PROP_FMT(prop,FMT) \ + printf("AcceleratorSyclInit: " #prop ": " FMT" \n",devices[d].get_info()); + +#define GPU_PROP(prop) GPU_PROP_FMT(prop,"%ld"); + + GPU_PROP_STR(vendor); + GPU_PROP_STR(version); + // GPU_PROP_STR(device_type); + /* + GPU_PROP(max_compute_units); + GPU_PROP(native_vector_width_char); + GPU_PROP(native_vector_width_short); + GPU_PROP(native_vector_width_int); + GPU_PROP(native_vector_width_long); + GPU_PROP(native_vector_width_float); + GPU_PROP(native_vector_width_double); + GPU_PROP(native_vector_width_half); + GPU_PROP(address_bits); + GPU_PROP(half_fp_config); + GPU_PROP(single_fp_config); + */ + // GPU_PROP(double_fp_config); + GPU_PROP(global_mem_size); + + } + if ( world_rank == 0 ) { + auto name = theGridAccelerator->get_device().get_info(); + printf("AcceleratorSyclInit: Selected device is %s\n",name.c_str()); + printf("AcceleratorSyclInit: ================================================\n"); + } +} +#endif + +#if (!defined(GRID_CUDA)) && (!defined(GRID_SYCL))&& (!defined(GRID_HIP)) +void acceleratorInit(void){} +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/threads/Accelerator.h b/Grid/threads/Accelerator.h new file mode 100644 index 00000000..74a3ea22 --- /dev/null +++ b/Grid/threads/Accelerator.h @@ -0,0 +1,426 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/Accelerator.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include + +#ifdef HAVE_MALLOC_MALLOC_H +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif +#ifdef HAVE_MM_MALLOC_H +#include +#endif + +NAMESPACE_BEGIN(Grid); + +////////////////////////////////////////////////////////////////////////////////// +// Accelerator primitives; fall back to threading if not CUDA or SYCL +////////////////////////////////////////////////////////////////////////////////// +// +// Function attributes +// +// accelerator +// accelerator_inline +// +// Parallel looping +// +// accelerator_for +// accelerator_forNB +// uint32_t accelerator_barrier(); // device synchronise +// +// Parallelism control: Number of threads in thread block is acceleratorThreads*Nsimd +// +// uint32_t acceleratorThreads(void); +// void acceleratorThreads(uint32_t); +// +// Warp control and info: +// +// acceleratorInit; +// void acceleratorSynchronise(void); // synch warp etc.. +// int acceleratorSIMTlane(int Nsimd); +// +// Memory management: +// +// void *acceleratorAllocShared(size_t bytes); +// void acceleratorFreeShared(void *ptr); +// +// void *acceleratorAllocDevice(size_t bytes); +// void acceleratorFreeDevice(void *ptr); +// +// void *acceleratorCopyToDevice(void *from,void *to,size_t bytes); +// void *acceleratorCopyFromDevice(void *from,void *to,size_t bytes); +// +////////////////////////////////////////////////////////////////////////////////// + +uint32_t acceleratorThreads(void); +void acceleratorThreads(uint32_t); +void acceleratorInit(void); + +////////////////////////////////////////////// +// CUDA acceleration +////////////////////////////////////////////// + +#ifdef GRID_CUDA + +#ifdef __CUDA_ARCH__ +#define GRID_SIMT +#endif + +#define accelerator __host__ __device__ +#define accelerator_inline __host__ __device__ inline + +accelerator_inline int acceleratorSIMTlane(int Nsimd) { +#ifdef GRID_SIMT + return threadIdx.z; +#else + return 0; +#endif +} // CUDA specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator \ + (Iterator iter1,Iterator iter2,Iterator lane) mutable { \ + __VA_ARGS__; \ + }; \ + int nt=acceleratorThreads(); \ + dim3 cu_threads(acceleratorThreads(),1,nsimd); \ + dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \ + LambdaApply<<>>(num1,num2,nsimd,lambda); \ + } + +template __global__ +void LambdaApply(uint64_t num1, uint64_t num2, uint64_t num3, lambda Lambda) +{ + uint64_t x = threadIdx.x + blockDim.x*blockIdx.x; + uint64_t y = threadIdx.y + blockDim.y*blockIdx.y; + uint64_t z = threadIdx.z; + if ( (x < num1) && (y +#include +NAMESPACE_BEGIN(Grid); + +extern cl::sycl::queue *theGridAccelerator; + +#ifdef __SYCL_DEVICE_ONLY__ +#define GRID_SIMT +#endif + +#define accelerator +#define accelerator_inline strong_inline + +accelerator_inline int acceleratorSIMTlane(int Nsimd) { +#ifdef GRID_SIMT + return __spirv::initLocalInvocationId<3, cl::sycl::id<3>>()[2]; +#else + return 0; +#endif +} // SYCL specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ + theGridAccelerator->submit([&](cl::sycl::handler &cgh) { \ + unsigned long nt=acceleratorThreads(); \ + unsigned long unum1 = num1; \ + unsigned long unum2 = num2; \ + cl::sycl::range<3> local {nt,1,nsimd}; \ + cl::sycl::range<3> global{unum1,unum2,nsimd}; \ + cgh.parallel_for( \ + cl::sycl::nd_range<3>(global,local), \ + [=] (cl::sycl::nd_item<3> item) mutable { \ + auto iter1 = item.get_global_id(0); \ + auto iter2 = item.get_global_id(1); \ + auto lane = item.get_global_id(2); \ + { __VA_ARGS__ }; \ + }); \ + }); + +#define accelerator_barrier(dummy) theGridAccelerator->wait(); + +inline void *acceleratorAllocShared(size_t bytes){ return malloc_shared(bytes,*theGridAccelerator);}; +inline void *acceleratorAllocDevice(size_t bytes){ return malloc_device(bytes,*theGridAccelerator);}; +inline void acceleratorFreeShared(void *ptr){free(ptr,*theGridAccelerator);}; +inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);}; +inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} +inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theGridAccelerator->memcpy(to,from,bytes); theGridAccelerator->wait();} + +#endif + +////////////////////////////////////////////// +// HIP acceleration +////////////////////////////////////////////// +#ifdef GRID_HIP +NAMESPACE_END(Grid); +#include +NAMESPACE_BEGIN(Grid); + +#ifdef __HIP_DEVICE_COMPILE__ +#define GRID_SIMT +#endif + +#define accelerator __host__ __device__ +#define accelerator_inline __host__ __device__ inline + +/*These routines define mapping from thread grid to loop & vector lane indexing */ +accelerator_inline int acceleratorSIMTlane(int Nsimd) { +#ifdef GRID_SIMT + return hipThreadIdx_z; +#else + return 0; +#endif +} // HIP specific + +#define accelerator_for2dNB( iter1, num1, iter2, num2, nsimd, ... ) \ + { \ + typedef uint64_t Iterator; \ + auto lambda = [=] accelerator \ + (Iterator iter1,Iterator iter2,Iterator lane ) mutable { \ + { __VA_ARGS__;} \ + }; \ + int nt=acceleratorThreads(); \ + dim3 hip_threads(nt,1,nsimd); \ + dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \ + hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \ + 0,0, \ + num1,num2,nsimd,lambda); \ + } + +template __global__ +void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda) +{ + uint64_t x = hipThreadIdx_x + hipBlockDim_x*hipBlockIdx_x; + uint64_t y = hipThreadIdx_y + hipBlockDim_y*hipBlockIdx_y; + uint64_t z = hipThreadIdx_z ;//+ hipBlockDim_z*hipBlockIdx_z; + if ( (x < numx) && (y /* END LEGAL */ #pragma once -#ifndef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) -#define MIN(x,y) ((x)>(y)?(y):(x)) -#endif - -#define strong_inline __attribute__((always_inline)) inline -#define UNROLL _Pragma("unroll") - -////////////////////////////////////////////////////////////////////////////////// -// New primitives; explicit host thread calls, and accelerator data parallel calls -////////////////////////////////////////////////////////////////////////////////// - -#ifdef _OPENMP -#define GRID_OMP -#include -#endif - -#ifdef GRID_OMP -#define DO_PRAGMA_(x) _Pragma (#x) -#define DO_PRAGMA(x) DO_PRAGMA_(x) -#define thread_num(a) omp_get_thread_num() -#define thread_max(a) omp_get_max_threads() -#else -#define DO_PRAGMA_(x) -#define DO_PRAGMA(x) -#define thread_num(a) (0) -#define thread_max(a) (1) -#endif - -#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i __global__ -void LambdaApplySIMT(uint64_t Isites, uint64_t Osites, lambda Lambda) -{ - uint64_t isite = threadIdx.y; - uint64_t osite = threadIdx.x+blockDim.x*blockIdx.x; - if ( (osite >>(nsimd,num,lambda); \ - } - -// Copy the for_each_n style ; Non-blocking variant (default -#define accelerator_for( iterator, num, nsimd, ... ) \ - accelerator_forNB(iterator, num, nsimd, { __VA_ARGS__ } ); \ - accelerator_barrier(dummy); - -#else - -#define accelerator -#define accelerator_inline strong_inline -#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ }); -#define accelerator_forNB(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ }); -#define accelerator_barrier(dummy) - -#endif +#include +#include diff --git a/Grid/threads/ThreadReduction.h b/Grid/threads/ThreadReduction.h new file mode 100644 index 00000000..f0d24d50 --- /dev/null +++ b/Grid/threads/ThreadReduction.h @@ -0,0 +1,127 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/ThreadReduction.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +// Introduce a class to gain deterministic bit reproducible reduction. +// make static; perhaps just a namespace is required. +NAMESPACE_BEGIN(Grid); + +class GridThread { +public: + static int _threads; + static int _hyperthreads; + static int _cores; + + static void SetCores(int cr) { +#ifdef GRID_OMP + _cores = cr; +#else + _cores = 1; +#endif + } + static void SetThreads(int thr) { +#ifdef GRID_OMP + _threads = MIN(thr,omp_get_max_threads()) ; + omp_set_num_threads(_threads); +#else + _threads = 1; +#endif + }; + static void SetMaxThreads(void) { +#ifdef GRID_OMP + _threads = omp_get_max_threads(); + omp_set_num_threads(_threads); +#else + _threads = 1; +#endif + }; + static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; }; + static int GetCores(void) { return _cores; }; + static int GetThreads(void) { return _threads; }; + static int SumArraySize(void) {return _threads;}; + + static void GetWork(int nwork, int me, int & mywork, int & myoff){ + GetWork(nwork,me,mywork,myoff,_threads); + } + static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){ + int basework = nwork/units; + int backfill = units-(nwork%units); + if ( me >= units ) { + mywork = myoff = 0; + } else { + mywork = (nwork+me)/units; + myoff = basework * me; + if ( me > backfill ) + myoff+= (me-backfill); + } + return; + }; + + static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){ + me = ThreadBarrier(); + GetWork(nwork,me,mywork,myoff); + }; + + static int ThreadBarrier(void) { +#ifdef GRID_OMP +#pragma omp barrier + return omp_get_thread_num(); +#else + return 0; +#endif + }; + + template static void ThreadSum( std::vector &sum_array,obj &val,int me){ + sum_array[me] = val; + val=Zero(); + ThreadBarrier(); + for(int i=0;i<_threads;i++) val+= sum_array[i]; + ThreadBarrier(); + } + + static void bcopy(const void *src, void *dst, size_t len) { +#ifdef GRID_OMP +#pragma omp parallel + { + const char *c_src =(char *) src; + char *c_dest=(char *) dst; + int me,mywork,myoff; + GridThread::GetWorkBarrier(len,me, mywork,myoff); + bcopy(&c_src[myoff],&c_dest[myoff],mywork); + } +#else + bcopy(src,dst,len); +#endif + } + + +}; + +NAMESPACE_END(Grid); + diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index 29cae060..a9fa13ea 100644 --- a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -28,101 +28,47 @@ Author: paboyle /* END LEGAL */ #pragma once +#ifndef MAX +#define MAX(x,y) ((x)>(y)?(x):(y)) +#define MIN(x,y) ((x)>(y)?(y):(x)) +#endif -// Introduce a class to gain deterministic bit reproducible reduction. -// make static; perhaps just a namespace is required. -NAMESPACE_BEGIN(Grid); +#define strong_inline __attribute__((always_inline)) inline +#define UNROLL _Pragma("unroll") -class GridThread { -public: - static int _threads; - static int _hyperthreads; - static int _cores; +////////////////////////////////////////////////////////////////////////////////// +// New primitives; explicit host thread calls, and accelerator data parallel calls +////////////////////////////////////////////////////////////////////////////////// + +#ifdef _OPENMP +#define GRID_OMP +#include +#endif - static void SetCores(int cr) { #ifdef GRID_OMP - _cores = cr; +#define DO_PRAGMA_(x) _Pragma (#x) +#define DO_PRAGMA(x) DO_PRAGMA_(x) +#define thread_num(a) omp_get_thread_num() +#define thread_max(a) omp_get_max_threads() #else - _cores = 1; +#define DO_PRAGMA_(x) +#define DO_PRAGMA(x) +#define thread_num(a) (0) +#define thread_max(a) (1) #endif - } - static void SetThreads(int thr) { -#ifdef GRID_OMP - _threads = MIN(thr,omp_get_max_threads()) ; - omp_set_num_threads(_threads); -#else - _threads = 1; -#endif - }; - static void SetMaxThreads(void) { -#ifdef GRID_OMP - _threads = omp_get_max_threads(); - omp_set_num_threads(_threads); -#else - _threads = 1; -#endif - }; - static int GetHyperThreads(void) { assert(_threads%_cores ==0); return _threads/_cores; }; - static int GetCores(void) { return _cores; }; - static int GetThreads(void) { return _threads; }; - static int SumArraySize(void) {return _threads;}; - static void GetWork(int nwork, int me, int & mywork, int & myoff){ - GetWork(nwork,me,mywork,myoff,_threads); - } - static void GetWork(int nwork, int me, int & mywork, int & myoff,int units){ - int basework = nwork/units; - int backfill = units-(nwork%units); - if ( me >= units ) { - mywork = myoff = 0; - } else { - mywork = (nwork+me)/units; - myoff = basework * me; - if ( me > backfill ) - myoff+= (me-backfill); - } - return; - }; - - static void GetWorkBarrier(int nwork, int &me, int & mywork, int & myoff){ - me = ThreadBarrier(); - GetWork(nwork,me,mywork,myoff); - }; - - static int ThreadBarrier(void) { -#ifdef GRID_OMP -#pragma omp barrier - return omp_get_thread_num(); -#else - return 0; -#endif - }; - - template static void ThreadSum( std::vector &sum_array,obj &val,int me){ - sum_array[me] = val; - val=Zero(); - ThreadBarrier(); - for(int i=0;i<_threads;i++) val+= sum_array[i]; - ThreadBarrier(); - } - - static void bcopy(const void *src, void *dst, size_t len) { -#ifdef GRID_OMP -#pragma omp parallel - { - const char *c_src =(char *) src; - char *c_dest=(char *) dst; - int me,mywork,myoff; - GridThread::GetWorkBarrier(len,me, mywork,myoff); - bcopy(&c_src[myoff],&c_dest[myoff],mywork); - } -#else - bcopy(src,dst,len); -#endif - } - - -}; - -NAMESPACE_END(Grid); +#define thread_for( i, num, ... ) DO_PRAGMA(omp parallel for schedule(static)) for ( uint64_t i=0;i=0); assert(sz<=MaxEntries); +#endif _size = sz; } accelerator_inline void resize(size_type sz,const value &val) { - assert(sz>=0); - assert(sz<=MaxEntries); - _size = sz; + resize(sz); for(int s=0;s ©me) { diff --git a/Grid/util/Init.cc b/Grid/util/Init.cc index 570f4234..656e29a9 100644 --- a/Grid/util/Init.cc +++ b/Grid/util/Init.cc @@ -73,8 +73,6 @@ feenableexcept (unsigned int excepts) } #endif -uint32_t gpu_threads=8; - NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////// @@ -192,16 +190,12 @@ void GridParseLayout(char **argv,int argc, assert(ompthreads.size()==1); GridThread::SetThreads(ompthreads[0]); } - if( GridCmdOptionExists(argv,argv+argc,"--gpu-threads") ){ + if( GridCmdOptionExists(argv,argv+argc,"--accelerator-threads") ){ std::vector gputhreads(0); -#ifndef GRID_NVCC - std::cout << GridLogWarning << "'--gpu-threads' option used but Grid was" - << " not compiled with GPU support" << std::endl; -#endif - arg= GridCmdOptionPayload(argv,argv+argc,"--gpu-threads"); + arg= GridCmdOptionPayload(argv,argv+argc,"--accelerator-threads"); GridCmdOptionIntVector(arg,gputhreads); assert(gputhreads.size()==1); - gpu_threads=gputhreads[0]; + acceleratorThreads(gputhreads[0]); } if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ @@ -241,8 +235,6 @@ static int Grid_is_initialised; ///////////////////////////////////////////////////////// void GridBanner(void) { - static int printed =0; - if( !printed ) { std::cout < & vec); template void GridCmdOptionIntVector(std::string &str,VectorInt & vec); +void GridCmdOptionInt(std::string &str,int & val); void GridParseLayout(char **argv,int argc, diff --git a/benchmarks/Benchmark_IO.cc b/benchmarks/Benchmark_IO.cc index 3d3b0ce0..c8c0937f 100644 --- a/benchmarks/Benchmark_IO.cc +++ b/benchmarks/Benchmark_IO.cc @@ -14,6 +14,7 @@ std::string filestem(const int l) int main (int argc, char ** argv) { +#ifdef HAVE_LIME Grid_init(&argc,&argv); int64_t threads = GridThread::GetThreads(); @@ -42,6 +43,6 @@ int main (int argc, char ** argv) } Grid_finalize(); - +#endif return EXIT_SUCCESS; } diff --git a/benchmarks/Benchmark_IO.hpp b/benchmarks/Benchmark_IO.hpp index 91fcb61f..d3416353 100644 --- a/benchmarks/Benchmark_IO.hpp +++ b/benchmarks/Benchmark_IO.hpp @@ -2,7 +2,7 @@ #define Benchmark_IO_hpp_ #include - +#ifdef HAVE_LIME #define MSG std::cout << GridLogMessage #define SEP \ "=============================================================================" @@ -104,4 +104,5 @@ void readBenchmark(const Coordinate &latt, const std::string filename, } +#endif //LIME #endif // Benchmark_IO_hpp_ diff --git a/benchmarks/Benchmark_IO_vs_dir.cc b/benchmarks/Benchmark_IO_vs_dir.cc index cb4831ed..6e6c9ae0 100644 --- a/benchmarks/Benchmark_IO_vs_dir.cc +++ b/benchmarks/Benchmark_IO_vs_dir.cc @@ -8,6 +8,7 @@ using namespace Grid; int main (int argc, char ** argv) { +#ifdef HAVE_LIME std::vector dir; unsigned int Ls; bool rb; @@ -73,6 +74,6 @@ int main (int argc, char ** argv) } Grid_finalize(); - +#endif return EXIT_SUCCESS; } diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 7ad4a147..dc09549c 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -30,7 +30,6 @@ Author: paboyle using namespace Grid; - std::vector L_list; std::vector Ls_list; std::vector mflop_list; @@ -76,7 +75,6 @@ struct controls { int Opt; int CommsOverlap; Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; - // int HugePages; }; class Benchmark { @@ -119,14 +117,15 @@ public: std::cout<({45,12,81,9})); - for(int lat=8;lat<=lmax;lat+=4){ + for(int lat=8;lat<=lmax;lat+=8){ Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + GridCartesian Grid(latt_size,simd_layout,mpi_layout); // NP= Grid.RankCount(); @@ -242,9 +237,9 @@ public: Vec rn ; random(sRNG,rn); - LatticeVec z(&Grid); z=rn; - LatticeVec x(&Grid); x=rn; - LatticeVec y(&Grid); y=rn; + LatticeVec z(&Grid); z=Zero(); + LatticeVec x(&Grid); x=Zero(); + LatticeVec y(&Grid); y=Zero(); double a=2.0; uint64_t Nloop=NLOOP; @@ -252,9 +247,9 @@ public: double start=usecond(); for(int i=0;i mflops_all; - - /////////////////////////////////////////////////////// - // Set/Get the layout & grid size - /////////////////////////////////////////////////////// - int threads = GridThread::GetThreads(); - Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); - Coordinate local({L,L,L,L}); - - GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}), - GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); - uint64_t NP = TmpGrid->RankCount(); - uint64_t NN = TmpGrid->NodeCount(); - NN_global=NN; - uint64_t SHM=NP/NN; - - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); - - ///////// Welcome message //////////// - std::cout< seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5); - std::cout << GridLogMessage << "Initialised RNGs" << std::endl; - - ///////// Source preparation //////////// - LatticeFermion src (sFGrid); - LatticeFermion tmp (sFGrid); - std::cout << GridLogMessage << "allocated src and tmp" << std::endl; - random(RNG5,src); - std::cout << GridLogMessage << "intialised random source" << std::endl; - - RealD N2 = 1.0/::sqrt(norm2(src)); - src = src*N2; - - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); - - WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); - LatticeFermion src_e (sFrbGrid); - LatticeFermion src_o (sFrbGrid); - LatticeFermion r_e (sFrbGrid); - LatticeFermion r_o (sFrbGrid); - LatticeFermion r_eo (sFGrid); - LatticeFermion err (sFGrid); - { - - pickCheckerboard(Even,src_e,src); - pickCheckerboard(Odd,src_o,src); - -#if defined(AVX512) - const int num_cases = 6; - std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); -#else - const int num_cases = 4; - std::string fmt("U/S ; U/O ; G/S ; G/O "); -#endif - controls Cases [] = { -#ifdef AVX512 - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, -#endif - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } - }; - - for(int c=0;cBarrier(); - for(int i=0;iBarrier(); - double t1=usecond(); - - sDw.ZeroCounters(); - time_statistics timestat; - std::vector t_time(ncall); - for(uint64_t i=0;iBarrier(); - - double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; - if ( mflopsRankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global=NN; uint64_t SHM=NP/NN; - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]}); ///////// Welcome message //////////// std::cout< U(4,FGrid); - auto Umu_v = Umu.View(); - auto Umu5d_v = Umu5d.View(); - for(int ss=0;ssoSites();ss++){ - for(int s=0;s(Umu5d,mu); - } - for(int mu=0;muBarrier(); for(int i=0;iBarrier(); double t1=usecond(); - // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); - // if (ncall < 500) ncall = 500; - uint64_t ncall = 1000; + uint64_t ncall = 50; FGrid->Broadcast(0,&ncall,sizeof(ncall)); @@ -649,24 +406,11 @@ public: std::cout< seeds4({1,2,3,4}); + GridParallelRNG RNG4(FGrid); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + + typedef ImprovedStaggeredFermionF Action; + typedef typename Action::FermionField Fermion; + typedef LatticeGaugeFieldF Gauge; + + Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu); + + typename Action::ImplParams params; + Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params); + + ///////// Source preparation //////////// + Fermion src (FGrid); random(RNG4,src); + Fermion src_e (FrbGrid); + Fermion src_o (FrbGrid); + Fermion r_e (FrbGrid); + Fermion r_o (FrbGrid); + Fermion r_eo (FGrid); + + { + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd,src_o,src); + + const int num_cases = 4; + std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S "); + + controls Cases [] = { + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } + }; + + for(int c=0;cBarrier(); + for(int i=0;iBarrier(); + double t1=usecond(); + uint64_t ncall = 500; + + FGrid->Broadcast(0,&ncall,sizeof(ncall)); + + // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); + for(uint64_t i=0;iBarrier(); + + double volume=1; for(int mu=0;mumflops_best ) mflops_best = mflops; + if ( mflops L_list({16,24,32}); int selm1=sel-1; - std::vector robust_list; std::vector wilson; std::vector dwf4; - std::vector dwf5; + std::vector staggered; - if ( do_wilson ) { - int Ls=1; - std::cout< -#ifdef GRID_NVCC +#ifdef GRID_CUDA #define CUDA_PROFILE #endif @@ -129,8 +129,8 @@ int main (int argc, char ** argv) LatticeGaugeField Umu5d(FGrid); std::vector U(4,FGrid); { - auto Umu5d_v = Umu5d.View(); - auto Umu_v = Umu.View(); + autoView( Umu5d_v, Umu5d, CpuWrite); + autoView( Umu_v , Umu , CpuRead); for(int ss=0;ssoSites();ss++){ for(int s=0;s & latt4, int Ls, int threads,int report ) LatticeGaugeField Umu5d(FGrid); // replicate across fifth dimension - auto Umu5d_v = Umu5d.View(); - auto Umu_v = Umu.View(); - for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites();ss++){ + for(int s=0;s > &mat, for(int b=0;b > &mat, for(int b=0;b > &mat int ss= so+n*stride+b; for(int i=0;i > &m for(int i=0;i > &m // Trigger unroll for ( int m=0;m +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +void benchDw(std::vector & L, int Ls); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + const int Ls=12; + std::vector< std::vector > latts; +#if 1 + latts.push_back(std::vector ({24,24,24,24}) ); + latts.push_back(std::vector ({48,24,24,24}) ); + latts.push_back(std::vector ({96,24,24,24}) ); + latts.push_back(std::vector ({96,48,24,24}) ); + // latts.push_back(std::vector ({96,48,48,24}) ); + // latts.push_back(std::vector ({96,48,48,48}) ); +#else + // latts.push_back(std::vector ({96,48,48,48}) ); + latts.push_back(std::vector ({96,96,96,192}) ); +#endif + + std::cout << GridLogMessage<< "*****************************************************************" < latt4 = latts[l]; + std::cout << GridLogMessage <<"\t"; + for(int d=0;d & latt4, int Ls) +{ + ///////////////////////////////////////////////////////////////////////////////////// + // for Nc=3 + ///////////////////////////////////////////////////////////////////////////////////// + // Dw : Ls*24*(7+48)= Ls*1320 + // + // M5D: Ls*(4*2*Nc mul + 4*2*Nc madd ) = 3*4*2*Nc*Ls = Ls*72 + // Meo: Ls*24*(7+48) + Ls*72 = Ls*1392 + // + // Mee: 3*Ns*2*Nc*Ls // Chroma 6*N5*Nc*Ns + // + // LeemInv : 2*2*Nc*madd*Ls + // LeeInv : 2*2*Nc*madd*Ls + // DeeInv : 4*2*Nc*mul *Ls + // UeeInv : 2*2*Nc*madd*Ls + // UeemInv : 2*2*Nc*madd*Ls = Nc*Ls*(8+8+8+8+8) = 40*Nc*Ls// Chroma (10*N5 - 8)*Nc*Ns ~ (40 N5 - 32)Nc flops + // QUDA counts as dense LsxLs real matrix x Ls x NcNsNreim => Nc*4*2 x Ls^2 FMA = 16Nc Ls^2 flops + // Mpc => 1452*cbvol*2*Ls flops // + // => (1344+Ls*48)*Ls*cbvol*2 flops QUDA = 1920 @Ls=12 and 2112 @Ls=16 + ///////////////////////////////////////////////////////////////////////////////////// + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + // long unsigned int single_site_flops = 8*Nc*(7+16*Nc)*Ls; + long unsigned int single_site_mpc_flops = 8*Nc*(7+16*Nc)*2*Ls + 40*Nc*2*Ls + 4*Nc*2*Ls; + long unsigned int single_site_quda_flops = 8*Nc*(7+16*Nc)*2*Ls + 16*Nc*Ls*Ls + 4*Nc*2*Ls; + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + + ColourMatrixF cm = ComplexF(1.0,0.0); + + int ncall=300; + RealD mass=0.1; + RealD M5 =1.8; + RealD NP = UGrid->_Nprocessors; + double volume=1; for(int mu=0;mu Mpc(Dw); + Chebyshev Cheby(0.0,60.0,order); + + { + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + + double t0=usecond(); + for(int i=0;i(Umu,mu); } ref = Zero(); - /* - { // Naive wilson implementation - ref = Zero(); - for(int mu=0;mu +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename ImprovedStaggeredFermionF::FermionField FermionField; + typename ImprovedStaggeredFermionF::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=Zero(); + FermionField ref(&Grid); ref=Zero(); + FermionField tmp(&Grid); tmp=Zero(); + FermionField err(&Grid); tmp=Zero(); + LatticeGaugeFieldF Umu(&Grid); random(pRNG,Umu); + std::vector U(4,&Grid); + + double volume=1; + for(int mu=0;mu(Umu,U[nn],nn); + } +#endif + + for(int mu=0;mu(Umu,mu); + } + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + ImprovedStaggeredFermionF Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params); + + std::cout< + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + + + +using namespace Grid; + +int main(int argc, char** argv) { +#if !defined(GRID_COMMS_NONE) + Grid_init(&argc, &argv); + + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + auto latt_size = GridDefaultLatt(); + + GridCartesian grid(latt_size, simd_layout, mpi_layout); + + GridParallelRNG pRNG(&grid); + + pRNG.SeedFixedIntegers(std::vector({45, 12, 81, 9})); + + LatticeGaugeField Umu_ref(&grid); + LatticeGaugeField Umu_me(&grid); + LatticeGaugeField Umu_diff(&grid); + + FieldMetaData header_ref; + FieldMetaData header_me; + + Umu_ref = Zero(); + Umu_me = Zero(); + + std::string file("/home/daniel/configs/openqcd/test_16x8_pbcn6"); + + if(GridCmdOptionExists(argv, argv + argc, "--config")) { + file = GridCmdOptionPayload(argv, argv + argc, "--config"); + std::cout << "file: " << file << std::endl; + assert(!file.empty()); + } + + OpenQcdIOChromaReference::readConfiguration(Umu_ref, header_ref, file); + OpenQcdIO::readConfiguration(Umu_me, header_me, file); + + std::cout << GridLogMessage << header_ref << std::endl; + std::cout << GridLogMessage << header_me << std::endl; + + Umu_diff = Umu_ref - Umu_me; + + // clang-format off + std::cout << GridLogMessage + << "norm2(Umu_ref) = " << norm2(Umu_ref) + << " norm2(Umu_me) = " << norm2(Umu_me) + << " norm2(Umu_diff) = " << norm2(Umu_diff) << std::endl; + // clang-format on + + Grid_finalize(); +#endif +} diff --git a/tests/Test_dwf_mixedcg_prec.cc b/tests/Test_dwf_mixedcg_prec.cc index 0b8463d9..be881db9 100644 --- a/tests/Test_dwf_mixedcg_prec.cc +++ b/tests/Test_dwf_mixedcg_prec.cc @@ -29,7 +29,6 @@ Author: Peter Boyle using namespace std; using namespace Grid; - ; template struct scal { @@ -51,6 +50,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "::::: NB: to enable a quick bit reproducibility check use the --checksums flag. " << std::endl; + { GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); @@ -100,6 +100,8 @@ int main (int argc, char ** argv) ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,result_o_2); + MemoryManager::Print(); + LatticeFermionD diff_o(FrbGrid); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); @@ -130,7 +132,9 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " CG checksums "<oSites();i++){ auto SE = gStencil.GetEntry(0,i); - auto check = Check.View(); - auto foo = Foo.View(); - + autoView(check, Check, CpuWrite); + autoView( foo, Foo, CpuRead); // Encapsulate in a general wrapper check[i] = foo[SE->_offset]; auto tmp=check[i]; if (SE->_permute & 0x1 ) { permute(check[i],tmp,0); tmp=check[i];} @@ -147,8 +146,8 @@ int main(int argc, char ** argv) }}}} if (nrm > 1.0e-4) { - auto check = Check.View(); - auto bar = Bar.View(); + autoView( check , Check, CpuRead); + autoView( bar , Bar, CpuRead); for(int i=0;i + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + const int nIter = 100; + + // clang-format off + GridCartesian *Grid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi()); + GridCartesian *Grid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + // clang-format on + + GridParallelRNG pRNG_d(Grid_d); + GridParallelRNG pRNG_f(Grid_f); + + std::vector seeds_d({1, 2, 3, 4}); + std::vector seeds_f({5, 6, 7, 8}); + + pRNG_d.SeedFixedIntegers(seeds_d); + pRNG_f.SeedFixedIntegers(seeds_f); + + // clang-format off + LatticeFermionD x_d(Grid_d); random(pRNG_d, x_d); + LatticeFermionD y_d(Grid_d); random(pRNG_d, y_d); + LatticeFermionF x_f(Grid_f); random(pRNG_f, x_f); + LatticeFermionF y_f(Grid_f); random(pRNG_f, y_f); + // clang-format on + + GridStopWatch sw_ref; + GridStopWatch sw_res; + + { // double precision + ComplexD ip_d_ref, ip_d_res, diff_ip_d; + RealD norm2_d_ref, norm2_d_res, diff_norm2_d; + + sw_ref.Reset(); + sw_ref.Start(); + for(int i = 0; i < nIter; ++i) { + ip_d_ref = innerProduct(x_d, y_d); + norm2_d_ref = norm2(x_d); + } + sw_ref.Stop(); + + sw_res.Reset(); + sw_res.Start(); + for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_d_res, norm2_d_res, x_d, y_d); } + sw_res.Stop(); + + diff_ip_d = ip_d_ref - ip_d_res; + diff_norm2_d = norm2_d_ref - norm2_d_res; + + // clang-format off + std::cout << GridLogMessage << "Double: ip_ref = " << ip_d_ref << " ip_res = " << ip_d_res << " diff = " << diff_ip_d << std::endl; + std::cout << GridLogMessage << "Double: norm2_ref = " << norm2_d_ref << " norm2_res = " << norm2_d_res << " diff = " << diff_norm2_d << std::endl; + std::cout << GridLogMessage << "Double: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl; + // clang-format on + + assert(diff_ip_d == 0.); + assert(diff_norm2_d == 0.); + + std::cout << GridLogMessage << "Double: all checks passed" << std::endl; + } + + { // single precision + ComplexD ip_f_ref, ip_f_res, diff_ip_f; + RealD norm2_f_ref, norm2_f_res, diff_norm2_f; + + sw_ref.Reset(); + sw_ref.Start(); + for(int i = 0; i < nIter; ++i) { + ip_f_ref = innerProduct(x_f, y_f); + norm2_f_ref = norm2(x_f); + } + sw_ref.Stop(); + + sw_res.Reset(); + sw_res.Start(); + for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_f_res, norm2_f_res, x_f, y_f); } + sw_res.Stop(); + + diff_ip_f = ip_f_ref - ip_f_res; + diff_norm2_f = norm2_f_ref - norm2_f_res; + + // clang-format off + std::cout << GridLogMessage << "Single: ip_ref = " << ip_f_ref << " ip_res = " << ip_f_res << " diff = " << diff_ip_f << std::endl; + std::cout << GridLogMessage << "Single: norm2_ref = " << norm2_f_ref << " norm2_res = " << norm2_f_res << " diff = " << diff_norm2_f << std::endl; + std::cout << GridLogMessage << "Single: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl; + // clang-format on + + assert(diff_ip_f == 0.); + assert(diff_norm2_f == 0.); + + std::cout << GridLogMessage << "Single: all checks passed" << std::endl; + } + + Grid_finalize(); +} diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index 0bd97fea..16a9138a 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -109,8 +109,8 @@ int main(int argc, char ** argv) { StencilEntry *SE; SE = myStencil.GetEntry(permute_type,0,i); - auto check = Check.View(); - auto foo = Foo.View(); + autoView( check , Check, CpuWrite); + autoView( foo , Foo, CpuRead); if ( SE->_is_local && SE->_permute ) permute(check[i],foo[SE->_offset],permute_type); else if (SE->_is_local) @@ -151,8 +151,8 @@ int main(int argc, char ** argv) { }}}} if (nrm > 1.0e-4) { - auto check = Check.View(); - auto bar = Bar.View(); + autoView( check , Check, CpuRead); + autoView( bar , Bar, CpuRead); for(int i=0;i " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(ocheck[i],efoo[SE->_offset],permute_type); else if (SE->_is_local) @@ -226,8 +226,8 @@ int main(int argc, char ** argv) { SE = OStencil.GetEntry(permute_type,0,i); // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(echeck[i],ofoo[SE->_offset],permute_type); else if (SE->_is_local) diff --git a/tests/core/Test_contfrac_even_odd.cc b/tests/core/Test_contfrac_even_odd.cc index 25affd00..5311f869 100644 --- a/tests/core/Test_contfrac_even_odd.cc +++ b/tests/core/Test_contfrac_even_odd.cc @@ -238,11 +238,11 @@ void TestWhat(What & Ddwf, RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_cshift_red_black.cc b/tests/core/Test_cshift_red_black.cc index 34325072..4fdd5fc0 100644 --- a/tests/core/Test_cshift_red_black.cc +++ b/tests/core/Test_cshift_red_black.cc @@ -82,7 +82,7 @@ int main (int argc, char ** argv) pickCheckerboard(Odd,Uo,U); // std::cout< HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); - HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + HermOpEO.MpcDagMpc(chi_e, dchi_e); + HermOpEO.MpcDagMpc(chi_o, dchi_o); - HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); - HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + HermOpEO.MpcDagMpc(phi_e, dphi_e); + HermOpEO.MpcDagMpc(phi_o, dphi_o); pDce = innerProduct(phi_e, dchi_e); pDco = innerProduct(phi_o, dchi_o); diff --git a/tests/core/Test_dwf_even_odd.cc b/tests/core/Test_dwf_even_odd.cc index d654e588..6093ee8f 100644 --- a/tests/core/Test_dwf_even_odd.cc +++ b/tests/core/Test_dwf_even_odd.cc @@ -216,11 +216,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index ac4cde99..bf37f4d5 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -201,11 +201,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index cd2373ce..08752a46 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -73,7 +73,7 @@ int main(int argc, char **argv) { omp_set_num_threads(omp); #endif - for (int lat = 8; lat <= 16; lat += 40) { + for (int lat = 16; lat <= 16; lat += 40) { std::cout << "Lat " << lat << std::endl; latt_size[0] = lat; @@ -159,15 +159,17 @@ int main(int argc, char **argv) { LatticeColourMatrix newFoo = Foo; // confirm correctness of copy constructor Bar = Foo - newFoo; - std::cout << "Copy constructor diff check: "; + std::cout << "Copy constructor diff check: \n"; double test_cc = norm2(Bar); if (test_cc < 1e-5){ std::cout << "OK\n"; - } - else{ + } else{ + std::cout << "Foo\n"< HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); - HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + HermOpEO.MpcDagMpc(chi_e, dchi_e); + HermOpEO.MpcDagMpc(chi_o, dchi_o); - HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); - HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + HermOpEO.MpcDagMpc(phi_e, dphi_e); + HermOpEO.MpcDagMpc(phi_o, dphi_o); pDce = innerProduct(phi_e, dchi_e); pDco = innerProduct(phi_o, dchi_o); diff --git a/tests/core/Test_mobius_even_odd.cc b/tests/core/Test_mobius_even_odd.cc index 0a035dc8..7f808cac 100644 --- a/tests/core/Test_mobius_even_odd.cc +++ b/tests/core/Test_mobius_even_odd.cc @@ -266,11 +266,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index c85d4090..1f42ff0d 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -270,11 +270,11 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); SchurDiagMooeeOperator HermOpEO(Ds); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index 7055d183..3d175890 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -88,14 +88,15 @@ int main (int argc, char ** argv) // replicate across fifth dimension //////////////////////////////////// LatticeGaugeField Umu5d(FGrid); - auto umu5d = Umu5d.View(); - auto umu = Umu.View(); - for(int ss=0;ssoSites();ss++){ - for(int s=0;soSites();ss++){ + for(int s=0;s U(4,FGrid); for(int mu=0;mu HermOpEO(Ds); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_staggered_naive.cc b/tests/core/Test_staggered_naive.cc new file mode 100644 index 00000000..f96bac93 --- /dev/null +++ b/tests/core/Test_staggered_naive.cc @@ -0,0 +1,282 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./benchmarks/Benchmark_wilson.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + ; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + Coordinate latt_size = GridDefaultLatt(); + Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + Coordinate mpi_layout = GridDefaultMpi(); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(&Grid); + + int threads = GridThread::GetThreads(); + std::cout< seeds({1,2,3,4}); + GridParallelRNG pRNG(&Grid); + pRNG.SeedFixedIntegers(seeds); + // pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + + typedef typename NaiveStaggeredFermionR::FermionField FermionField; + typedef typename NaiveStaggeredFermionR::ComplexField ComplexField; + typename NaiveStaggeredFermionR::ImplParams params; + + FermionField src (&Grid); random(pRNG,src); + FermionField result(&Grid); result=Zero(); + FermionField ref(&Grid); ref=Zero(); + FermionField tmp(&Grid); tmp=Zero(); + FermionField err(&Grid); tmp=Zero(); + FermionField phi (&Grid); random(pRNG,phi); + FermionField chi (&Grid); random(pRNG,chi); + LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu); + std::vector U(4,&Grid); + + + double volume=1; + for(int mu=0;mu(Umu,mu); + /* Debug force unit + U[mu] = 1.0; + PokeIndex(Umu,U[mu],mu); + */ + } + + ref = Zero(); + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD u0=1.0; + + { // Simple improved staggered implementation + ref = Zero(); + RealD c1tad = 0.5*c1/u0; + + Lattice > coor(&Grid); + + Lattice > x(&Grid); LatticeCoordinate(x,0); + Lattice > y(&Grid); LatticeCoordinate(y,1); + Lattice > z(&Grid); LatticeCoordinate(z,2); + Lattice > t(&Grid); LatticeCoordinate(t,3); + + Lattice > lin_z(&Grid); lin_z=x+y; + Lattice > lin_t(&Grid); lin_t=x+y+z; + + for(int mu=0;mu * = < chi | Deo^dag| phi> "< HermOpEO(Ds); + HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); + HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + + HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); + HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + + pDce = innerProduct(phi_e,dchi_e); + pDco = innerProduct(phi_o,dchi_o); + cDpe = innerProduct(chi_e,dphi_e); + cDpo = innerProduct(chi_o,dphi_o); + + std::cout< HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_wilson_twisted_mass_even_odd.cc b/tests/core/Test_wilson_twisted_mass_even_odd.cc index 58b0b60f..ba80fd0e 100644 --- a/tests/core/Test_wilson_twisted_mass_even_odd.cc +++ b/tests/core/Test_wilson_twisted_mass_even_odd.cc @@ -208,11 +208,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc index 1150930b..a52e9bc2 100644 --- a/tests/core/Test_zmobius_even_odd.cc +++ b/tests/core/Test_zmobius_even_odd.cc @@ -280,11 +280,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/debug/Test_cayley_mres.cc b/tests/debug/Test_cayley_mres.cc new file mode 100644 index 00000000..2ad605b8 --- /dev/null +++ b/tests/debug/Test_cayley_mres.cc @@ -0,0 +1,622 @@ +/************************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_cayley_cg.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include + +using namespace std; +using namespace Grid; + + +template +void TestConserved(What & Ddwf, What & Ddwfrev, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5); + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT, + Gamma::Algebra::Gamma5 + }; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + int threads = GridThread::GetThreads(); + std::cout< omegas; + std::vector < ComplexD > omegasrev(Ls); + +#if 1 + omegas.push_back( std::complex(1.45806438985048,-0) ); + omegas.push_back( std::complex(0.830951166685955,-0) ); + omegas.push_back( std::complex(0.341985020453729,-0) ); + omegas.push_back( std::complex(0.126074299502912,-0) ); + // omegas.push_back( std::complex(0.0686324988446592,0.0550658530827402) ); + // omegas.push_back( std::complex(0.0686324988446592,-0.0550658530827402) ); + omegas.push_back( std::complex(0.0686324988446592,0)); + omegas.push_back( std::complex(0.0686324988446592,0)); + omegas.push_back( std::complex(0.0990136651962626,-0) ); + omegas.push_back( std::complex(0.21137902619029,-0) ); + omegas.push_back( std::complex(0.542352409156791,-0) ); + omegas.push_back( std::complex(1.18231318389348,-0) ); +#else + omegas.push_back( std::complex(0.8,0.0)); + omegas.push_back( std::complex(1.1,0.0)); + omegas.push_back( std::complex(1.2,0.0)); + omegas.push_back( std::complex(1.3,0.0)); + omegas.push_back( std::complex(0.5,0.2)); + omegas.push_back( std::complex(0.5,-0.2)); + omegas.push_back( std::complex(0.8,0.0)); + omegas.push_back( std::complex(1.1,0.0)); + omegas.push_back( std::complex(1.2,0.0)); + omegas.push_back( std::complex(1.3,0.0)); +#endif + + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + + GridCartesian * UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplexF::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF); + GridCartesian * FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF); + GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF); + + + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + + LatticeGaugeField Umu(UGrid); + SU3::ColdConfiguration(Umu); + // SU3::HotConfiguration(RNG4,Umu); + + RealD mass=0.3; + RealD M5 =1.0; + std::cout<(Ddwf,Ddwf,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + RealD b=1.5;// Scale factor b+c=2, b-c=1 + RealD c=0.5; + // std::vector gamma(Ls,ComplexD(1.0,0.0)); + + std::cout<(Dmob,Dmob,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout<(Dsham,Dsham,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + std::cout<(ZDmob,ZDmobrev,Umu,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); + + Grid_finalize(); +} + + + +template +void TestConserved(Action & Ddwf, + Action & Ddwfrev, + LatticeGaugeField &Umu, + GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid, + GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid, + RealD mass, RealD M5, + GridParallelRNG *RNG4, + GridParallelRNG *RNG5) +{ + int Ls=Ddwf.Ls; + + LatticePropagator phys_src(UGrid); + + std::vector U(4,UGrid); + + LatticePropagator seqsrc(FGrid); + LatticePropagator prop5(FGrid); + LatticePropagator prop5rev(FGrid); + LatticePropagator prop4(UGrid); + LatticePropagator Axial_mu(UGrid); + LatticePropagator Vector_mu(UGrid); + LatticeComplex PA (UGrid); + LatticeComplex SV (UGrid); + LatticeComplex VV (UGrid); + LatticeComplex PJ5q(UGrid); + LatticeComplex PP (UGrid); + LatticePropagator seqprop(UGrid); + + SpinColourMatrix kronecker; kronecker=1.0; + Coordinate coor({0,0,0,0}); + phys_src=Zero(); + pokeSite(kronecker,phys_src,coor); + + MdagMLinearOperator HermOp(Ddwf); + MdagMLinearOperator HermOprev(Ddwfrev); + ConjugateGradient CG(1.0e-16,100000); + for(int s=0;s(src4,phys_src,s,c); + + LatticeFermion src5 (FGrid); + Ddwf.ImportPhysicalFermionSource(src4,src5); + + LatticeFermion result5(FGrid); result5=Zero(); + + // CGNE + LatticeFermion Mdagsrc5 (FGrid); + Ddwf.Mdag(src5,Mdagsrc5); + CG(HermOp,Mdagsrc5,result5); + FermToProp(prop5,result5,s,c); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(prop4,result4,s,c); + + Ddwfrev.ImportPhysicalFermionSource(src4,src5); + Ddwfrev.Mdag(src5,Mdagsrc5); + CG(HermOprev,Mdagsrc5,result5); + FermToProp(prop5rev,result5,s,c); + } + } + +#if 1 + auto curr = Current::Axial; + const int mu_J=Nd-1; +#else + auto curr = Current::Vector; + const int mu_J=0; +#endif + const int t_J=0; + + LatticeComplex ph (UGrid); ph=1.0; + + Ddwf.SeqConservedCurrent(prop5, + seqsrc, + phys_src, + curr, + mu_J, + t_J, + t_J,// whole lattice + ph); + + for(int s=0;s(src5,seqsrc,s,c); + + LatticeFermion result5(FGrid); result5=Zero(); + + // CGNE + LatticeFermion Mdagsrc5 (FGrid); + Ddwf.Mdag(src5,Mdagsrc5); + CG(HermOp,Mdagsrc5,result5); + + LatticeFermion result4(UGrid); + Ddwf.ExportPhysicalFermionSolution(result5,result4); + FermToProp(seqprop,result4,s,c); + } + } + + Gamma g5(Gamma::Algebra::Gamma5); + Gamma gT(Gamma::Algebra::GammaT); + + std::vector sumPA; + std::vector sumSV; + std::vector sumVV; + std::vector sumPP; + std::vector sumPJ5q; + + Ddwf.ContractConservedCurrent(prop5rev,prop5,Axial_mu,phys_src,Current::Axial,Tdir); + Ddwf.ContractConservedCurrent(prop5rev,prop5,Vector_mu,phys_src,Current::Vector,Tdir); + Ddwf.ContractJ5q(prop5,PJ5q); + + PA = trace(g5*Axial_mu); + SV = trace(Vector_mu); + VV = trace(gT*Vector_mu); + PP = trace(adj(prop4)*prop4); + + // Spatial sum + sliceSum(PA,sumPA,Tdir); + sliceSum(SV,sumSV,Tdir); + sliceSum(VV,sumVV,Tdir); + sliceSum(PP,sumPP,Tdir); + sliceSum(PJ5q,sumPJ5q,Tdir); + + int Nt=sumPA.size(); + for(int t=0;t check_buf; + + test_S = trace(qSite*g); + test_V = trace(qSite*g*Gamma::gmu[mu_J]); + + Ddwf.ContractConservedCurrent(prop5rev,prop5,cur,phys_src,curr,mu_J); + + c = trace(cur*g); + sliceSum(c, check_buf, Tp); + check_S = TensorRemove(check_buf[t_J]); + + auto gmu=Gamma::gmu[mu_J]; + c = trace(cur*g*gmu); + sliceSum(c, check_buf, Tp); + check_V = TensorRemove(check_buf[t_J]); + + + std::cout< sumPAref; + std::vector sumPA; + std::vector sumPP; + std::vector sumPJ5qref; + std::vector sumPJ5q; + std::vector sumDefect; + + // Spatial sum + sliceSum(PAmu[Tdir],sumPAref,Tdir); + sliceSum(PA,sumPA,Tdir); + sliceSum(PJ5q,sumPJ5q,Tdir); + sliceSum(PP,sumPP,Tdir); + sliceSum(Defect,sumDefect,Tdir); + + Ddwf.ContractJ5q(prop5,PJ5q); + sliceSum(PJ5q,sumPJ5qref,Tdir); + + int Nt=sumPA.size(); + for(int t=0;t U(4,FGrid); + { + autoView( Umu5d_v , Umu5d, CpuWrite); + autoView( Umu_v , Umu , CpuRead); + for(int ss=0;ssoSites();ss++){ + for(int s=0;s(Umu5d,mu); + } + LatticeFermion ref(FGrid); + LatticeFermion tmp(FGrid); + ref = Zero(); + for(int mu=0;muoSites(),{ + uint64_t ss= sss*Ls; + typedef vSpinColourVector spinor; + spinor tmp1, tmp2; + for(int s=0;s(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force.cc b/tests/forces/Test_dwf_force.cc index 009f50b3..81a1b8c4 100644 --- a/tests/forces/Test_dwf_force.cc +++ b/tests/forces/Test_dwf_force.cc @@ -100,9 +100,9 @@ int main (int argc, char ** argv) // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index 670e7589..0b0ba346 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -110,9 +110,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce.cc b/tests/forces/Test_dwf_gpforce.cc index d762e22a..b39fdd14 100644 --- a/tests/forces/Test_dwf_gpforce.cc +++ b/tests/forces/Test_dwf_gpforce.cc @@ -119,9 +119,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 66ae9dcf..58258a5e 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -114,9 +114,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc index c4e214bb..21f0b9d0 100644 --- a/tests/forces/Test_gp_plaq_force.cc +++ b/tests/forces/Test_gp_plaq_force.cc @@ -85,9 +85,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto mom_v = mom.View(); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_gp_rect_force.cc b/tests/forces/Test_gp_rect_force.cc index 2573af6a..bb4ea6de 100644 --- a/tests/forces/Test_gp_rect_force.cc +++ b/tests/forces/Test_gp_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto Uprime_v= Uprime.View(); - auto U_v = U.View(); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_gpdwf_force.cc b/tests/forces/Test_gpdwf_force.cc index 09a1dc4b..bdc332d9 100644 --- a/tests/forces/Test_gpdwf_force.cc +++ b/tests/forces/Test_gpdwf_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_gpwilson_force.cc b/tests/forces/Test_gpwilson_force.cc index cd30d898..1c85a5d9 100644 --- a/tests/forces/Test_gpwilson_force.cc +++ b/tests/forces/Test_gpwilson_force.cc @@ -99,9 +99,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_force.cc b/tests/forces/Test_mobius_force.cc index a1c4e930..11e69652 100644 --- a/tests/forces/Test_mobius_force.cc +++ b/tests/forces/Test_mobius_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc index f71e2d41..f85501fa 100644 --- a/tests/forces/Test_mobius_force_eofa.cc +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -112,9 +112,9 @@ int main (int argc, char** argv) PokeIndex(mom, mommu, mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 4975f36d..68163e63 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -115,9 +115,9 @@ int main (int argc, char** argv) SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg PokeIndex(mom, mommu, mu); - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); // fourth order exponential approx thread_foreach( i, mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt + mom_v[i](mu) *mom_v[i](mu) *U_v[i](mu)*(dt*dt/2.0) diff --git a/tests/forces/Test_partfrac_force.cc b/tests/forces/Test_partfrac_force.cc index 3ea2c6aa..17dce530 100644 --- a/tests/forces/Test_partfrac_force.cc +++ b/tests/forces/Test_partfrac_force.cc @@ -101,9 +101,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index 9a78de24..ed72f2c0 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto mom_v = mom.View(); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(i,mom_v,{ // exp(pmu dt) * Umu Uprime_v[i](mu) = U_v[i](mu) + mom_v[i](mu)*U_v[i](mu)*dt ; }); diff --git a/tests/forces/Test_wilson_force.cc b/tests/forces/Test_wilson_force.cc index 47f1516a..c8b3a7f4 100644 --- a/tests/forces/Test_wilson_force.cc +++ b/tests/forces/Test_wilson_force.cc @@ -105,9 +105,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto U_v = U.View(); - auto mom_v = mom.View(); - auto Uprime_v = Uprime.View(); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach( i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu); Uprime_v[i](mu) += mom_v[i](mu)*U_v[i](mu)*dt ; diff --git a/tests/forces/Test_wilsonclover_force.cc b/tests/forces/Test_wilsonclover_force.cc index d9ace23c..f26f0ac9 100644 --- a/tests/forces/Test_wilsonclover_force.cc +++ b/tests/forces/Test_wilsonclover_force.cc @@ -105,9 +105,9 @@ int main(int argc, char **argv) Hmom -= real(sum(trace(mommu * mommu))); PokeIndex(mom, mommu, mu); - auto Uprime_v = Uprime.View(); - auto U_v = U.View(); - auto mom_v = mom.View(); + autoView(Uprime_v, Uprime, CpuWrite); + autoView( U_v , U, CpuRead); + autoView( mom_v, mom, CpuRead); thread_foreach(ss,mom_v, { Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]); diff --git a/tests/forces/Test_zmobius_force.cc b/tests/forces/Test_zmobius_force.cc index 2730885f..e24ae601 100644 --- a/tests/forces/Test_zmobius_force.cc +++ b/tests/forces/Test_zmobius_force.cc @@ -114,9 +114,9 @@ int main (int argc, char ** argv) PokeIndex(mom,mommu,mu); // fourth order exponential approx - auto mom_v = mom.View(); - auto U_v = U.View(); - auto Uprime_v = Uprime.View(); + autoView( mom_v, mom, CpuRead); + autoView( U_v , U, CpuRead); + autoView(Uprime_v, Uprime, CpuWrite); thread_foreach(i,mom_v,{ Uprime_v[i](mu) = U_v[i](mu) diff --git a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc index 6fa90f32..3b8cdda6 100644 --- a/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonMixedRepresentationsFermionGauge.cc @@ -35,7 +35,7 @@ directory int main(int argc, char **argv) { -#ifndef GRID_NVCC +#ifndef GRID_CUDA using namespace Grid; diff --git a/tests/hmc/Test_multishift_sqrt.cc b/tests/hmc/Test_multishift_sqrt.cc index 834e6fc8..31697c12 100644 --- a/tests/hmc/Test_multishift_sqrt.cc +++ b/tests/hmc/Test_multishift_sqrt.cc @@ -31,7 +31,6 @@ Author: paboyle using namespace std; using namespace Grid; - ; template class DumbOperator : public LinearOperatorBase { public: @@ -57,6 +56,7 @@ public: // Support for coarsening to a multigrid void OpDiag (const Field &in, Field &out) {}; void OpDir (const Field &in, Field &out,int dir,int disp){}; + void OpDirAll (const Field &in, std::vector &out) {}; void Op (const Field &in, Field &out){ out = scale * in; @@ -104,7 +104,7 @@ int main (int argc, char ** argv) GridDefaultMpi()); double lo=0.001; - double hi=1.0; + double hi=20.0; int precision=64; int degree=10; AlgRemez remez(lo,hi,precision); diff --git a/tests/solver/Test_dwf_fpgcr.cc b/tests/solver/Test_dwf_fpgcr.cc index 226bd933..156f678a 100644 --- a/tests/solver/Test_dwf_fpgcr.cc +++ b/tests/solver/Test_dwf_fpgcr.cc @@ -70,9 +70,6 @@ int main (int argc, char ** argv) SU3::HotConfiguration(RNG4,Umu); - TrivialPrecon simple; - - PrecGeneralisedConjugateResidual PGCR(1.0e-6,10000,simple,4,160); ConjugateResidual CR(1.0e-6,10000); @@ -86,15 +83,19 @@ int main (int argc, char ** argv) std::cout< HermOp(Ddwf); + TrivialPrecon simple; + PrecGeneralisedConjugateResidual PGCR(1.0e-6,10000,HermOp,simple,4,160); + result=Zero(); - PGCR(HermOp,src,result); + PGCR(src,result); std::cout< g5HermOp(Ddwf); + PrecGeneralisedConjugateResidual PGCR5(1.0e-6,10000,g5HermOp,simple,4,160); result=Zero(); - PGCR(g5HermOp,src,result); + PGCR5(src,result); std::cout<oSites();site++){ subspace_g5[site](nn) = subspace[site](nn); diff --git a/tests/solver/Test_dwf_multigrid.cc b/tests/solver/Test_dwf_multigrid.cc new file mode 100644 index 00000000..9e11c160 --- /dev/null +++ b/tests/solver/Test_dwf_multigrid.cc @@ -0,0 +1,594 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_hdcr.cc + + Copyright (C) 2015 + +Author: Antonin Portelli +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; + +#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < +class MultiGridPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + CoarseOperator & _CoarseOperator; + FineOperator & _FineOperator; + Guesser & _Guess; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + MultiGridPreconditioner(Aggregates &Agg, CoarseOperator &Coarse, + FineOperator &Fine, + FineSmoother &Smoother, + Guesser &Guess_, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _CoarseOperator(Coarse), + _FineOperator(Fine), + _Smoother(Smoother), + _Guess(Guess_), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + CoarseVector Csrc(_CoarseOperator.Grid()); + CoarseVector Csol(_CoarseOperator.Grid()); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + std::vector blockc ({2,2,2,2}); + const int nbasis= 32; + const int nbasisc= 32; + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + std::vector cseeds({5,6,7,8}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds); + LatticeFermion src(FGrid); gaussian(RNG5,src);// src=src+g5*src; + LatticeFermion result(FGrid); + LatticeGaugeField Umu(UGrid); + + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + std::cout< HermDefOp(Ddwf); + + Subspace Aggregates(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + { + int nb=nbasis/2; + Aggregates.CreateSubspaceChebyshev(RNG5,HermDefOp,nb,60.0,0.02,500,100,100,0.0); + for(int n=0;n Level1Op; + typedef CoarsenedMatrix,nbasisc> Level2Op; + + Gamma5R5HermitianLinearOperator HermIndefOp(Ddwf); + Gamma5R5HermitianLinearOperator HermIndefOpPV(Dpv); + + std::cout< CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + // GeneralisedMinimalResidual CoarseGMRES(tol,MaxIt,20); + + BiCGSTAB FineBiCGSTAB(tol,MaxIt); + ConjugateGradient FineCG(tol,MaxIt); + // GeneralisedMinimalResidual FineGMRES(tol,MaxIt,20); + + MdagMLinearOperator FineMdagM(Ddwf); // M^\dag M + PVdagMLinearOperator FinePVdagM(Ddwf,Dpv);// M_{pv}^\dag M + SchurDiagMooeeOperator FineDiagMooee(Ddwf); // M_ee - Meo Moo^-1 Moe + SchurDiagOneOperator FineDiagOne(Ddwf); // 1 - M_ee^{-1} Meo Moo^{-1} Moe e + + MdagMLinearOperator CoarseMdagM(LDOp); + PVdagMLinearOperator CoarsePVdagM(LDOp,LDOpPV); + + std::cout< IRLCheby(0.03,12.0,71); // 1 iter + FunctionHermOp IRLOpCheby(IRLCheby,CoarseMdagM); + PlainHermOp IRLOp (CoarseMdagM); + int Nk=64; + int Nm=128; + int Nstop=Nk; + ImplicitlyRestartedLanczos IRL(IRLOpCheby,IRLOp,Nstop,Nk,Nm,1.0e-3,20); + + int Nconv; + std::vector eval(Nm); + std::vector evec(Nm,Coarse5d); + IRL.calc(eval,evec,c_src,Nconv); + + std::cout< DeflCoarseGuesser(evec,eval); + NormalEquations DeflCoarseCGNE (LDOp,CoarseCG,DeflCoarseGuesser); + c_res=Zero(); + DeflCoarseCGNE(c_src,c_res); + + + std::cout< CoarseMgridCG(0.001,1000); + ChebyshevSmoother FineSmoother(0.5,60.0,10,HermIndefOp,Ddwf); + + typedef HDCRPreconditioner > TwoLevelHDCR; + TwoLevelHDCR TwoLevelPrecon(Aggregates, + HermIndefOp, + FineSmoother, + DeflCoarseCGNE); + TwoLevelPrecon.Level(1); + // PrecGeneralisedConjugateResidual l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + PrecGeneralisedConjugateResidualNonHermitian l1PGCR(1.0e-8,100,HermIndefOp,TwoLevelPrecon,16,16); + l1PGCR.Level(1); + + f_res=Zero(); + + CoarseCG.Tolerance=0.02; + l1PGCR(f_src,f_res); + + std::cout< CoarseMgridBiCGSTAB(0.01,1000); + BiCGSTAB FineMgridBiCGSTAB(0.0,24); + ZeroGuesser CoarseZeroGuesser; + ZeroGuesser FineZeroGuesser; + + SolverWrapper FineBiCGSmoother( FinePVdagM, FineMgridBiCGSTAB, FineZeroGuesser); + SolverWrapper CoarsePVdagMSolver(CoarsePVdagM,CoarseMgridBiCGSTAB,CoarseZeroGuesser); + typedef HDCRPreconditioner > TwoLevelMG; + + TwoLevelMG _TwoLevelMG(Aggregates, + FinePVdagM, + FineBiCGSmoother, + CoarsePVdagMSolver); + _TwoLevelMG.Level(1); + + PrecGeneralisedConjugateResidualNonHermitian pvPGCR(1.0e-8,100,FinePVdagM,_TwoLevelMG,16,16); + pvPGCR.Level(1); + + f_res=Zero(); + pvPGCR(f_src,f_res); + + std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include +#include +//#include +#include + +using namespace std; +using namespace Grid; +/* Params + * Grid: + * block1(4) + * block2(4) + * + * Subspace + * * Fine : Subspace(nbasis,hi,lo,order,first,step) -- 32, 60,0.02,500,100,100 + * * Coarse: Subspace(nbasis,hi,lo,order,first,step) -- 32, 18,0.02,500,100,100 + + * Smoother: + * * Fine: Cheby(hi, lo, order) -- 60,0.5,10 + * * Coarse: Cheby(hi, lo, order) -- 12,0.1,4 + + * Lanczos: + * CoarseCoarse IRL( Nk, Nm, Nstop, poly(lo,hi,order)) 24,36,24,0.002,4.0,61 + */ + +template class SolverWrapper : public LinearFunction { +private: + LinearOperatorBase & _Matrix; + OperatorFunction & _Solver; + LinearFunction & _Guess; +public: + + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// + SolverWrapper(LinearOperatorBase &Matrix, + OperatorFunction &Solver, + LinearFunction &Guess) + : _Matrix(Matrix), _Solver(Solver), _Guess(Guess) {}; + + void operator() (const Field &in, Field &out){ + + _Guess(in,out); + _Solver(_Matrix,in,out); // Mdag M out = Mdag in + + } +}; + + +// Must use a non-hermitian solver +template +class PVdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + Matrix &_PV; +public: + PVdagMLinearOperator(Matrix &Mat,Matrix &PV): _Mat(Mat),_PV(PV){}; + + void OpDiag (const Field &in, Field &out) { + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; + void Op (const Field &in, Field &out){ + Field tmp(in.Grid()); + _Mat.M(in,tmp); + _PV.Mdag(tmp,out); + } + void AdjOp (const Field &in, Field &out){ + Field tmp(in.Grid()); + _PV.M(tmp,out); + _Mat.Mdag(in,tmp); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + assert(0); + } + void HermOp(const Field &in, Field &out){ + assert(0); + } +}; + + +RealD InverseApproximation(RealD x){ + return 1.0/x; +} + +template class ChebyshevSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & _SmootherMatrix; + FineOperator & _SmootherOperator; + + Chebyshev Cheby; + + ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator,Matrix &SmootherMatrix) : + _SmootherOperator(SmootherOperator), + _SmootherMatrix(SmootherMatrix), + Cheby(_lo,_hi,_ord,InverseApproximation) + {}; + + void operator() (const Field &in, Field &out) + { + Field tmp(in.Grid()); + MdagMLinearOperator MdagMOp(_SmootherMatrix); + _SmootherOperator.AdjOp(in,tmp); + Cheby(MdagMOp,tmp,out); + } +}; + +template class MirsSmoother : public LinearFunction +{ +public: + typedef LinearOperatorBase FineOperator; + Matrix & SmootherMatrix; + FineOperator & SmootherOperator; + RealD tol; + RealD shift; + int maxit; + + MirsSmoother(RealD _shift,RealD _tol,int _maxit,FineOperator &_SmootherOperator,Matrix &_SmootherMatrix) : + shift(_shift),tol(_tol),maxit(_maxit), + SmootherOperator(_SmootherOperator), + SmootherMatrix(_SmootherMatrix) + {}; + + void operator() (const Field &in, Field &out) + { + ZeroGuesser Guess; + ConjugateGradient CG(tol,maxit,false); + + Field src(in.Grid()); + + ShiftedMdagMLinearOperator,Field> MdagMOp(SmootherMatrix,shift); + SmootherOperator.AdjOp(in,src); + Guess(src,out); + CG(MdagMOp,src,out); + } +}; + +#define GridLogLevel std::cout << GridLogMessage < +class HDCRPreconditioner : public LinearFunction< Lattice > { +public: + + typedef Aggregation Aggregates; + typedef CoarsenedMatrix CoarseOperator; + typedef typename Aggregation::CoarseVector CoarseVector; + typedef typename Aggregation::CoarseMatrix CoarseMatrix; + typedef typename Aggregation::FineField FineField; + typedef LinearOperatorBase FineOperator; + typedef LinearFunction FineSmoother; + + Aggregates & _Aggregates; + FineOperator & _FineOperator; + FineSmoother & _Smoother; + CoarseSolver & _CoarseSolve; + + int level; void Level(int lv) {level = lv; }; + + + HDCRPreconditioner(Aggregates &Agg, + FineOperator &Fine, + FineSmoother &Smoother, + CoarseSolver &CoarseSolve_) + : _Aggregates(Agg), + _FineOperator(Fine), + _Smoother(Smoother), + _CoarseSolve(CoarseSolve_), + level(1) { } + + virtual void operator()(const FineField &in, FineField & out) + { + auto CoarseGrid = _Aggregates.CoarseGrid; + CoarseVector Csrc(CoarseGrid); + CoarseVector Csol(CoarseGrid); + FineField vec1(in.Grid()); + FineField vec2(in.Grid()); + + double t; + // Fine Smoother + t=-usecond(); + _Smoother(in,out); + t+=usecond(); + GridLogLevel << "Smoother took "<< t/1000.0<< "ms" < block ({2,2,2,2}); + const int nbasis= 8; + + auto clatt = GridDefaultLatt(); + for(int d=0;d seeds({1,2,3,4}); + GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds); + GridParallelRNG CRNG(Coarse5d);CRNG.SeedFixedIntegers(seeds); + + LatticeGaugeField Umu(UGrid); + FieldMetaData header; + std::string file("./ckpoint_lat.4000"); + NerscIO::readConfiguration(Umu,header,file); + + std::cout< Subspace; + typedef CoarsenedMatrix CoarseOperator; + typedef CoarseOperator::CoarseVector CoarseVector; + typedef CoarseOperator::siteVector siteVector; + + std::cout< SubspaceOp(Dw); + + Subspace Aggregates4D(Coarse4d,UGrid,0); + Subspace Aggregates5D(Coarse5d,FGrid,0); + + assert ( (nbasis & 0x1)==0); + std::cout< Level1Op; + + NonHermitianLinearOperator LinOpDwf(Ddwf); + + Level1Op LDOp (*Coarse5d,0); + + std::cout< CoarseMdagM(LDOp); + BiCGSTAB CoarseBiCGSTAB(tol,MaxIt); + ConjugateGradient CoarseCG(tol,MaxIt); + + c_res=Zero(); + CoarseCG(CoarseMdagM,c_src,c_res); + + std::cout<