diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index fb14ac32..e56b39c5 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -252,19 +252,14 @@ public: /////////////////////// GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - RealD M (const CoarseVector &in, CoarseVector &out) + void M (const CoarseVector &in, CoarseVector &out) { - conformable(_grid,in.Grid()); conformable(in.Grid(),out.Grid()); - // RealD Nin = norm2(in); SimpleCompressor compressor; - double comms_usec = -usecond(); Stencil.HaloExchange(in,compressor); - comms_usec += usecond(); - autoView( in_v , in, AcceleratorRead); autoView( out_v , out, AcceleratorWrite); typedef LatticeView Aview; @@ -278,12 +273,7 @@ public: typedef decltype(coalescedRead(in_v[0])) calcVector; typedef decltype(coalescedRead(in_v[0](0))) calcComplex; - GridStopWatch ArithmeticTimer; int osites=Grid()->oSites(); - // double flops = osites*Nsimd*nbasis*nbasis*8.0*geom.npoint; - // double bytes = osites*nbasis*nbasis*geom.npoint*sizeof(CComplex); - double usecs =-usecond(); - // assert(geom.npoint==9); accelerator_for(sss, Grid()->oSites()*nbasis, Nsimd, { int ss = sss/nbasis; @@ -310,18 +300,11 @@ public: } coalescedWrite(out_v[ss](b),res); }); - usecs +=usecond(); - - double nrm_usec=-usecond(); - RealD Nout= norm2(out); - nrm_usec+=usecond(); for(int p=0;poSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); }); - // if( disp!= 0 ) { accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_p[ss](j,i),oZProj_v(ss)); });} - // accelerator_for(ss, Grid()->oSites(), Fobj::Nsimd(),{ coalescedWrite(A_self[ss](j,i),A_self(ss)(j,i)+iZProj_v(ss)); }); } } diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index 50600d2d..1add212c 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -43,7 +43,6 @@ NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////////////////// template class LinearOperatorBase { public: - // Support for coarsening to a multigrid virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base @@ -94,7 +93,10 @@ public: _Mat.Mdag(in,out); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); + _Mat.MdagM(in,out); + ComplexD dot = innerProduct(in,out); + n1=real(dot); + n2=norm2(out); } void HermOp(const Field &in, Field &out){ _Mat.MdagM(in,out); @@ -131,17 +133,14 @@ public: assert(0); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); - out = out + _shift*in; - - ComplexD dot; - dot= innerProduct(in,out); + HermOp(in,out); + ComplexD dot = innerProduct(in,out); n1=real(dot); n2=norm2(out); } void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); + _Mat.MdagM(in,out); + out = out + _shift*in; } }; @@ -170,7 +169,7 @@ public: _Mat.M(in,out); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.M(in,out); + HermOp(in,out); ComplexD dot= innerProduct(in,out); n1=real(dot); n2=norm2(out); } @@ -208,338 +207,305 @@ public: } }; - ////////////////////////////////////////////////////////// - // Even Odd Schur decomp operators; there are several - // ways to introduce the even odd checkerboarding - ////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////// +// Even Odd Schur decomp operators; there are several +// ways to introduce the even odd checkerboarding +////////////////////////////////////////////////////////// - template - class SchurOperatorBase : public LinearOperatorBase { - public: - virtual RealD Mpc (const Field &in, Field &out) =0; - virtual RealD MpcDag (const Field &in, Field &out) =0; - virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp(in.Grid()); - tmp.Checkerboard() = in.Checkerboard(); - ni=Mpc(in,tmp); - no=MpcDag(tmp,out); - } - virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - out.Checkerboard() = in.Checkerboard(); - MpcDagMpc(in,out,n1,n2); - } - virtual void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); - } - void Op (const Field &in, Field &out){ - Mpc(in,out); - } - void AdjOp (const Field &in, Field &out){ - MpcDag(in,out); - } - // Support for coarsening to a multigrid - void OpDiag (const Field &in, Field &out) { - assert(0); // must coarsen the unpreconditioned system - } - void OpDir (const Field &in, Field &out,int dir,int disp) { - assert(0); - } - void OpDirAll (const Field &in, std::vector &out){ - assert(0); - }; - }; - template - class SchurDiagMooeeOperator : public SchurOperatorBase { - public: - Matrix &_Mat; - SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - tmp.Checkerboard() = !in.Checkerboard(); - //std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; - - _Mat.Meooe(in,tmp); - _Mat.MooeeInv(tmp,out); - _Mat.Meooe(out,tmp); - - //std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl; - _Mat.Mooee(in,out); - return axpy_norm(out,-1.0,tmp,out); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MeooeDag(in,tmp); - _Mat.MooeeInvDag(tmp,out); - _Mat.MeooeDag(out,tmp); - - _Mat.MooeeDag(in,out); - return axpy_norm(out,-1.0,tmp,out); - } - }; - template - class SchurDiagOneOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - public: - SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; - - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - - _Mat.Meooe(in,out); - _Mat.MooeeInv(out,tmp); - _Mat.Meooe(tmp,out); - _Mat.MooeeInv(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MooeeInvDag(in,out); - _Mat.MeooeDag(out,tmp); - _Mat.MooeeInvDag(tmp,out); - _Mat.MeooeDag(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - }; - template - class SchurDiagTwoOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - public: - SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; - - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in.Grid()); - - _Mat.MooeeInv(in,out); - _Mat.Meooe(out,tmp); - _Mat.MooeeInv(tmp,out); - _Mat.Meooe(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in.Grid()); - - _Mat.MeooeDag(in,out); - _Mat.MooeeInvDag(out,tmp); - _Mat.MeooeDag(tmp,out); - _Mat.MooeeInvDag(out,tmp); - - return axpy_norm(out,-1.0,tmp,in); - } - }; - - template - class NonHermitianSchurOperatorBase : public LinearOperatorBase - { - public: - virtual RealD Mpc (const Field& in, Field& out) = 0; - virtual RealD MpcDag (const Field& in, Field& out) = 0; - virtual void MpcDagMpc(const Field& in, Field& out, RealD& ni, RealD& no) { - Field tmp(in.Grid()); - tmp.Checkerboard() = in.Checkerboard(); - ni = Mpc(in,tmp); - no = MpcDag(tmp,out); - } - virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) { - assert(0); - } - virtual void HermOp(const Field& in, Field& out) { - assert(0); - } - void Op(const Field& in, Field& out) { - Mpc(in, out); - } - void AdjOp(const Field& in, Field& out) { - MpcDag(in, out); - } - // Support for coarsening to a multigrid - void OpDiag(const Field& in, Field& out) { - assert(0); // must coarsen the unpreconditioned system - } - void OpDir(const Field& in, Field& out, int dir, int disp) { - assert(0); - } - }; - - template - class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase - { - public: - Matrix& _Mat; - NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - tmp.Checkerboard() = !in.Checkerboard(); - - _Mat.Meooe(in, tmp); - _Mat.MooeeInv(tmp, out); - _Mat.Meooe(out, tmp); - - _Mat.Mooee(in, out); - - return axpy_norm(out, -1.0, tmp, out); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MeooeDag(in, tmp); - _Mat.MooeeInvDag(tmp, out); - _Mat.MeooeDag(out, tmp); - - _Mat.MooeeDag(in, out); - - return axpy_norm(out, -1.0, tmp, out); - } - }; - - template - class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase - { - protected: - Matrix &_Mat; - - public: - NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.Meooe(in, out); - _Mat.MooeeInv(out, tmp); - _Mat.Meooe(tmp, out); - _Mat.MooeeInv(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MooeeInvDag(in, out); - _Mat.MeooeDag(out, tmp); - _Mat.MooeeInvDag(tmp, out); - _Mat.MeooeDag(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - }; - - template - class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase - { - protected: - Matrix& _Mat; - - public: - NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; - - virtual RealD Mpc(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MooeeInv(in, out); - _Mat.Meooe(out, tmp); - _Mat.MooeeInv(tmp, out); - _Mat.Meooe(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - virtual RealD MpcDag(const Field& in, Field& out) { - Field tmp(in.Grid()); - - _Mat.MeooeDag(in, out); - _Mat.MooeeInvDag(out, tmp); - _Mat.MeooeDag(tmp, out); - _Mat.MooeeInvDag(out, tmp); - - return axpy_norm(out, -1.0, tmp, in); - } - }; - - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta - // Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi - /////////////////////////////////////////////////////////////////////////////////////////////////// - template using SchurDiagOneRH = SchurDiagTwoOperator ; - template using SchurDiagOneLH = SchurDiagOneOperator ; - /////////////////////////////////////////////////////////////////////////////////////////////////// - // Staggered use - /////////////////////////////////////////////////////////////////////////////////////////////////// - template - class SchurStaggeredOperator : public SchurOperatorBase { - protected: - Matrix &_Mat; - Field tmp; - RealD mass; - double tMpc; - double tIP; - double tMeo; - double taxpby_norm; - uint64_t ncall; -public: - void Report(void) - { - std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "< +class SchurOperatorBase : public LinearOperatorBase { + public: + virtual void Mpc (const Field &in, Field &out) =0; + virtual void MpcDag (const Field &in, Field &out) =0; + virtual void MpcDagMpc(const Field &in, Field &out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); + Mpc(in,tmp); + MpcDag(tmp,out); + } virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - ncall++; - tMpc-=usecond(); - n2 = Mpc(in,out); - tMpc+=usecond(); - tIP-=usecond(); - ComplexD dot= innerProduct(in,out); - tIP+=usecond(); - n1 = real(dot); + out.Checkerboard() = in.Checkerboard(); + MpcDagMpc(in,out); + ComplexD dot= innerProduct(in,out); + n1=real(dot); + n2=norm2(out); } virtual void HermOp(const Field &in, Field &out){ - ncall++; - tMpc-=usecond(); - _Mat.Meooe(in,out); - _Mat.Meooe(out,tmp); - tMpc+=usecond(); - taxpby_norm-=usecond(); - axpby(out,-1.0,mass*mass,tmp,in); - taxpby_norm+=usecond(); + out.Checkerboard() = in.Checkerboard(); + MpcDagMpc(in,out); } - virtual RealD Mpc (const Field &in, Field &out) - { + void Op (const Field &in, Field &out){ + Mpc(in,out); + } + void AdjOp (const Field &in, Field &out){ + MpcDag(in,out); + } + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + assert(0); // must coarsen the unpreconditioned system + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + assert(0); + } + void OpDirAll (const Field &in, std::vector &out){ + assert(0); + }; +}; +template + class SchurDiagMooeeOperator : public SchurOperatorBase { + public: + Matrix &_Mat; + SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = !in.Checkerboard(); + + _Mat.Meooe(in,tmp); + _Mat.MooeeInv(tmp,out); + _Mat.Meooe(out,tmp); + _Mat.Mooee(in,out); + axpy(out,-1.0,tmp,out); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MeooeDag(in,tmp); + _Mat.MooeeInvDag(tmp,out); + _Mat.MeooeDag(out,tmp); + _Mat.MooeeDag(in,out); + axpy(out,-1.0,tmp,out); + } +}; +template + class SchurDiagOneOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + public: + SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; + + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + _Mat.Meooe(in,out); + _Mat.MooeeInv(out,tmp); + _Mat.Meooe(tmp,out); + _Mat.MooeeInv(out,tmp); + axpy(out,-1.0,tmp,in); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MooeeInvDag(in,out); + _Mat.MeooeDag(out,tmp); + _Mat.MooeeInvDag(tmp,out); + _Mat.MeooeDag(out,tmp); + axpy(out,-1.0,tmp,in); + } +}; +template + class SchurDiagTwoOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + public: + SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; + + virtual void Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); + + _Mat.MooeeInv(in,out); + _Mat.Meooe(out,tmp); + _Mat.MooeeInv(tmp,out); + _Mat.Meooe(out,tmp); + + axpy(out,-1.0,tmp,in); + } + virtual void MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); + + _Mat.MeooeDag(in,out); + _Mat.MooeeInvDag(out,tmp); + _Mat.MeooeDag(tmp,out); + _Mat.MooeeInvDag(out,tmp); + + axpy(out,-1.0,tmp,in); + } +}; + +template +class NonHermitianSchurOperatorBase : public LinearOperatorBase +{ + public: + virtual void Mpc (const Field& in, Field& out) = 0; + virtual void MpcDag (const Field& in, Field& out) = 0; + virtual void MpcDagMpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); + Mpc(in,tmp); + MpcDag(tmp,out); + } + virtual void HermOpAndNorm(const Field& in, Field& out, RealD& n1, RealD& n2) { + assert(0); + } + virtual void HermOp(const Field& in, Field& out) { + assert(0); + } + void Op(const Field& in, Field& out) { + Mpc(in, out); + } + void AdjOp(const Field& in, Field& out) { + MpcDag(in, out); + } + // Support for coarsening to a multigrid + void OpDiag(const Field& in, Field& out) { + assert(0); // must coarsen the unpreconditioned system + } + void OpDir(const Field& in, Field& out, int dir, int disp) { + assert(0); + } + void OpDirAll(const Field& in, std::vector& out){ + assert(0); + }; +}; + +template +class NonHermitianSchurDiagMooeeOperator : public NonHermitianSchurOperatorBase +{ + public: + Matrix& _Mat; + NonHermitianSchurDiagMooeeOperator(Matrix& Mat): _Mat(Mat){}; + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + tmp.Checkerboard() = !in.Checkerboard(); + + _Mat.Meooe(in, tmp); + _Mat.MooeeInv(tmp, out); + _Mat.Meooe(out, tmp); + + _Mat.Mooee(in, out); + + axpy(out, -1.0, tmp, out); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MeooeDag(in, tmp); + _Mat.MooeeInvDag(tmp, out); + _Mat.MeooeDag(out, tmp); + + _Mat.MooeeDag(in, out); + + axpy(out, -1.0, tmp, out); + } +}; + +template +class NonHermitianSchurDiagOneOperator : public NonHermitianSchurOperatorBase +{ + protected: + Matrix &_Mat; + + public: + NonHermitianSchurDiagOneOperator (Matrix& Mat): _Mat(Mat){}; + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.Meooe(in, out); + _Mat.MooeeInv(out, tmp); + _Mat.Meooe(tmp, out); + _Mat.MooeeInv(out, tmp); + + axpy(out, -1.0, tmp, in); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MooeeInvDag(in, out); + _Mat.MeooeDag(out, tmp); + _Mat.MooeeInvDag(tmp, out); + _Mat.MeooeDag(out, tmp); + + axpy(out, -1.0, tmp, in); + } +}; + +template +class NonHermitianSchurDiagTwoOperator : public NonHermitianSchurOperatorBase +{ + protected: + Matrix& _Mat; + + public: + NonHermitianSchurDiagTwoOperator(Matrix& Mat): _Mat(Mat){}; + + virtual void Mpc(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MooeeInv(in, out); + _Mat.Meooe(out, tmp); + _Mat.MooeeInv(tmp, out); + _Mat.Meooe(out, tmp); + + axpy(out, -1.0, tmp, in); + } + virtual void MpcDag(const Field& in, Field& out) { + Field tmp(in.Grid()); + + _Mat.MeooeDag(in, out); + _Mat.MooeeInvDag(out, tmp); + _Mat.MeooeDag(tmp, out); + _Mat.MooeeInvDag(out, tmp); + + axpy(out, -1.0, tmp, in); + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Left handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) psi = eta --> ( 1 - Moo^-1 Moe Mee^-1 Meo ) psi = Moo^-1 eta +// Right handed Moo^-1 ; (Moo - Moe Mee^-1 Meo) Moo^-1 Moo psi = eta --> ( 1 - Moe Mee^-1 Meo Moo^-1) phi=eta ; psi = Moo^-1 phi +/////////////////////////////////////////////////////////////////////////////////////////////////// +template using SchurDiagOneRH = SchurDiagTwoOperator ; +template using SchurDiagOneLH = SchurDiagOneOperator ; +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Staggered use +/////////////////////////////////////////////////////////////////////////////////////////////////// +template +class SchurStaggeredOperator : public SchurOperatorBase { + protected: + Matrix &_Mat; + Field tmp; + RealD mass; + public: + SchurStaggeredOperator (Matrix &Mat): _Mat(Mat), tmp(_Mat.RedBlackGrid()) + { + assert( _Mat.isTrivialEE() ); + mass = _Mat.Mass(); + } + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + Mpc(in,out); + ComplexD dot= innerProduct(in,out); + n1 = real(dot); + n2 =0.0; + } + virtual void HermOp(const Field &in, Field &out){ + Mpc(in,out); + // _Mat.Meooe(in,out); + // _Mat.Meooe(out,tmp); + // axpby(out,-1.0,mass*mass,tmp,in); + } + virtual void Mpc (const Field &in, Field &out) + { Field tmp(in.Grid()); Field tmp2(in.Grid()); + + // _Mat.Mooee(in,out); + // _Mat.Mooee(out,tmp); - // std::cout << GridLogIterative << " HermOp.Mpc "< using SchurStagOperator = SchurStaggeredOperator; - ///////////////////////////////////////////////////////////// // Base classes for functions of operators ///////////////////////////////////////////////////////////// diff --git a/Grid/algorithms/SparseMatrix.h b/Grid/algorithms/SparseMatrix.h index b959f53c..8a265b3f 100644 --- a/Grid/algorithms/SparseMatrix.h +++ b/Grid/algorithms/SparseMatrix.h @@ -38,16 +38,12 @@ template class SparseMatrixBase { public: virtual GridBase *Grid(void) =0; // Full checkerboar operations - virtual RealD M (const Field &in, Field &out)=0; - virtual RealD Mdag (const Field &in, Field &out)=0; - virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp (in.Grid()); - ni=M(in,tmp); - no=Mdag(tmp,out); - } + virtual void M (const Field &in, Field &out)=0; + virtual void Mdag (const Field &in, Field &out)=0; virtual void MdagM(const Field &in, Field &out) { - RealD ni, no; - MdagM(in,out,ni,no); + Field tmp (in.Grid()); + M(in,tmp); + Mdag(tmp,out); } virtual void Mdiag (const Field &in, Field &out)=0; virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index 133db2b4..584ed1d5 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -234,10 +234,8 @@ public: GridBase *grid=in.Grid(); - // std::cout << "Chevyshef(): in.Grid()="< Bt(siteBlock * nrot); - auto Bp=&Bt[0]; - - // GPU readable copy of Eigen matrix - Vector Qt_jv(Nm*Nm); - double *Qt_p = & Qt_jv[0]; - for(int k=0;k -void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) -{ - GridBase* grid = basis[0].Grid(); - typedef typename Field::vector_object vobj; - typedef decltype(basis[0].View(AcceleratorWrite)) View; - - result.Checkerboard() = basis[0].Checkerboard(); - - autoView(result_v,result, AcceleratorWrite); - Vector basis_v; basis_v.reserve(basis.size()); - View * basis_vp = &basis_v[0]; - - for(int k=0;k Qt_jv(Nm); double * Qt_j = & Qt_jv[0]; - - for(int k=0;koSites(),vobj::Nsimd(),{ - auto B=coalescedRead(basis_vp[k0][ss]); - B=Zero(); - for(int k=k0; k -void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) -{ - int vlen = idx.size(); - - assert(vlen>=1); - assert(vlen<=sort_vals.size()); - assert(vlen<=_v.size()); - - for (size_t i=0;ii for which _vnew[j] = _vold[i], - // track the move idx[j] => idx[i] - // track the move idx[i] => i - ////////////////////////////////////// - size_t j; - for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); - - swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy - std::swap(sort_vals[i],sort_vals[idx[i]]); - - idx[j] = idx[i]; - idx[i] = i; - } - } -} - -inline std::vector basisSortGetIndex(std::vector& sort_vals) -{ - std::vector idx(sort_vals.size()); - std::iota(idx.begin(), idx.end(), 0); - - // sort indexes based on comparing values in v - std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { - return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); - }); - return idx; -} - -template -void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) -{ - std::vector idx = basisSortGetIndex(sort_vals); - if (reverse) - std::reverse(idx.begin(), idx.end()); - - basisReorderInPlace(_v,sort_vals,idx); -} - -// PAB: faster to compute the inner products first then fuse loops. -// If performance critical can improve. -template -void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { - result = Zero(); - assert(_v.size()==eval.size()); - int N = (int)_v.size(); - for (int i=0;i= heap_size) { std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < #ifdef GRID_COMMS_SHMEM #include // uses same implementation of communicator #endif + +NAMESPACE_BEGIN(Grid); + +template +auto Cshift(const LatticeUnaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +template +auto Cshift(const LatticeBinaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +template +auto Cshift(const LatticeTrinaryExpression &expr,int dim,int shift) + -> Lattice +{ + return Cshift(closure(expr),dim,shift); +} +NAMESPACE_END(Grid); + #endif diff --git a/Grid/lattice/Lattice.h b/Grid/lattice/Lattice.h index 0405f512..a3017198 100644 --- a/Grid/lattice/Lattice.h +++ b/Grid/lattice/Lattice.h @@ -36,7 +36,7 @@ Author: Peter Boyle #include #include #include -#include +//#include #include #include #include @@ -44,4 +44,4 @@ Author: Peter Boyle #include #include #include - +#include diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index 733addf8..91b456d9 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -9,6 +9,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo +Author: Christoph Lehner &arg) template accelerator_inline const lobj & eval(const uint64_t ss, const Lattice &arg) { - auto view = arg.View(); + auto view = arg.View(AcceleratorRead); return view[ss]; } #endif diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index c204af5c..a3ae1f28 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -7,6 +7,7 @@ Copyright (C) 2015 Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index 65f71441..907dc5e2 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -9,6 +9,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -91,6 +92,7 @@ public: // The view is trivially copy constructible and may be copied to an accelerator device // in device lambdas ///////////////////////////////////////////////////////////////////////////////// + LatticeView View (ViewMode mode) const { LatticeView accessor(*( (LatticeAccelerator *) this),mode); diff --git a/Grid/lattice/Lattice_basis.h b/Grid/lattice/Lattice_basis.h new file mode 100644 index 00000000..9f1155eb --- /dev/null +++ b/Grid/lattice/Lattice_basis.h @@ -0,0 +1,226 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/lattice/Lattice_basis.h + +Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle +Author: Christoph Lehner + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ + +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +void basisOrthogonalize(std::vector &basis,Field &w,int k) +{ + // If assume basis[j] are already orthonormal, + // can take all inner products in parallel saving 2x bandwidth + // Save 3x bandwidth on the second line of loop. + // perhaps 2.5x speed up. + // 2x overall in Multigrid Lanczos + for(int j=0; j +void basisRotate(VField &basis,Matrix& Qt,int j0, int j1, int k0,int k1,int Nm) +{ + typedef decltype(basis[0]) Field; + typedef decltype(basis[0].View(AcceleratorRead)) View; + + Vector basis_v; basis_v.reserve(basis.size()); + GridBase* grid = basis[0].Grid(); + + for(int k=0;koSites(); + uint64_t siteBlock=(grid->oSites()+nrot-1)/nrot; // Maximum 1 additional vector overhead + + typedef typename std::remove_reference::type vobj; + + Vector Bt(siteBlock * nrot); + auto Bp=&Bt[0]; + + // GPU readable copy of matrix + Vector Qt_jv(Nm*Nm); + double *Qt_p = & Qt_jv[0]; + thread_for(i,Nm*Nm,{ + int j = i/Nm; + int k = i%Nm; + Qt_p[i]=Qt(j,k); + }); + + // Block the loop to keep storage footprint down + for(uint64_t s=0;s +void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) +{ + typedef decltype(basis[0].View(AcceleratorRead)) View; + typedef typename Field::vector_object vobj; + GridBase* grid = basis[0].Grid(); + + result.Checkerboard() = basis[0].Checkerboard(); + + Vector basis_v; basis_v.reserve(basis.size()); + for(int k=0;k Qt_jv(Nm); + double * Qt_j = & Qt_jv[0]; + for(int k=0;koSites(),vobj::Nsimd(),{ + auto B=coalescedRead(zz); + for(int k=k0; k +void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, std::vector& idx) +{ + int vlen = idx.size(); + + assert(vlen>=1); + assert(vlen<=sort_vals.size()); + assert(vlen<=_v.size()); + + for (size_t i=0;ii for which _vnew[j] = _vold[i], + // track the move idx[j] => idx[i] + // track the move idx[i] => i + ////////////////////////////////////// + size_t j; + for (j=i;j i); assert(j!=idx.size()); assert(idx[j]==i); + + swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy + std::swap(sort_vals[i],sort_vals[idx[i]]); + + idx[j] = idx[i]; + idx[i] = i; + } + } +} + +inline std::vector basisSortGetIndex(std::vector& sort_vals) +{ + std::vector idx(sort_vals.size()); + std::iota(idx.begin(), idx.end(), 0); + + // sort indexes based on comparing values in v + std::sort(idx.begin(), idx.end(), [&sort_vals](int i1, int i2) { + return ::fabs(sort_vals[i1]) < ::fabs(sort_vals[i2]); + }); + return idx; +} + +template +void basisSortInPlace(std::vector & _v,std::vector& sort_vals, bool reverse) +{ + std::vector idx = basisSortGetIndex(sort_vals); + if (reverse) + std::reverse(idx.begin(), idx.end()); + + basisReorderInPlace(_v,sort_vals,idx); +} + +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. +template +void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { + result = Zero(); + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i inline Lattice adj(const Lattice &lhs){ Lattice ret(lhs.Grid()); + autoView( lhs_v, lhs, AcceleratorRead); autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard()=lhs.Checkerboard(); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite(ret_v[ss], adj(lhs_v(ss))); }); @@ -50,8 +53,11 @@ template inline Lattice adj(const Lattice &lhs){ template inline Lattice conjugate(const Lattice &lhs){ Lattice ret(lhs.Grid()); + autoView( lhs_v, lhs, AcceleratorRead); autoView( ret_v, ret, AcceleratorWrite); + + ret.Checkerboard() = lhs.Checkerboard(); accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss))); }); diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 16742947..67709c94 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -5,6 +5,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -64,6 +65,37 @@ inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) return ssum; } +template +inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites) +{ + typedef typename vobj::scalar_objectD sobj; + + const int nthread = GridThread::GetThreads(); + + Vector sumarray(nthread); + for(int i=0;i inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) @@ -74,6 +106,15 @@ inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) return sum_cpu(arg,osites); #endif } +template +inline typename vobj::scalar_objectD sumD(const vobj *arg, Integer osites) +{ +#if defined(GRID_CUDA)||defined(GRID_HIP) + return sumD_gpu(arg,osites); +#else + return sumD_cpu(arg,osites); +#endif +} template inline typename vobj::scalar_object sum(const Lattice &arg) @@ -101,44 +142,49 @@ template inline RealD norm2(const Lattice &arg){ // Double inner product template -inline ComplexD innerProduct(const Lattice &left,const Lattice &right) +inline ComplexD rankInnerProduct(const Lattice &left,const Lattice &right) { typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_typeD vector_type; ComplexD nrm; GridBase *grid = left.Grid(); - + const uint64_t nsimd = grid->Nsimd(); const uint64_t sites = grid->oSites(); // Might make all code paths go this way. - autoView( left_v , left, AcceleratorRead); - autoView( right_v,right, AcceleratorRead); - - // GPU - SIMT lane compliance... - typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; + typedef decltype(innerProduct(vobj(),vobj())) inner_t; Vector inner_tmp(sites); auto inner_tmp_v = &inner_tmp[0]; + + { + autoView( left_v , left, AcceleratorRead); + autoView( right_v,right, AcceleratorRead); - accelerator_for( ss, sites, nsimd,{ - auto x_l = left_v(ss); - auto y_l = right_v(ss); - coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); - }) + // GPU - SIMT lane compliance... + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + }) + } // This is in single precision and fails some tests // Need a sumD that sums in double -#if defined(GRID_CUDA)||defined(GRID_HIP) - nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites)); -#else - nrm = TensorRemove(sum_cpu(inner_tmp_v,sites)); -#endif - grid->GlobalSum(nrm); - + nrm = TensorRemove(sumD(inner_tmp_v,sites)); return nrm; } +template +inline ComplexD innerProduct(const Lattice &left,const Lattice &right) { + GridBase *grid = left.Grid(); + ComplexD nrm = rankInnerProduct(left,right); + grid->GlobalSum(nrm); + return nrm; +} + + ///////////////////////// // Fast axpby_norm // z = a x + b y @@ -181,17 +227,51 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); coalescedWrite(z_v[ss],tmp); }); -#if defined(GRID_CUDA)||defined(GRID_HIP) - nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites))); -#else - // Already promoted to double - nrm = real(TensorRemove(sum(inner_tmp_v,sites))); -#endif + nrm = real(TensorRemove(sumD(inner_tmp_v,sites))); grid->GlobalSum(nrm); return nrm; } - +template strong_inline void +innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Lattice &right) +{ + conformable(left,right); + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_typeD vector_type; + Vector tmp(2); + + GridBase *grid = left.Grid(); + + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); + + // GPU + typedef decltype(innerProduct(vobj(),vobj())) inner_t; + typedef decltype(innerProduct(vobj(),vobj())) norm_t; + Vector inner_tmp(sites); + Vector norm_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + auto norm_tmp_v = &norm_tmp[0]; + { + autoView(left_v,left, AcceleratorRead); + autoView(right_v,right,AcceleratorRead); + accelerator_for( ss, sites, nsimd,{ + auto left_tmp = left_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss))); + coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp)); + }); + } + + tmp[0] = TensorRemove(sumD(inner_tmp_v,sites)); + tmp[1] = TensorRemove(sumD(norm_tmp_v,sites)); + + grid->GlobalSumVector(&tmp[0],2); // keep norm Complex -> can use GlobalSumVector + ip = tmp[0]; + nrm = real(tmp[1]); +} + template inline auto sum(const LatticeUnaryExpression & expr) ->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object diff --git a/Grid/lattice/Lattice_trace.h b/Grid/lattice/Lattice_trace.h index b5d80ccc..5fe5173d 100644 --- a/Grid/lattice/Lattice_trace.h +++ b/Grid/lattice/Lattice_trace.h @@ -37,6 +37,7 @@ NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////////////////////////////////// // Trace //////////////////////////////////////////////////////////////////////////////////////////////////// +/* template inline auto trace(const Lattice &lhs) -> Lattice { @@ -48,6 +49,7 @@ inline auto trace(const Lattice &lhs) -> Lattice }); return ret; }; +*/ //////////////////////////////////////////////////////////////////////////////////////////////////// // Trace Index level dependent operation diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 7362060a..44d7674f 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -6,6 +6,7 @@ Copyright (C) 2015 Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -83,32 +84,138 @@ template inline void setCheckerboard(Lattice &full,const Latti } }); } - -template + +//////////////////////////////////////////////////////////////////////////////////////////// +// Flexible Type Conversion for internal promotion to double as well as graceful +// treatment of scalar-compatible types +//////////////////////////////////////////////////////////////////////////////////////////// +accelerator_inline void convertType(ComplexD & out, const std::complex & in) { + out = in; +} + +accelerator_inline void convertType(ComplexF & out, const std::complex & in) { + out = in; +} + +#ifdef GRID_SIMT +accelerator_inline void convertType(vComplexF & out, const ComplexF & in) { + ((ComplexF*)&out)[SIMTlane(vComplexF::Nsimd())] = in; +} +accelerator_inline void convertType(vComplexD & out, const ComplexD & in) { + ((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd())] = in; +} +accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) { + ((ComplexD*)&out)[SIMTlane(vComplexD::Nsimd()*2)] = in; +} +#endif + +accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) { + out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v); +} + +accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) { + Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v); +} + +template + accelerator_inline void convertType(iMatrix & out, const iMatrix & in); +template + accelerator_inline void convertType(iVector & out, const iVector & in); + +template::value, T1>::type* = nullptr> +accelerator_inline void convertType(T1 & out, const iScalar & in) { + convertType(out,in._internal); +} + +template +accelerator_inline void convertType(iScalar & out, const T2 & in) { + convertType(out._internal,in); +} + +template +accelerator_inline void convertType(iMatrix & out, const iMatrix & in) { + for (int i=0;i +accelerator_inline void convertType(iVector & out, const iVector & in) { + for (int i=0;i::value, T>::type* = nullptr> +accelerator_inline void convertType(T & out, const T & in) { + out = in; +} + +template +accelerator_inline void convertType(Lattice & out, const Lattice & in) { + autoView( out_v , out,AcceleratorWrite); + autoView( in_v , in ,AcceleratorRead); + accelerator_for(ss,out_v.size(),T1::Nsimd(),{ + convertType(out_v[ss],in_v(ss)); + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// precision-promoted local inner product +//////////////////////////////////////////////////////////////////////////////////////////// +template +inline auto localInnerProductD(const Lattice &lhs,const Lattice &rhs) +-> Lattice> +{ + autoView( lhs_v , lhs, AcceleratorRead); + autoView( rhs_v , rhs, AcceleratorRead); + + typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner; + Lattice> ret(lhs.Grid()); + + { + autoView(ret_v, ret,AcceleratorWrite); + accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ + convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss))); + }); + } + return ret; +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// block routines +//////////////////////////////////////////////////////////////////////////////////////////// +template inline void blockProject(Lattice > &coarseData, - const Lattice &fineData, - const std::vector > &Basis) + const Lattice &fineData, + const VLattice &Basis) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); - Lattice ip(coarse); + Lattice> ip(coarse); + Lattice fineDataRed = fineData; autoView( coarseData_ , coarseData, AcceleratorWrite); autoView( ip_ , ip, AcceleratorWrite); for(int v=0;v accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), { - coalescedWrite(coarseData_[sc](v),ip_(sc)); + convertType(coarseData_[sc](v),ip_[sc]); }); + + // improve numerical stability of projection + // |fine> = |fine> - |basis> + ip=-ip; + blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed); } } -template -inline void blockZAXPY(Lattice &fineZ, - const Lattice &coarseA, - const Lattice &fineX, - const Lattice &fineY) + +template + inline void blockZAXPY(Lattice &fineZ, + const Lattice &coarseA, + const Lattice &fineX, + const Lattice &fineY) { GridBase * fine = fineZ.Grid(); GridBase * coarse= coarseA.Grid(); @@ -120,7 +227,7 @@ inline void blockZAXPY(Lattice &fineZ, conformable(fineX,fineZ); int _ndimension = coarse->_ndimension; - + Coordinate block_r (_ndimension); // FIXME merge with subdivide checking routine as this is redundant @@ -135,23 +242,60 @@ inline void blockZAXPY(Lattice &fineZ, autoView( coarseA_, coarseA, AcceleratorRead); accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), { - - int sc; - Coordinate coor_c(_ndimension); - Coordinate coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + int sc; + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); - // z = A x + y - coalescedWrite(fineZ_[sf],coarseA_(sc)*fineX_(sf)+fineY_(sf)); + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - }); + // z = A x + y +#ifdef GRID_SIMT + typename vobj2::tensor_reduced::scalar_object cA; + typename vobj::scalar_object cAx; +#else + typename vobj2::tensor_reduced cA; + vobj cAx; +#endif + convertType(cA,TensorRemove(coarseA_(sc))); + auto prod = cA*fineX_(sf); + convertType(cAx,prod); + coalescedWrite(fineZ_[sf],cAx+fineY_(sf)); + + }); return; } + template + inline void blockInnerProductD(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) +{ + typedef iScalar dotp; + + GridBase *coarse(CoarseInner.Grid()); + GridBase *fine (fineX.Grid()); + + Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); + Lattice coarse_inner(coarse); + + // Precision promotion + fine_inner = localInnerProductD(fineX,fineY); + blockSum(coarse_inner,fine_inner); + { + autoView( CoarseInner_ , CoarseInner,AcceleratorWrite); + autoView( coarse_inner_ , coarse_inner,AcceleratorRead); + accelerator_for(ss, coarse->oSites(), 1, { + convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss])); + }); + } + +} + +template // deprecate inline void blockInnerProduct(Lattice &CoarseInner, const Lattice &fineX, const Lattice &fineY) @@ -167,12 +311,15 @@ inline void blockInnerProduct(Lattice &CoarseInner, // Precision promotion? fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); - autoView( CoarseInner_ , CoarseInner, AcceleratorWrite); - autoView( coarse_inner_ , coarse_inner, AcceleratorRead); - accelerator_for(ss, coarse->oSites(), 1, { - CoarseInner_[ss] = coarse_inner_[ss]; - }); + { + autoView( CoarseInner_ , CoarseInner, AcceleratorWrite); + autoView( coarse_inner_ , coarse_inner, AcceleratorRead); + accelerator_for(ss, coarse->oSites(), 1, { + CoarseInner_[ss] = coarse_inner_[ss]; + }); + } } + template inline void blockNormalise(Lattice &ip,Lattice &fineX) { @@ -185,7 +332,7 @@ inline void blockNormalise(Lattice &ip,Lattice &fineX) // useful in multigrid project; // Generic name : Coarsen? template -inline void blockSum(Lattice &coarseData,const Lattice &fineData) +inline void blockSum(Lattice &coarseData,const Lattice &fineData) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); @@ -193,9 +340,9 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) subdivides(coarse,fine); // require they map int _ndimension = coarse->_ndimension; - + Coordinate block_r (_ndimension); - + for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } @@ -208,27 +355,28 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) accelerator_for(sc,coarse->oSites(),1,{ - // One thread per sub block - Coordinate coor_c(_ndimension); - Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate - coarseData_[sc]=Zero(); + // One thread per sub block + Coordinate coor_c(_ndimension); + Lexicographic::CoorFromIndex(coor_c,sc,coarse->_rdimensions); // Block coordinate + coarseData_[sc]=Zero(); - for(int sb=0;sb_rdimensions); + for(int sb=0;sb_rdimensions); - }); + coarseData_[sc]=coarseData_[sc]+fineData_[sf]; + } + + }); return; } + template inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,Coordinate coor) { @@ -250,8 +398,8 @@ inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice -inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) +template +inline void blockOrthonormalize(Lattice &ip,VLattice &Basis) { GridBase *coarse = ip.Grid(); GridBase *fine = Basis[0].Grid(); @@ -259,23 +407,30 @@ inline void blockOrthogonalise(Lattice &ip,std::vector > int nbasis = Basis.size() ; // checks - subdivides(coarse,fine); + subdivides(coarse,fine); for(int i=0;i (Basis[v],ip,Basis[u],Basis[v]); + blockZAXPY(Basis[v],ip,Basis[u],Basis[v]); } blockNormalise(ip,Basis[v]); } } +template +inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) // deprecated inaccurate naming +{ + blockOrthonormalize(ip,Basis); +} + #if 0 +// TODO: CPU optimized version here template inline void blockPromote(const Lattice > &coarseData, Lattice &fineData, @@ -320,17 +475,17 @@ inline void blockPromote(const Lattice > &coarseData, } #else -template +template inline void blockPromote(const Lattice > &coarseData, Lattice &fineData, - const std::vector > &Basis) + const VLattice &Basis) { GridBase * fine = fineData.Grid(); GridBase * coarse= coarseData.Grid(); - fineData=Zero(); for(int i=0;i > ip = PeekIndex<0>(coarseData,i); + Lattice cip(coarse); autoView( cip_ , cip, AcceleratorWrite); autoView( ip_ , ip, AcceleratorRead); @@ -407,6 +562,7 @@ void localCopyRegion(const Lattice &From,Lattice & To,Coordinate Fro Coordinate rdt = Tg->_rdimensions; Coordinate ist = Tg->_istride; Coordinate ost = Tg->_ostride; + autoView( t_v , To, AcceleratorWrite); autoView( f_v , From, AcceleratorRead); accelerator_for(idx,Fg->lSites(),1,{ diff --git a/Grid/lattice/Lattice_transpose.h b/Grid/lattice/Lattice_transpose.h index b11175dd..adfe3380 100644 --- a/Grid/lattice/Lattice_transpose.h +++ b/Grid/lattice/Lattice_transpose.h @@ -38,6 +38,7 @@ NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////////////////////////////////// // Transpose //////////////////////////////////////////////////////////////////////////////////////////////////// +/* template inline Lattice transpose(const Lattice &lhs){ Lattice ret(lhs.Grid()); @@ -48,7 +49,8 @@ inline Lattice transpose(const Lattice &lhs){ }); return ret; }; - +*/ + //////////////////////////////////////////////////////////////////////////////////////////////////// // Index level dependent transpose //////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h index f90c34a9..1f11add9 100644 --- a/Grid/parallelIO/BinaryIO.h +++ b/Grid/parallelIO/BinaryIO.h @@ -341,7 +341,7 @@ class BinaryIO { int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); assert(ieee64||ieee32|ieee64big||ieee32big); assert((ieee64+ieee32+ieee64big+ieee32big)==1); ////////////////////////////////////////////////////////////////////////////// diff --git a/Grid/parallelIO/MetaData.h b/Grid/parallelIO/MetaData.h index 2e211838..4c1cfbdb 100644 --- a/Grid/parallelIO/MetaData.h +++ b/Grid/parallelIO/MetaData.h @@ -301,6 +301,30 @@ struct GaugeSimpleUnmunger { }; }; +template +struct GaugeDoubleStoredMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + +template +struct GaugeDoubleStoredUnmunger { + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nds; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; + template struct Gauge3x2munger{ void operator() (fobj &in,sobj &out){ diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h index d3b62d1f..5522ba91 100644 --- a/Grid/parallelIO/NerscIO.h +++ b/Grid/parallelIO/NerscIO.h @@ -146,7 +146,7 @@ public: int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; diff --git a/Grid/parallelIO/OpenQcdIO.h b/Grid/parallelIO/OpenQcdIO.h new file mode 100644 index 00000000..00911595 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIO.h @@ -0,0 +1,224 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIO.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +struct OpenQcdHeader : Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(OpenQcdHeader, + int, Nt, + int, Nx, + int, Ny, + int, Nz, + double, plaq); +}; + +class OpenQcdIO : public BinaryIO { +public: + static constexpr double normalisationFactor = Nc; // normalisation difference: grid 18, openqcd 6 + + static inline int readHeader(std::string file, GridBase* grid, FieldMetaData& field) { + OpenQcdHeader header; + + { + std::ifstream fin(file, std::ios::in | std::ios::binary); + fin.read(reinterpret_cast(&header), sizeof(OpenQcdHeader)); + assert(!fin.fail()); + field.data_start = fin.tellg(); + fin.close(); + } + + header.plaq /= normalisationFactor; + + // sanity check (should trigger on endian issues) + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + std::cout << GridLogDebug << "header: " << header << std::endl; + std::cout << GridLogDebug << "grid dimensions: " << grid->_fdimensions << std::endl; + std::cout << GridLogDebug << "file dimensions: " << field.dimension << std::endl; + + assert(grid->_ndimension == Nd); + for(int d = 0; d < Nd; d++) + assert(grid->_fdimensions[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + return field.data_start; + } + + template + static inline void readConfiguration(Lattice>& Umu, + FieldMetaData& header, + std::string file) { + typedef Lattice> DoubleStoredGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = dynamic_cast(Umu.Grid()); + assert(grid != nullptr); assert(grid->_ndimension == Nd); + + uint64_t offset = readHeader(file, Umu.Grid(), header); + + FieldMetaData clone(header); + + std::string format("IEEE64"); // they always store little endian double precsision + uint32_t nersc_csum, scidac_csuma, scidac_csumb; + + GridCartesian* grid_openqcd = createOpenQcdGrid(grid); + GridRedBlackCartesian* grid_rb = SpaceTimeGrid::makeFourDimRedBlackGrid(grid); + + typedef DoubleStoredColourMatrixD fobj; + typedef typename DoubleStoredGaugeField::vector_object::scalar_object sobj; + typedef typename DoubleStoredGaugeField::vector_object::Realified::scalar_type word; + + word w = 0; + + std::vector iodata(grid_openqcd->lSites()); // Munge, checksum, byte order in here + std::vector scalardata(grid->lSites()); + + IOobject(w, grid_openqcd, iodata, file, offset, format, BINARYIO_READ | BINARYIO_LEXICOGRAPHIC, + nersc_csum, scidac_csuma, scidac_csumb); + + GridStopWatch timer; + timer.Start(); + + DoubleStoredGaugeField Umu_ds(grid); + + auto munge = GaugeDoubleStoredMunger(); + + Coordinate ldim = grid->LocalDimensions(); + thread_for(idx_g, grid->lSites(), { + Coordinate coor; + grid->LocalIndexToLocalCoor(idx_g, coor); + + bool isOdd = grid_rb->CheckerBoard(coor) == Odd; + + if(!isOdd) continue; + + int idx_o = (coor[Tdir] * ldim[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Xdir] * ldim[Ydir] * ldim[Zdir] + + coor[Ydir] * ldim[Zdir] + + coor[Zdir])/2; + + munge(iodata[idx_o], scalardata[idx_g]); + }); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: munge overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + vectorizeFromLexOrdArray(scalardata, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: vectorize overhead " << timer.Elapsed() << std::endl; + + timer.Reset(); timer.Start(); + + undoDoubleStore(Umu, Umu_ds); + + grid->Barrier(); timer.Stop(); + std::cout << Grid::GridLogMessage << "OpenQcdIO::readConfiguration: redistribute overhead " << timer.Elapsed() << std::endl; + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + + template + static inline void writeConfiguration(Lattice>& Umu, + std::string file) { + std::cout << GridLogError << "Writing to openQCD file format is not implemented" << std::endl; + exit(EXIT_FAILURE); + } + +private: + static inline GridCartesian* createOpenQcdGrid(GridCartesian* grid) { + // exploit GridCartesian to be able to still use IOobject + Coordinate gdim = grid->GlobalDimensions(); + Coordinate ldim = grid->LocalDimensions(); + Coordinate pcoor = grid->ThisProcessorCoor(); + + // openqcd does rb on the z direction + gdim[Zdir] /= 2; + ldim[Zdir] /= 2; + + // and has the order T X Y Z (from slowest to fastest) + std::swap(gdim[Xdir], gdim[Zdir]); + std::swap(ldim[Xdir], ldim[Zdir]); + std::swap(pcoor[Xdir], pcoor[Zdir]); + + GridCartesian* ret = SpaceTimeGrid::makeFourDimGrid(gdim, grid->_simd_layout, grid->ProcessorGrid()); + ret->_ldimensions = ldim; + ret->_processor_coor = pcoor; + return ret; + } + + template + static inline void undoDoubleStore(Lattice>& Umu, + Lattice> const& Umu_ds) { + conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + // they store T+, T-, X+, X-, Y+, Y-, Z+, Z- + for(int mu_g = 0; mu_g < Nd; ++mu_g) { + int mu_o = (mu_g + 1) % Nd; + U = PeekIndex(Umu_ds, 2 * mu_o) + + Cshift(PeekIndex(Umu_ds, 2 * mu_o + 1), mu_g, +1); + PokeIndex(Umu, U, mu_g); + } + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/OpenQcdIOChromaReference.h b/Grid/parallelIO/OpenQcdIOChromaReference.h new file mode 100644 index 00000000..bab54fe8 --- /dev/null +++ b/Grid/parallelIO/OpenQcdIOChromaReference.h @@ -0,0 +1,281 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/parallelIO/OpenQcdIOChromaReference.h + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CHECK {std::cerr << __FILE__ << " @l " << __LINE__ << ": CHECK" << grid->ThisRank() << std::endl;} +#define CHECK_VAR(a) { std::cerr << __FILE__ << "@l" << __LINE__ << " on "<< grid->ThisRank() << ": " << __func__ << " " << #a << "=" << (a) << std::endl; } +// #undef CHECK +// #define CHECK + +NAMESPACE_BEGIN(Grid); + +class ParRdr { +private: + bool const swap; + + MPI_Status status; + MPI_File fp; + + int err; + + MPI_Datatype oddSiteType; + MPI_Datatype fileViewType; + + GridBase* grid; + +public: + ParRdr(MPI_Comm comm, std::string const& filename, GridBase* gridPtr) + : swap(false) + , grid(gridPtr) { + err = MPI_File_open(comm, const_cast(filename.c_str()), MPI_MODE_RDONLY, MPI_INFO_NULL, &fp); + assert(err == MPI_SUCCESS); + } + + virtual ~ParRdr() { MPI_File_close(&fp); } + + inline void errInfo(int const err, std::string const& func) { + static char estring[MPI_MAX_ERROR_STRING]; + int eclass = -1, len = 0; + MPI_Error_class(err, &eclass); + MPI_Error_string(err, estring, &len); + std::cerr << func << " - Error " << eclass << ": " << estring << std::endl; + } + + int readHeader(FieldMetaData& field) { + assert((grid->_ndimension == Nd) && (Nd == 4)); + assert(Nc == 3); + + OpenQcdHeader header; + + readBlock(reinterpret_cast(&header), 0, sizeof(OpenQcdHeader), MPI_CHAR); + + header.plaq /= 3.; // TODO change this into normalizationfactor + + // sanity check (should trigger on endian issues) TODO remove? + assert(0 < header.Nt && header.Nt <= 1024); + assert(0 < header.Nx && header.Nx <= 1024); + assert(0 < header.Ny && header.Ny <= 1024); + assert(0 < header.Nz && header.Nz <= 1024); + + field.dimension[0] = header.Nx; + field.dimension[1] = header.Ny; + field.dimension[2] = header.Nz; + field.dimension[3] = header.Nt; + + for(int d = 0; d < Nd; d++) + assert(grid->FullDimensions()[d] == field.dimension[d]); + + field.plaquette = header.plaq; + + field.data_start = sizeof(OpenQcdHeader); + + return field.data_start; + } + + void readBlock(void* const dest, uint64_t const pos, uint64_t const nbytes, MPI_Datatype const datatype) { + err = MPI_File_read_at_all(fp, pos, dest, nbytes, datatype, &status); + errInfo(err, "MPI_File_read_at_all"); + // CHECK_VAR(err) + + int read = -1; + MPI_Get_count(&status, datatype, &read); + // CHECK_VAR(read) + assert(nbytes == (uint64_t)read); + assert(err == MPI_SUCCESS); + } + + void createTypes() { + constexpr int elem_size = Nd * 2 * 2 * Nc * Nc * sizeof(double); // 2_complex 2_fwdbwd + + err = MPI_Type_contiguous(elem_size, MPI_BYTE, &oddSiteType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&oddSiteType); assert(err == MPI_SUCCESS); + + Coordinate const L = grid->GlobalDimensions(); + Coordinate const l = grid->LocalDimensions(); + Coordinate const i = grid->ThisProcessorCoor(); + + Coordinate sizes({L[2] / 2, L[1], L[0], L[3]}); + Coordinate subsizes({l[2] / 2, l[1], l[0], l[3]}); + Coordinate starts({i[2] * l[2] / 2, i[1] * l[1], i[0] * l[0], i[3] * l[3]}); + + err = MPI_Type_create_subarray(grid->_ndimension, &sizes[0], &subsizes[0], &starts[0], MPI_ORDER_FORTRAN, oddSiteType, &fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_commit(&fileViewType); assert(err == MPI_SUCCESS); + } + + void freeTypes() { + err = MPI_Type_free(&fileViewType); assert(err == MPI_SUCCESS); + err = MPI_Type_free(&oddSiteType); assert(err == MPI_SUCCESS); + } + + bool readGauge(std::vector& domain_buff, FieldMetaData& meta) { + auto hdr_offset = readHeader(meta); + CHECK + createTypes(); + err = MPI_File_set_view(fp, hdr_offset, oddSiteType, fileViewType, "native", MPI_INFO_NULL); errInfo(err, "MPI_File_set_view0"); assert(err == MPI_SUCCESS); + CHECK + int const domainSites = grid->lSites(); + domain_buff.resize(Nd * domainSites); // 2_fwdbwd * 4_Nd * domainSites / 2_onlyodd + + // the actual READ + constexpr uint64_t cm_size = 2 * Nc * Nc * sizeof(double); // 2_complex + constexpr uint64_t os_size = Nd * 2 * cm_size; // 2_fwdbwd + constexpr uint64_t max_elems = std::numeric_limits::max(); // int adressable elems: floor is fine + uint64_t const n_os = domainSites / 2; + + for(uint64_t os_idx = 0; os_idx < n_os;) { + uint64_t const read_os = os_idx + max_elems <= n_os ? max_elems : n_os - os_idx; + uint64_t const cm = os_idx * Nd * 2; + readBlock(&(domain_buff[cm]), os_idx, read_os, oddSiteType); + os_idx += read_os; + } + + CHECK + err = MPI_File_set_view(fp, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); + errInfo(err, "MPI_File_set_view1"); + assert(err == MPI_SUCCESS); + freeTypes(); + + std::cout << GridLogMessage << "read sum: " << n_os * os_size << " bytes" << std::endl; + return true; + } +}; + +class OpenQcdIOChromaReference : public BinaryIO { +public: + template + static inline void readConfiguration(Lattice>& Umu, + Grid::FieldMetaData& header, + std::string file) { + typedef Lattice> DoubledGaugeField; + + assert(Ns == 4 and Nd == 4 and Nc == 3); + + auto grid = Umu.Grid(); + + typedef ColourMatrixD fobj; + + std::vector iodata( + Nd * grid->lSites()); // actual size = 2*Nd*lsites but have only lsites/2 sites in file + + { + ParRdr rdr(MPI_COMM_WORLD, file, grid); + rdr.readGauge(iodata, header); + } // equivalent to using binaryio + + std::vector> Umu_ds_scalar(grid->lSites()); + + copyToLatticeObject(Umu_ds_scalar, iodata, grid); // equivalent to munging + + DoubledGaugeField Umu_ds(grid); + + vectorizeFromLexOrdArray(Umu_ds_scalar, Umu_ds); + + redistribute(Umu, Umu_ds); // equivalent to undoDoublestore + + FieldMetaData clone(header); + + GaugeStatistics(Umu, clone); + + RealD plaq_diff = fabs(clone.plaquette - header.plaquette); + + // clang-format off + std::cout << GridLogMessage << "OpenQcd Configuration " << file + << " plaquette " << clone.plaquette + << " header " << header.plaquette + << " difference " << plaq_diff + << std::endl; + // clang-format on + + RealD precTol = (getPrecision::value == 1) ? 2e-7 : 2e-15; + RealD tol = precTol * std::sqrt(grid->_Nprocessors); // taken from RQCD chroma code + + if(plaq_diff >= tol) + std::cout << " Plaquette mismatch (diff = " << plaq_diff << ", tol = " << tol << ")" << std::endl; + assert(plaq_diff < tol); + + std::cout << GridLogMessage << "OpenQcd Configuration " << file << " and plaquette agree" << std::endl; + } + +private: + template + static inline void redistribute(Lattice>& Umu, + Lattice> const& Umu_ds) { + Grid::conformable(Umu.Grid(), Umu_ds.Grid()); + Lattice> U(Umu.Grid()); + + U = PeekIndex(Umu_ds, 2) + Cshift(PeekIndex(Umu_ds, 3), 0, +1); PokeIndex(Umu, U, 0); + U = PeekIndex(Umu_ds, 4) + Cshift(PeekIndex(Umu_ds, 5), 1, +1); PokeIndex(Umu, U, 1); + U = PeekIndex(Umu_ds, 6) + Cshift(PeekIndex(Umu_ds, 7), 2, +1); PokeIndex(Umu, U, 2); + U = PeekIndex(Umu_ds, 0) + Cshift(PeekIndex(Umu_ds, 1), 3, +1); PokeIndex(Umu, U, 3); + } + + static inline void copyToLatticeObject(std::vector& u_fb, + std::vector const& node_buff, + GridBase* grid) { + assert(node_buff.size() == Nd * grid->lSites()); + + Coordinate const& l = grid->LocalDimensions(); + + Coordinate coord(Nd); + int& x = coord[0]; + int& y = coord[1]; + int& z = coord[2]; + int& t = coord[3]; + + int buff_idx = 0; + for(t = 0; t < l[3]; ++t) // IMPORTANT: openQCD file ordering + for(x = 0; x < l[0]; ++x) + for(y = 0; y < l[1]; ++y) + for(z = 0; z < l[2]; ++z) { + if((t + z + y + x) % 2 == 0) continue; + + int local_idx; + Lexicographic::IndexFromCoor(coord, local_idx, grid->LocalDimensions()); + for(int mu = 0; mu < 2 * Nd; ++mu) + for(int c1 = 0; c1 < Nc; ++c1) { + for(int c2 = 0; c2 < Nc; ++c2) { + u_fb[local_idx](mu)()(c1,c2) = node_buff[mu+buff_idx]()()(c1,c2); + } + } + buff_idx += 2 * Nd; + } + + assert(node_buff.size() == buff_idx); + } +}; + +NAMESPACE_END(Grid); diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h index 88b4e1cc..2a44faee 100644 --- a/Grid/perfmon/Timer.h +++ b/Grid/perfmon/Timer.h @@ -110,15 +110,15 @@ public: #endif accumulator = std::chrono::duration_cast(start-start); } - GridTime Elapsed(void) { + GridTime Elapsed(void) const { assert(running == false); return std::chrono::duration_cast( accumulator ); } - uint64_t useconds(void){ + uint64_t useconds(void) const { assert(running == false); return (uint64_t) accumulator.count(); } - bool isRunning(void){ + bool isRunning(void) const { return running; } }; diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index f27f4c23..c7d68d73 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -40,8 +40,8 @@ public: public: // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operations virtual void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h index 5aa7bfbd..2300afd3 100644 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -41,8 +41,8 @@ public: public: // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h index a2d0e733..bcc97176 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h @@ -53,8 +53,8 @@ public: virtual void DtildeInv (const FermionField& in, FermionField& out); // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + virtual void M (const FermionField& in, FermionField& out); + virtual void Mdag (const FermionField& in, FermionField& out); // half checkerboard operations virtual void Mooee (const FermionField& in, FermionField& out); diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index f0c2a039..570e350d 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -58,8 +58,8 @@ public: virtual GridBase *GaugeRedBlackGrid(void) =0; // override multiply - virtual RealD M (const FermionField &in, FermionField &out)=0; - virtual RealD Mdag (const FermionField &in, FermionField &out)=0; + virtual void M (const FermionField &in, FermionField &out)=0; + virtual void Mdag (const FermionField &in, FermionField &out)=0; // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out)=0; @@ -86,15 +86,14 @@ public: virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; - virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void MdirAll(const FermionField &in, std::vector &out)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac - virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; + virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; - virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) + virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) { FFT theFFT((GridCartesian *) in.Grid()); diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h index 0cfae7b6..ecf44ed7 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -71,8 +71,8 @@ public: // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + void M(const FermionField &in, FermionField &out); + void Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 2578c288..625eda63 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -74,8 +73,8 @@ public: GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} // full checkerboard operations; leave unimplemented as abstract for now - RealD M (const FermionField &in, FermionField &out); - RealD Mdag (const FermionField &in, FermionField &out); + void M (const FermionField &in, FermionField &out); + void Mdag (const FermionField &in, FermionField &out); // half checkerboard operations void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermion.h b/Grid/qcd/action/fermion/MobiusEOFAFermion.h index 6b214233..6e4f79eb 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermion.h +++ b/Grid/qcd/action/fermion/MobiusEOFAFermion.h @@ -56,8 +56,8 @@ public: virtual void DtildeInv (const FermionField& in, FermionField& out); // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + virtual void M (const FermionField& in, FermionField& out); + virtual void Mdag (const FermionField& in, FermionField& out); // half checkerboard operations virtual void Mooee (const FermionField& in, FermionField& out); diff --git a/Grid/qcd/action/fermion/MobiusFermion.h b/Grid/qcd/action/fermion/MobiusFermion.h index 1cbb6609..1e948092 100644 --- a/Grid/qcd/action/fermion/MobiusFermion.h +++ b/Grid/qcd/action/fermion/MobiusFermion.h @@ -59,7 +59,7 @@ public: { RealD eps = 1.0; - std::cout<Ls);// eps is ignored for higham assert(zdata->n==this->Ls); diff --git a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h index 8715f3c2..ca38a64f 100644 --- a/Grid/qcd/action/fermion/NaiveStaggeredFermion.h +++ b/Grid/qcd/action/fermion/NaiveStaggeredFermion.h @@ -71,8 +71,8 @@ public: // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + void M(const FermionField &in, FermionField &out); + void Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index 928abd3f..54f8547f 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -47,8 +47,8 @@ public: void M_internal(const FermionField &in, FermionField &out,int dag); // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + virtual void M (const FermionField &in, FermionField &out); + virtual void Mdag (const FermionField &in, FermionField &out); // half checkerboard operaions virtual void Meooe (const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index aa8fb150..bc52111b 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -109,9 +109,8 @@ public: ImportGauge(_Umu); } - virtual RealD M(const FermionField &in, FermionField &out); - virtual RealD Mdag(const FermionField &in, FermionField &out); - + virtual void M(const FermionField &in, FermionField &out); + virtual void Mdag(const FermionField &in, FermionField &out); virtual void Mooee(const FermionField &in, FermionField &out); virtual void MooeeDag(const FermionField &in, FermionField &out); virtual void MooeeInv(const FermionField &in, FermionField &out); diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index 2e0bc9bf..1c4dd3cf 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -78,8 +78,8 @@ public: // override multiply; cut number routines if pass dagger argument // and also make interface more uniformly consistent ////////////////////////////////////////////////////////////////// - virtual RealD M(const FermionField &in, FermionField &out); - virtual RealD Mdag(const FermionField &in, FermionField &out); + virtual void M(const FermionField &in, FermionField &out); + virtual void Mdag(const FermionField &in, FermionField &out); ///////////////////////////////////////////////////////// // half checkerboard operations diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index ea71376c..804b1d10 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -99,8 +98,8 @@ public: GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} // full checkerboard operations; leave unimplemented as abstract for now - virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;}; - virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;}; + virtual void M (const FermionField &in, FermionField &out){assert(0);}; + virtual void Mdag (const FermionField &in, FermionField &out){assert(0);}; // half checkerboard operations; leave unimplemented as abstract for now virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; diff --git a/Grid/qcd/action/fermion/WilsonTMFermion5D.h b/Grid/qcd/action/fermion/WilsonTMFermion5D.h index 71acf763..982e722a 100644 --- a/Grid/qcd/action/fermion/WilsonTMFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonTMFermion5D.h @@ -120,7 +120,8 @@ class WilsonTMFermion5D : public WilsonFermion5D } } - virtual RealD M(const FermionField &in, FermionField &out) { + virtual void M(const FermionField &in, FermionField &out) + { out.Checkerboard() = in.Checkerboard(); this->Dhop(in, out, DaggerNo); FermionField tmp(out.Grid()); @@ -129,11 +130,12 @@ class WilsonTMFermion5D : public WilsonFermion5D ComplexD b(0.0,this->mu[s]); axpbg5y_ssp(tmp,a,in,b,in,s,s); } - return axpy_norm(out, 1.0, tmp, out); + axpy(out, 1.0, tmp, out); } // needed for fast PV - void update(const std::vector& _mass, const std::vector& _mu) { + void update(const std::vector& _mass, const std::vector& _mu) + { assert(_mass.size() == _mu.size()); assert(_mass.size() == this->FermionGrid()->_fdimensions[0]); this->mass = _mass; diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 7542dd34..e79b64dc 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -323,7 +323,7 @@ void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField } template -RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) +void CayleyFermion5D::M (const FermionField &psi, FermionField &chi) { FermionField Din(psi.Grid()); @@ -335,11 +335,10 @@ RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) axpby(chi,1.0,1.0,chi,psi); M5D(psi,chi); - return(norm2(chi)); } template -RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) +void CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) { // Under adjoint //D1+ D1- P- -> D1+^dag P+ D2-^dag @@ -354,7 +353,6 @@ RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) M5Ddag(psi,chi); // ((b D_W + D_w hop terms +1) on s-diag axpby (chi,1.0,1.0,chi,psi); - return norm2(chi); } // half checkerboard operations diff --git a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h index beeb3e00..6687800e 100644 --- a/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ContinuedFractionFermion5DImplementation.h @@ -94,7 +94,7 @@ void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Ap template -RealD ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) +void ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) { int Ls = this->Ls; @@ -116,15 +116,14 @@ RealD ContinuedFractionFermion5D::M (const FermionField &psi, F } sign=-sign; } - return norm2(chi); } template -RealD ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) +void ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) { // This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag // The rest of matrix is symmetric. // Can ignore "dag" - return M(psi,chi); + M(psi,chi); } template void ContinuedFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h index 3684fd6c..64ee4033 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h @@ -89,7 +89,7 @@ void DomainWallEOFAFermion::DtildeInv(const FermionField& psi, FermionFiel /*****************************************************************************************************/ template -RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -97,11 +97,10 @@ RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) this->DW(Din, chi, DaggerNo); axpby(chi, 1.0, 1.0, chi, psi); this->M5D(psi, chi); - return(norm2(chi)); } template -RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +void DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -109,7 +108,6 @@ RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& c this->MeooeDag5D(Din, chi); this->M5Ddag(psi, chi); axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); } /******************************************************************** diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h index 87acca0e..888691c4 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermion5DImplementation.h @@ -470,21 +470,24 @@ void ImprovedStaggeredFermion5D::MdirAll(const FermionField &in, std::vect assert(0); } template -RealD ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -RealD ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -492,7 +495,8 @@ void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionFiel } } template -void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -501,27 +505,30 @@ void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionF } template -void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template -void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template -void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, - FermionField &out) { +void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in,FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } diff --git a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 076d0c7c..05d9a17e 100644 --- a/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -171,21 +171,24 @@ void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const ///////////////////////////// template -RealD ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -RealD ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -193,7 +196,8 @@ void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField } } template -void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -202,27 +206,30 @@ void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionFie } template -void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template -void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { +void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template -void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, - FermionField &out) { +void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in,FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } @@ -234,7 +241,8 @@ void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, template void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, GaugeField & mat, - const FermionField &A, const FermionField &B, int dag) { + const FermionField &A, const FermionField &B, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor; @@ -284,8 +292,8 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge } template -void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _grid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -296,8 +304,8 @@ void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionFie } template -void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -310,8 +318,8 @@ void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionF } template -void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - +void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h index 256423e6..9b9db178 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h @@ -166,7 +166,7 @@ void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& c /*****************************************************************************************************/ template -RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) +void MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -174,11 +174,10 @@ RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) this->DW(Din, chi, DaggerNo); axpby(chi, 1.0, 1.0, chi, psi); this->M5D(psi, chi); - return(norm2(chi)); } template -RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +void MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) { FermionField Din(psi.Grid()); @@ -186,7 +185,6 @@ RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) this->MeooeDag5D(Din, chi); this->M5Ddag(psi, chi); axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); } /******************************************************************** diff --git a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h index 49696aa7..788e02cf 100644 --- a/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/NaiveStaggeredFermionImplementation.h @@ -128,17 +128,17 @@ void NaiveStaggeredFermion::ImportGauge(const GaugeField &_U) ///////////////////////////// template -RealD NaiveStaggeredFermion::M(const FermionField &in, FermionField &out) { +void NaiveStaggeredFermion::M(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template -RealD NaiveStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { +void NaiveStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, mass, in, out); + axpy(out, mass, in, out); } template diff --git a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h index edc674cc..0206828b 100644 --- a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h @@ -269,16 +269,14 @@ void PartialFractionFermion5D::M_internal(const FermionField &psi, Fermi } template -RealD PartialFractionFermion5D::M (const FermionField &in, FermionField &out) +void PartialFractionFermion5D::M (const FermionField &in, FermionField &out) { M_internal(in,out,DaggerNo); - return norm2(out); } template -RealD PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) +void PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) { M_internal(in,out,DaggerYes); - return norm2(out); } template diff --git a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index 5744d3bb..36447153 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid); // *NOT* EO template -RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) +void WilsonCloverFermion::M(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -47,11 +47,10 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) Mooee(in, temp); out += temp; - return norm2(out); } template -RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) +void WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) { FermionField temp(out.Grid()); @@ -63,7 +62,6 @@ RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField &out) MooeeDag(in, temp); out += temp; - return norm2(out); } template @@ -132,14 +130,14 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) pickCheckerboard(Even, CloverTermEven, CloverTerm); pickCheckerboard(Odd, CloverTermOdd, CloverTerm); - pickCheckerboard(Even, CloverTermDagEven, adj(CloverTerm)); - pickCheckerboard(Odd, CloverTermDagOdd, adj(CloverTerm)); + pickCheckerboard(Even, CloverTermDagEven, closure(adj(CloverTerm))); + pickCheckerboard(Odd, CloverTermDagOdd, closure(adj(CloverTerm))); pickCheckerboard(Even, CloverTermInvEven, CloverTermInv); pickCheckerboard(Odd, CloverTermInvOdd, CloverTermInv); - pickCheckerboard(Even, CloverTermInvDagEven, adj(CloverTermInv)); - pickCheckerboard(Odd, CloverTermInvDagOdd, adj(CloverTermInv)); + pickCheckerboard(Even, CloverTermInvDagEven, closure(adj(CloverTermInv))); + pickCheckerboard(Odd, CloverTermInvDagOdd, closure(adj(CloverTermInv))); } template diff --git a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index 3db59b1d..c6e76ac6 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -102,21 +102,24 @@ void WilsonFermion::ImportGauge(const GaugeField &_Umu) ///////////////////////////// template -RealD WilsonFermion::M(const FermionField &in, FermionField &out) { +void WilsonFermion::M(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); - return axpy_norm(out, diag_mass, in, out); + axpy(out, diag_mass, in, out); } template -RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { +void WilsonFermion::Mdag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); - return axpy_norm(out, diag_mass, in, out); + axpy(out, diag_mass, in, out); } template -void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { +void WilsonFermion::Meooe(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { @@ -125,7 +128,8 @@ void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { } template -void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) +{ if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { @@ -134,26 +138,30 @@ void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { } template -void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { +void WilsonFermion::Mooee(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(diag_mass); out = scal * in; } template -void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template -void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); out = (1.0/(diag_mass))*in; } template -void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { +void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) +{ out.Checkerboard() = in.Checkerboard(); MooeeInv(in,out); } @@ -249,7 +257,8 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, } template -void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _grid); conformable(U.Grid(), V.Grid()); conformable(U.Grid(), mat.Grid()); @@ -260,7 +269,8 @@ void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, cons } template -void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido) @@ -274,7 +284,8 @@ void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, co } template -void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { +void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) +{ conformable(U.Grid(), _cbgrid); conformable(U.Grid(), V.Grid()); //conformable(U.Grid(), mat.Grid()); @@ -287,7 +298,8 @@ void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, co } template -void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { +void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) +{ conformable(in.Grid(), _grid); // verifies full grid conformable(in.Grid(), out.Grid()); @@ -297,7 +309,8 @@ void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int da } template -void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { +void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) +{ conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -308,7 +321,8 @@ void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int } template -void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { +void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) +{ conformable(in.Grid(), _cbgrid); // verifies half grid conformable(in.Grid(), out.Grid()); // drops the cb check @@ -386,7 +400,8 @@ template void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, - FermionField &out, int dag) { + FermionField &out, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); @@ -436,7 +451,8 @@ template void WilsonFermion::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, const FermionField &in, - FermionField &out, int dag) { + FermionField &out, int dag) +{ assert((dag == DaggerNo) || (dag == DaggerYes)); Compressor compressor(dag); st.HaloExchange(in, compressor); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h index aca9d545..9ca29367 100644 --- a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -447,19 +447,19 @@ void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;} #endif } else if( interior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;} #endif } else if( exterior ) { if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} #ifndef GRID_CUDA if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} - if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;} #endif } assert(0 && " Kernel optimisation case not covered "); diff --git a/Grid/qcd/action/gauge/GaugeImplementations.h b/Grid/qcd/action/gauge/GaugeImplementations.h index a14aec1b..19bc5aa6 100644 --- a/Grid/qcd/action/gauge/GaugeImplementations.h +++ b/Grid/qcd/action/gauge/GaugeImplementations.h @@ -59,7 +59,7 @@ public: } static inline GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { - return Cshift(adj(Link), mu, -1); + return Cshift(closure(adj(Link)), mu, -1); } static inline GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { diff --git a/Grid/qcd/hmc/HMC_aggregate.h b/Grid/qcd/hmc/HMC_aggregate.h index e4d2ce83..cb510953 100644 --- a/Grid/qcd/hmc/HMC_aggregate.h +++ b/Grid/qcd/hmc/HMC_aggregate.h @@ -39,6 +39,10 @@ directory #include #include #include +#include +#if !defined(GRID_COMMS_NONE) +#include +#endif NAMESPACE_CHECK(Ildg); #include diff --git a/Grid/qcd/modules/Registration.h b/Grid/qcd/modules/Registration.h index c1149b83..28a9fdae 100644 --- a/Grid/qcd/modules/Registration.h +++ b/Grid/qcd/modules/Registration.h @@ -80,6 +80,7 @@ static Registrar, static Registrar< ConjugateGradientModule, HMC_SolverModuleFactory > __CGWFmodXMLInit("ConjugateGradient"); + static Registrar< BiCGSTABModule, HMC_SolverModuleFactory > __BiCGWFmodXMLInit("BiCGSTAB"); static Registrar< ConjugateResidualModule, diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 32beac9c..cfb19d69 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -46,7 +46,7 @@ public: typedef typename SpinMatrixField::vector_object sobj; static const int epsilon[6][3] ; - static const Complex epsilon_sgn[6]; + static const Real epsilon_sgn[6]; private: template @@ -151,14 +151,18 @@ public: template const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; -template +/*template const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), Complex(1), Complex(1), Complex(-1), Complex(-1), Complex(-1)}; +*/ +template +const Real BaryonUtils::epsilon_sgn[6] = {1.,1.,1.,-1.,-1.,-1.}; +//This is the old version template template void BaryonUtils::baryon_site(const mobj &D1, @@ -173,12 +177,14 @@ void BaryonUtils::baryon_site(const mobj &D1, robj &result) { - Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) - + Gamma g4(Gamma::Algebra::GammaT); //needed for parity P_\pm = 0.5*(1 \pm \gamma_4) auto gD1a = GammaA_left * GammaA_right * D1; auto gD1b = GammaA_left * g4 * GammaA_right * D1; - auto pD1 = 0.5* (gD1a + (double)parity * gD1b); + auto pD1 = 0.5* (gD1a + (Real)parity * gD1b); auto gD3 = GammaB_right * D3; + auto D2g = D2 * GammaB_left; + auto pD1g = pD1 * GammaB_left; + auto gD3g = gD3 * GammaB_left; for (int ie_left=0; ie_left < 6 ; ie_left++){ int a_left = epsilon[ie_left][0]; //a @@ -188,59 +194,78 @@ void BaryonUtils::baryon_site(const mobj &D1, int a_right = epsilon[ie_right][0]; //a' int b_right = epsilon[ie_right][1]; //b' int c_right = epsilon[ie_right][2]; //c' + Real ee = epsilon_sgn[ie_left] * epsilon_sgn[ie_right]; //This is the \delta_{456}^{123} part if (wick_contraction[0]){ - auto D2g = D2 * GammaB_left; - for (int alpha_right=0; alpha_right::ContractBaryons(const PropagatorField &q1_left, const int parity, ComplexField &baryon_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; - std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; - std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; - std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; - std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; + std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; + std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; + std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; + std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); @@ -278,18 +307,32 @@ void BaryonUtils::ContractBaryons(const PropagatorField &q1_left, autoView( v2 , q2_left, CpuRead); autoView( v3 , q3_left, CpuRead); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ - //for(int ss=0; ss < grid->oSites(); ss++){ + Real bytes =0.; + bytes += grid->oSites() * (432.*sizeof(vComplex) + 126.*sizeof(int) + 36.*sizeof(Real)); + for (int ie=0; ie < 6 ; ie++){ + if(ie==0 or ie==3){ + bytes += grid->oSites() * (4.*sizeof(int) + 4752.*sizeof(vComplex)) * wick_contraction[ie]; + } + else{ + bytes += grid->oSites() * (64.*sizeof(int) + 5184.*sizeof(vComplex)) * wick_contraction[ie]; + } + } + Real t=0.; + t =-usecond(); + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto D1 = v1[ss]; auto D2 = v2[ss]; auto D3 = v3[ss]; - vobj result=Zero(); baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); vbaryon_corr[ss] = result; } );//end loop over lattice sites + + t += usecond(); + + std::cout << std::setw(10) << bytes/t*1.0e6/1024/1024/1024 << " GB/s " << std::endl; + } template template @@ -305,11 +348,15 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, const int parity, robj &result) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + std::cout << "Contraction <" << quarks_right[0] << quarks_right[1] << quarks_right[2] << "|" << quarks_left[0] << quarks_left[1] << quarks_left[2] << ">" << std::endl; - std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; - std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; - std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; - std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; + std::cout << "GammaA (left) " << (GammaA_left.g) << std::endl; + std::cout << "GammaB (left) " << (GammaB_left.g) << std::endl; + std::cout << "GammaA (right) " << (GammaA_right.g) << std::endl; + std::cout << "GammaB (right) " << (GammaB_right.g) << std::endl; assert(parity==1 || parity == -1 && "Parity must be +1 or -1"); @@ -317,8 +364,8 @@ void BaryonUtils::ContractBaryons_Sliced(const mobj &D1, for (int ie=0; ie < 6 ; ie++) wick_contraction[ie] = (quarks_left[0] == quarks_right[epsilon[ie][0]] && quarks_left[1] == quarks_right[epsilon[ie][1]] && quarks_left[2] == quarks_right[epsilon[ie][2]]) ? 1 : 0; - result=Zero(); - baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); + result=Zero(); + baryon_site(D1,D2,D3,GammaA_left,GammaB_left,GammaA_right,GammaB_right,parity,wick_contraction,result); } /*********************************************************************** @@ -558,6 +605,10 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); autoView( vcorr, stn_corr, CpuWrite); @@ -565,8 +616,7 @@ void BaryonUtils::Sigma_to_Nucleon_Eye(const PropagatorField &qq_loop, autoView( vd_tf , qd_tf, CpuRead); autoView( vs_ti , qs_ti, CpuRead); - // accelerator_for(ss, grid->oSites(), grid->Nsimd(), { - thread_for(ss,grid->oSites(),{ + accelerator_for(ss, grid->oSites(), grid->Nsimd(), { auto Dq_loop = vq_loop[ss]; auto Dd_tf = vd_tf[ss]; auto Ds_ti = vs_ti[ss]; @@ -595,6 +645,10 @@ void BaryonUtils::Sigma_to_Nucleon_NonEye(const PropagatorField &qq_ti, const std::string op, SpinMatrixField &stn_corr) { + + assert(Ns==4 && "Baryon code only implemented for N_spin = 4"); + assert(Nc==3 && "Baryon code only implemented for N_colour = 3"); + GridBase *grid = qs_ti.Grid(); autoView( vcorr , stn_corr, CpuWrite); diff --git a/Grid/qcd/utils/CovariantCshift.h b/Grid/qcd/utils/CovariantCshift.h index ed96f3bf..6ac69150 100644 --- a/Grid/qcd/utils/CovariantCshift.h +++ b/Grid/qcd/utils/CovariantCshift.h @@ -52,6 +52,26 @@ namespace PeriodicBC { tmp = adj(Link)*field; return Cshift(tmp,mu,-1);// moves towards positive mu } + + template auto + CovShiftForward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + template auto + CovShiftBackward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + } @@ -122,6 +142,26 @@ namespace ConjugateBC { return Cshift(tmp,mu,-1);// moves towards positive mu } + template auto + CovShiftForward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + template auto + CovShiftBackward(const Lattice &Link, + int mu, + const LatticeUnaryExpression &expr) + -> Lattice + { + Lattice arg(expr); + return CovShiftForward(Link,mu,arg); + } + + } diff --git a/Grid/qcd/utils/WilsonLoops.h b/Grid/qcd/utils/WilsonLoops.h index 0367c9fa..fdd53698 100644 --- a/Grid/qcd/utils/WilsonLoops.h +++ b/Grid/qcd/utils/WilsonLoops.h @@ -485,7 +485,7 @@ public: // Up staple ___ ___ // | | - tmp = Cshift(adj(U[nu]), nu, -1); + tmp = Cshift(closure(adj(U[nu])), nu, -1); tmp = adj(U2[mu]) * tmp; tmp = Cshift(tmp, mu, -2); @@ -519,7 +519,7 @@ public: // // | | - tmp = Cshift(adj(U2[nu]), nu, -2); + tmp = Cshift(closure(adj(U2[nu])), nu, -2); tmp = Gimpl::CovShiftBackward(U[mu], mu, tmp); tmp = U2[nu] * Cshift(tmp, nu, 2); Stap += Cshift(tmp, mu, 1); diff --git a/Grid/serialisation/BaseIO.h b/Grid/serialisation/BaseIO.h index bf424fc7..49406201 100644 --- a/Grid/serialisation/BaseIO.h +++ b/Grid/serialisation/BaseIO.h @@ -87,11 +87,7 @@ namespace Grid { template struct is_tensor_fixed> : public std::true_type {}; - template class MapPointer_> - struct is_tensor_fixed, MapOptions_, MapPointer_>> - : public std::true_type {}; + template struct is_tensor_fixed> : public std::true_type {}; // Is this a variable-size Eigen tensor template struct is_tensor_variable : public std::false_type {}; diff --git a/Grid/serialisation/MacroMagic.h b/Grid/serialisation/MacroMagic.h index 7866327e..0495b91e 100644 --- a/Grid/serialisation/MacroMagic.h +++ b/Grid/serialisation/MacroMagic.h @@ -114,7 +114,8 @@ THE SOFTWARE. #define GRID_MACRO_WRITE_MEMBER(A,B) ::Grid::write(WR,#B,obj. B); #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\ - std::string SerialisableClassName(void) const {return std::string(#cname);} \ +static inline std::string SerialisableClassName(void) {return std::string(#cname);} \ +static constexpr bool isEnum = false; \ GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ template \ static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ @@ -162,6 +163,8 @@ public:\ public:\ accelerator name(void) : value_(undefname) {}; \ accelerator name(int value): value_(value) {}; \ + static inline std::string SerialisableClassName(void) {return std::string(#name);}\ + static constexpr bool isEnum = true; \ template \ static inline void write(::Grid::Writer &WR,const std::string &s, const name &obj) \ {\ diff --git a/Grid/serialisation/VectorUtils.h b/Grid/serialisation/VectorUtils.h index a5a73992..dd5ff0b8 100644 --- a/Grid/serialisation/VectorUtils.h +++ b/Grid/serialisation/VectorUtils.h @@ -432,12 +432,10 @@ namespace Grid { std::vector strToVec(const std::string s) { std::istringstream sstr(s); - T buf; std::vector v; - while(!sstr.eof()) + for(T buf; sstr >> buf;) { - sstr >> buf; v.push_back(buf); } diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index 75e42721..dbcbae8d 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -6,6 +6,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,6 +56,7 @@ class GridTensorBase {}; using Complexified = typename Traits::Complexified; \ using Realified = typename Traits::Realified; \ using DoublePrecision = typename Traits::DoublePrecision; \ + using DoublePrecision2= typename Traits::DoublePrecision2; \ static constexpr int TensorLevel = Traits::TensorLevel template diff --git a/Grid/tensors/Tensor_inner.h b/Grid/tensors/Tensor_inner.h index 03f72966..fd651cae 100644 --- a/Grid/tensors/Tensor_inner.h +++ b/Grid/tensors/Tensor_inner.h @@ -8,6 +8,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -194,6 +195,79 @@ auto innerProductD (const iScalar& lhs,const iScalar& rhs) -> iScalar accelerator_inline + auto innerProductD2 (const iVector& lhs,const iVector& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t; + iScalar ret; + zeroit(ret); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iMatrix& lhs,const iMatrix& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t; + iScalar ret; + ret=Zero(); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iScalar& lhs,const iScalar& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t; + iScalar ret; + ret._internal = innerProductD2(lhs._internal,rhs._internal); + return ret; +} + ////////////////////// // Keep same precison ////////////////////// diff --git a/Grid/tensors/Tensor_traits.h b/Grid/tensors/Tensor_traits.h index 9067d43d..04d7343e 100644 --- a/Grid/tensors/Tensor_traits.h +++ b/Grid/tensors/Tensor_traits.h @@ -6,6 +6,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: Christopher Kelly Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -37,6 +38,60 @@ NAMESPACE_BEGIN(Grid); template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; + // Traits to identify scalars + template struct isGridScalar : public std::false_type { static constexpr bool notvalue = true; }; + template struct isGridScalar> : public std::true_type { static constexpr bool notvalue = false; }; + + // Store double-precision data in single-precision grids for precision promoted localInnerProductD + template + class TypePair { + public: + T _internal[2]; + TypePair& operator=(const Grid::Zero& o) { + _internal[0] = Zero(); + _internal[1] = Zero(); + return *this; + } + + TypePair operator+(const TypePair& o) const { + TypePair r; + r._internal[0] = _internal[0] + o._internal[0]; + r._internal[1] = _internal[1] + o._internal[1]; + return r; + } + + TypePair& operator+=(const TypePair& o) { + _internal[0] += o._internal[0]; + _internal[1] += o._internal[1]; + return *this; + } + + friend accelerator_inline void add(TypePair* ret, const TypePair* a, const TypePair* b) { + add(&ret->_internal[0],&a->_internal[0],&b->_internal[0]); + add(&ret->_internal[1],&a->_internal[1],&b->_internal[1]); + } + }; + typedef TypePair ComplexD2; + typedef TypePair RealD2; + typedef TypePair vComplexD2; + typedef TypePair vRealD2; + + // Traits to identify fundamental data types + template struct isGridFundamental : public std::false_type { static constexpr bool notvalue = true; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + + ////////////////////////////////////////////////////////////////////////////////// // Want to recurse: GridTypeMapper >::scalar_type == ComplexD. // Use of a helper class like this allows us to template specialise and "dress" @@ -81,6 +136,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef RealD DoublePrecision; + typedef RealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -93,6 +149,20 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef RealD DoublePrecision; + typedef RealD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef RealD2 vector_type; + typedef RealD2 vector_typeD; + typedef RealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef RealD2 DoublePrecision; + typedef RealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -105,6 +175,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef ComplexD DoublePrecision; + typedef ComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -117,6 +188,20 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef ComplexD DoublePrecision; + typedef ComplexD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef ComplexD2 vector_type; + typedef ComplexD2 vector_typeD; + typedef ComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef ComplexD2 DoublePrecision; + typedef ComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -129,6 +214,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { @@ -142,6 +228,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vRealD DoublePrecision; + typedef vRealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -154,6 +241,20 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef vRealD2 vector_type; + typedef vRealD2 vector_typeD; + typedef vRealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vRealD2 DoublePrecision; + typedef vRealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -167,6 +268,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -180,6 +282,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -192,6 +295,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vComplexD DoublePrecision; + typedef vComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -204,6 +308,20 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; + }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef vComplexD2 vector_type; + typedef vComplexD2 vector_typeD; + typedef vComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vComplexD2 DoublePrecision; + typedef vComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -216,6 +334,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; #define GridTypeMapper_RepeatedTypes \ @@ -234,6 +353,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iScalar; using Realified = iScalar; using DoublePrecision = iScalar; + using DoublePrecision2= iScalar; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count; static constexpr int Dimension(int dim) { @@ -248,6 +368,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iVector; using Realified = iVector; using DoublePrecision = iVector; + using DoublePrecision2= iVector; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count * N; static constexpr int Dimension(int dim) { @@ -262,6 +383,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iMatrix; using Realified = iMatrix; using DoublePrecision = iMatrix; + using DoublePrecision2= iMatrix; static constexpr int Rank = BaseTraits::Rank + 2; static constexpr std::size_t count = BaseTraits::count * N * N; static constexpr int Dimension(int dim) { diff --git a/Grid/util/Init.h b/Grid/util/Init.h index f7f032ba..dad963a0 100644 --- a/Grid/util/Init.h +++ b/Grid/util/Init.h @@ -56,6 +56,7 @@ std::string GridCmdVectorIntToString(const VectorInt & vec); void GridCmdOptionCSL(std::string str,std::vector & vec); template void GridCmdOptionIntVector(std::string &str,VectorInt & vec); +void GridCmdOptionInt(std::string &str,int & val); void GridParseLayout(char **argv,int argc, diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 0d904225..dc09549c 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -30,7 +30,6 @@ Author: paboyle using namespace Grid; - std::vector L_list; std::vector Ls_list; std::vector mflop_list; @@ -76,7 +75,6 @@ struct controls { int Opt; int CommsOverlap; Grid::CartesianCommunicator::CommunicatorPolicy_t CommsAsynch; - // int HugePages; }; class Benchmark { @@ -119,14 +117,15 @@ public: std::cout<({45,12,81,9})); - for(int lat=8;lat<=lmax;lat+=4){ + for(int lat=8;lat<=lmax;lat+=8){ Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + GridCartesian Grid(latt_size,simd_layout,mpi_layout); // NP= Grid.RankCount(); @@ -270,191 +265,8 @@ public: } }; -#if 0 - static double DWF5(int Ls,int L) - { - // RealD mass=0.1; - RealD M5 =1.8; - double mflops; - double mflops_best = 0; - double mflops_worst= 0; - std::vector mflops_all; - - /////////////////////////////////////////////////////// - // Set/Get the layout & grid size - /////////////////////////////////////////////////////// - int threads = GridThread::GetThreads(); - Coordinate mpi = GridDefaultMpi(); assert(mpi.size()==4); - Coordinate local({L,L,L,L}); - - GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(Coordinate({64,64,64,64}), - GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); - uint64_t NP = TmpGrid->RankCount(); - uint64_t NN = TmpGrid->NodeCount(); - NN_global=NN; - uint64_t SHM=NP/NN; - - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); - - ///////// Welcome message //////////// - std::cout< seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5); - std::cout << GridLogMessage << "Initialised RNGs" << std::endl; - - ///////// Source preparation //////////// - LatticeFermion src (sFGrid); - LatticeFermion tmp (sFGrid); - std::cout << GridLogMessage << "allocated src and tmp" << std::endl; - random(RNG5,src); - std::cout << GridLogMessage << "intialised random source" << std::endl; - - RealD N2 = 1.0/::sqrt(norm2(src)); - src = src*N2; - - LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu); - - WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); - LatticeFermion src_e (sFrbGrid); - LatticeFermion src_o (sFrbGrid); - LatticeFermion r_e (sFrbGrid); - LatticeFermion r_o (sFrbGrid); - LatticeFermion r_eo (sFGrid); - LatticeFermion err (sFGrid); - { - - pickCheckerboard(Even,src_e,src); - pickCheckerboard(Odd,src_o,src); - -#if defined(AVX512) - const int num_cases = 6; - std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O "); -#else - const int num_cases = 4; - std::string fmt("U/S ; U/O ; G/S ; G/O "); -#endif - controls Cases [] = { -#ifdef AVX512 - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptInlineAsm , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, -#endif - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptHandUnroll, WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, - { WilsonKernelsStatic::OptGeneric , WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } - }; - - for(int c=0;cBarrier(); - for(int i=0;iBarrier(); - double t1=usecond(); - - sDw.ZeroCounters(); - time_statistics timestat; - std::vector t_time(ncall); - for(uint64_t i=0;iBarrier(); - - double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops; - if ( mflopsRankCount(); uint64_t NN = TmpGrid->NodeCount(); NN_global=NN; uint64_t SHM=NP/NN; - Coordinate internal; - if ( SHM == 1 ) internal = Coordinate({1,1,1,1}); - else if ( SHM == 2 ) internal = Coordinate({2,1,1,1}); - else if ( SHM == 4 ) internal = Coordinate({2,2,1,1}); - else if ( SHM == 8 ) internal = Coordinate({2,2,2,1}); - else assert(0); - - Coordinate nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]}); - Coordinate latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]}); + Coordinate latt4({local[0]*mpi[0],local[1]*mpi[1],local[2]*mpi[2],local[3]*mpi[3]}); ///////// Welcome message //////////// std::cout< U(4,FGrid); - { - autoView( Umu_v , Umu , CpuRead); - autoView( Umu5d_v, Umu5d, CpuWrite); - for(int ss=0;ssoSites();ss++){ - for(int s=0;s(Umu5d,mu); - } - for(int mu=0;muBarrier(); for(int i=0;iBarrier(); double t1=usecond(); - // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0); - // if (ncall < 500) ncall = 500; - uint64_t ncall = 1000; + uint64_t ncall = 50; FGrid->Broadcast(0,&ncall,sizeof(ncall)); @@ -651,24 +406,11 @@ public: std::cout< seeds4({1,2,3,4}); + GridParallelRNG RNG4(FGrid); RNG4.SeedFixedIntegers(seeds4); + std::cout << GridLogMessage << "Initialised RNGs" << std::endl; + + RealD mass=0.1; + RealD c1=9.0/8.0; + RealD c2=-1.0/24.0; + RealD u0=1.0; + + typedef ImprovedStaggeredFermionF Action; + typedef typename Action::FermionField Fermion; + typedef LatticeGaugeFieldF Gauge; + + Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu); + + typename Action::ImplParams params; + Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params); + + ///////// Source preparation //////////// + Fermion src (FGrid); random(RNG4,src); + Fermion src_e (FrbGrid); + Fermion src_o (FrbGrid); + Fermion r_e (FrbGrid); + Fermion r_o (FrbGrid); + Fermion r_eo (FGrid); + + { + + pickCheckerboard(Even,src_e,src); + pickCheckerboard(Odd,src_o,src); + + const int num_cases = 4; + std::string fmt("G/S/C ; G/O/C ; G/S/S ; G/O/S "); + + controls Cases [] = { + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicyConcurrent }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential }, + { StaggeredKernelsStatic::OptGeneric , StaggeredKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential } + }; + + for(int c=0;cBarrier(); + for(int i=0;iBarrier(); + double t1=usecond(); + uint64_t ncall = 500; + + FGrid->Broadcast(0,&ncall,sizeof(ncall)); + + // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall); + for(uint64_t i=0;iBarrier(); + + double volume=1; for(int mu=0;mumflops_best ) mflops_best = mflops; + if ( mflops L_list({16,24,32}); int selm1=sel-1; - std::vector robust_list; std::vector wilson; std::vector dwf4; - std::vector dwf5; + std::vector staggered; - if ( do_wilson ) { - int Ls=1; - std::cout< +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; + + Gamma::Algebra Gmu [] = { + Gamma::Algebra::GammaX, + Gamma::Algebra::GammaY, + Gamma::Algebra::GammaZ, + Gamma::Algebra::GammaT + }; + +void benchDw(std::vector & L, int Ls); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + + const int Ls=12; + std::vector< std::vector > latts; +#if 1 + latts.push_back(std::vector ({24,24,24,24}) ); + latts.push_back(std::vector ({48,24,24,24}) ); + latts.push_back(std::vector ({96,24,24,24}) ); + latts.push_back(std::vector ({96,48,24,24}) ); + // latts.push_back(std::vector ({96,48,48,24}) ); + // latts.push_back(std::vector ({96,48,48,48}) ); +#else + // latts.push_back(std::vector ({96,48,48,48}) ); + latts.push_back(std::vector ({96,96,96,192}) ); +#endif + + std::cout << GridLogMessage<< "*****************************************************************" < latt4 = latts[l]; + std::cout << GridLogMessage <<"\t"; + for(int d=0;d & latt4, int Ls) +{ + ///////////////////////////////////////////////////////////////////////////////////// + // for Nc=3 + ///////////////////////////////////////////////////////////////////////////////////// + // Dw : Ls*24*(7+48)= Ls*1320 + // + // M5D: Ls*(4*2*Nc mul + 4*2*Nc madd ) = 3*4*2*Nc*Ls = Ls*72 + // Meo: Ls*24*(7+48) + Ls*72 = Ls*1392 + // + // Mee: 3*Ns*2*Nc*Ls // Chroma 6*N5*Nc*Ns + // + // LeemInv : 2*2*Nc*madd*Ls + // LeeInv : 2*2*Nc*madd*Ls + // DeeInv : 4*2*Nc*mul *Ls + // UeeInv : 2*2*Nc*madd*Ls + // UeemInv : 2*2*Nc*madd*Ls = Nc*Ls*(8+8+8+8+8) = 40*Nc*Ls// Chroma (10*N5 - 8)*Nc*Ns ~ (40 N5 - 32)Nc flops + // QUDA counts as dense LsxLs real matrix x Ls x NcNsNreim => Nc*4*2 x Ls^2 FMA = 16Nc Ls^2 flops + // Mpc => 1452*cbvol*2*Ls flops // + // => (1344+Ls*48)*Ls*cbvol*2 flops QUDA = 1920 @Ls=12 and 2112 @Ls=16 + ///////////////////////////////////////////////////////////////////////////////////// + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + // long unsigned int single_site_flops = 8*Nc*(7+16*Nc)*Ls; + long unsigned int single_site_mpc_flops = 8*Nc*(7+16*Nc)*2*Ls + 40*Nc*2*Ls + 4*Nc*2*Ls; + long unsigned int single_site_quda_flops = 8*Nc*(7+16*Nc)*2*Ls + 16*Nc*Ls*Ls + 4*Nc*2*Ls; + std::vector seeds4({1,2,3,4}); + std::vector seeds5({5,6,7,8}); + + + ColourMatrixF cm = ComplexF(1.0,0.0); + + int ncall=300; + RealD mass=0.1; + RealD M5 =1.8; + RealD NP = UGrid->_Nprocessors; + double volume=1; for(int mu=0;mu Mpc(Dw); + Chebyshev Cheby(0.0,60.0,order); + + { + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + Mpc.Mpc(src_o,r_o); + + double t0=usecond(); + for(int i=0;i(Umu,mu); } + ref = Zero(); RealD mass=0.1; RealD c1=9.0/8.0; diff --git a/configure.ac b/configure.ac index 74d37605..c0c935c6 100644 --- a/configure.ac +++ b/configure.ac @@ -309,10 +309,21 @@ case ${ac_gen_scalar} in esac ##################### Compiler dependent choices -case ${CXX} in + +#Strip any optional compiler arguments from nvcc call (eg -ccbin) for compiler comparison +CXXBASE=${CXX} +CXXTEST=${CXX} +if echo "${CXX}" | grep -q "nvcc"; then + CXXTEST="nvcc" +fi + +case ${CXXTEST} in nvcc) - CXX="nvcc -x cu " - CXXLD="nvcc -link" +# CXX="nvcc -keep -v -x cu " +# CXXLD="nvcc -v -link" + CXX="${CXXBASE} -x cu " + CXXLD="${CXXBASE} -link" +# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing -Xcompiler -Wno-unusable-partial-specialization --expt-extended-lambda --expt-relaxed-constexpr" CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr" if test $ac_openmp = yes; then CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp" diff --git a/tests/IO/Test_openqcd_io.cc b/tests/IO/Test_openqcd_io.cc new file mode 100644 index 00000000..765509a9 --- /dev/null +++ b/tests/IO/Test_openqcd_io.cc @@ -0,0 +1,84 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/io/Test_openqcd_io.cc + +Copyright (C) 2015 - 2020 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include + + + +using namespace Grid; + +int main(int argc, char** argv) { +#if !defined(GRID_COMMS_NONE) + Grid_init(&argc, &argv); + + auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd()); + auto mpi_layout = GridDefaultMpi(); + auto latt_size = GridDefaultLatt(); + + GridCartesian grid(latt_size, simd_layout, mpi_layout); + + GridParallelRNG pRNG(&grid); + + pRNG.SeedFixedIntegers(std::vector({45, 12, 81, 9})); + + LatticeGaugeField Umu_ref(&grid); + LatticeGaugeField Umu_me(&grid); + LatticeGaugeField Umu_diff(&grid); + + FieldMetaData header_ref; + FieldMetaData header_me; + + Umu_ref = Zero(); + Umu_me = Zero(); + + std::string file("/home/daniel/configs/openqcd/test_16x8_pbcn6"); + + if(GridCmdOptionExists(argv, argv + argc, "--config")) { + file = GridCmdOptionPayload(argv, argv + argc, "--config"); + std::cout << "file: " << file << std::endl; + assert(!file.empty()); + } + + OpenQcdIOChromaReference::readConfiguration(Umu_ref, header_ref, file); + OpenQcdIO::readConfiguration(Umu_me, header_me, file); + + std::cout << GridLogMessage << header_ref << std::endl; + std::cout << GridLogMessage << header_me << std::endl; + + Umu_diff = Umu_ref - Umu_me; + + // clang-format off + std::cout << GridLogMessage + << "norm2(Umu_ref) = " << norm2(Umu_ref) + << " norm2(Umu_me) = " << norm2(Umu_me) + << " norm2(Umu_diff) = " << norm2(Umu_diff) << std::endl; + // clang-format on + + Grid_finalize(); +#endif +} diff --git a/tests/Test_innerproduct_norm.cc b/tests/Test_innerproduct_norm.cc new file mode 100644 index 00000000..a8718c6b --- /dev/null +++ b/tests/Test_innerproduct_norm.cc @@ -0,0 +1,126 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_innerproduct_norm.cc + +Copyright (C) 2015 + +Author: Daniel Richtmann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; + +int main(int argc, char** argv) { + Grid_init(&argc, &argv); + + const int nIter = 100; + + // clang-format off + GridCartesian *Grid_d = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi()); + GridCartesian *Grid_f = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi()); + // clang-format on + + GridParallelRNG pRNG_d(Grid_d); + GridParallelRNG pRNG_f(Grid_f); + + std::vector seeds_d({1, 2, 3, 4}); + std::vector seeds_f({5, 6, 7, 8}); + + pRNG_d.SeedFixedIntegers(seeds_d); + pRNG_f.SeedFixedIntegers(seeds_f); + + // clang-format off + LatticeFermionD x_d(Grid_d); random(pRNG_d, x_d); + LatticeFermionD y_d(Grid_d); random(pRNG_d, y_d); + LatticeFermionF x_f(Grid_f); random(pRNG_f, x_f); + LatticeFermionF y_f(Grid_f); random(pRNG_f, y_f); + // clang-format on + + GridStopWatch sw_ref; + GridStopWatch sw_res; + + { // double precision + ComplexD ip_d_ref, ip_d_res, diff_ip_d; + RealD norm2_d_ref, norm2_d_res, diff_norm2_d; + + sw_ref.Reset(); + sw_ref.Start(); + for(int i = 0; i < nIter; ++i) { + ip_d_ref = innerProduct(x_d, y_d); + norm2_d_ref = norm2(x_d); + } + sw_ref.Stop(); + + sw_res.Reset(); + sw_res.Start(); + for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_d_res, norm2_d_res, x_d, y_d); } + sw_res.Stop(); + + diff_ip_d = ip_d_ref - ip_d_res; + diff_norm2_d = norm2_d_ref - norm2_d_res; + + // clang-format off + std::cout << GridLogMessage << "Double: ip_ref = " << ip_d_ref << " ip_res = " << ip_d_res << " diff = " << diff_ip_d << std::endl; + std::cout << GridLogMessage << "Double: norm2_ref = " << norm2_d_ref << " norm2_res = " << norm2_d_res << " diff = " << diff_norm2_d << std::endl; + std::cout << GridLogMessage << "Double: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl; + // clang-format on + + assert(diff_ip_d == 0.); + assert(diff_norm2_d == 0.); + + std::cout << GridLogMessage << "Double: all checks passed" << std::endl; + } + + { // single precision + ComplexD ip_f_ref, ip_f_res, diff_ip_f; + RealD norm2_f_ref, norm2_f_res, diff_norm2_f; + + sw_ref.Reset(); + sw_ref.Start(); + for(int i = 0; i < nIter; ++i) { + ip_f_ref = innerProduct(x_f, y_f); + norm2_f_ref = norm2(x_f); + } + sw_ref.Stop(); + + sw_res.Reset(); + sw_res.Start(); + for(int i = 0; i < nIter; ++i) { innerProductNorm(ip_f_res, norm2_f_res, x_f, y_f); } + sw_res.Stop(); + + diff_ip_f = ip_f_ref - ip_f_res; + diff_norm2_f = norm2_f_ref - norm2_f_res; + + // clang-format off + std::cout << GridLogMessage << "Single: ip_ref = " << ip_f_ref << " ip_res = " << ip_f_res << " diff = " << diff_ip_f << std::endl; + std::cout << GridLogMessage << "Single: norm2_ref = " << norm2_f_ref << " norm2_res = " << norm2_f_res << " diff = " << diff_norm2_f << std::endl; + std::cout << GridLogMessage << "Single: time_ref = " << sw_ref.Elapsed() << " time_res = " << sw_res.Elapsed() << std::endl; + // clang-format on + + assert(diff_ip_f == 0.); + assert(diff_norm2_f == 0.); + + std::cout << GridLogMessage << "Single: all checks passed" << std::endl; + } + + Grid_finalize(); +} diff --git a/tests/core/Test_contfrac_even_odd.cc b/tests/core/Test_contfrac_even_odd.cc index 25affd00..5311f869 100644 --- a/tests/core/Test_contfrac_even_odd.cc +++ b/tests/core/Test_contfrac_even_odd.cc @@ -238,11 +238,11 @@ void TestWhat(What & Ddwf, RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_dwf_eofa_even_odd.cc b/tests/core/Test_dwf_eofa_even_odd.cc index 1d2f2909..01fff9ea 100644 --- a/tests/core/Test_dwf_eofa_even_odd.cc +++ b/tests/core/Test_dwf_eofa_even_odd.cc @@ -218,11 +218,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); - HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + HermOpEO.MpcDagMpc(chi_e, dchi_e); + HermOpEO.MpcDagMpc(chi_o, dchi_o); - HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); - HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + HermOpEO.MpcDagMpc(phi_e, dphi_e); + HermOpEO.MpcDagMpc(phi_o, dphi_o); pDce = innerProduct(phi_e, dchi_e); pDco = innerProduct(phi_o, dchi_o); diff --git a/tests/core/Test_dwf_even_odd.cc b/tests/core/Test_dwf_even_odd.cc index d654e588..6093ee8f 100644 --- a/tests/core/Test_dwf_even_odd.cc +++ b/tests/core/Test_dwf_even_odd.cc @@ -216,11 +216,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_gpwilson_even_odd.cc b/tests/core/Test_gpwilson_even_odd.cc index ac4cde99..bf37f4d5 100644 --- a/tests/core/Test_gpwilson_even_odd.cc +++ b/tests/core/Test_gpwilson_even_odd.cc @@ -201,11 +201,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_mobius_eofa_even_odd.cc b/tests/core/Test_mobius_eofa_even_odd.cc index bfd53c72..68091229 100644 --- a/tests/core/Test_mobius_eofa_even_odd.cc +++ b/tests/core/Test_mobius_eofa_even_odd.cc @@ -220,11 +220,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e, dchi_e, t1, t2); - HermOpEO.MpcDagMpc(chi_o, dchi_o, t1, t2); + HermOpEO.MpcDagMpc(chi_e, dchi_e); + HermOpEO.MpcDagMpc(chi_o, dchi_o); - HermOpEO.MpcDagMpc(phi_e, dphi_e, t1, t2); - HermOpEO.MpcDagMpc(phi_o, dphi_o, t1, t2); + HermOpEO.MpcDagMpc(phi_e, dphi_e); + HermOpEO.MpcDagMpc(phi_o, dphi_o); pDce = innerProduct(phi_e, dchi_e); pDco = innerProduct(phi_o, dchi_o); diff --git a/tests/core/Test_mobius_even_odd.cc b/tests/core/Test_mobius_even_odd.cc index 0a035dc8..7f808cac 100644 --- a/tests/core/Test_mobius_even_odd.cc +++ b/tests/core/Test_mobius_even_odd.cc @@ -266,11 +266,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_staggered.cc b/tests/core/Test_staggered.cc index c85d4090..1f42ff0d 100644 --- a/tests/core/Test_staggered.cc +++ b/tests/core/Test_staggered.cc @@ -270,11 +270,11 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); SchurDiagMooeeOperator HermOpEO(Ds); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_staggered5D.cc b/tests/core/Test_staggered5D.cc index e4cd007f..3d175890 100644 --- a/tests/core/Test_staggered5D.cc +++ b/tests/core/Test_staggered5D.cc @@ -290,11 +290,11 @@ int main (int argc, char ** argv) pickCheckerboard(Odd ,phi_o,phi); SchurDiagMooeeOperator HermOpEO(Ds); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_wilson_even_odd.cc b/tests/core/Test_wilson_even_odd.cc index 8f325f1c..dc49cf81 100644 --- a/tests/core/Test_wilson_even_odd.cc +++ b/tests/core/Test_wilson_even_odd.cc @@ -207,11 +207,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_wilson_twisted_mass_even_odd.cc b/tests/core/Test_wilson_twisted_mass_even_odd.cc index 58b0b60f..ba80fd0e 100644 --- a/tests/core/Test_wilson_twisted_mass_even_odd.cc +++ b/tests/core/Test_wilson_twisted_mass_even_odd.cc @@ -208,11 +208,11 @@ int main (int argc, char ** argv) RealD t1,t2; SchurDiagMooeeOperator HermOpEO(Dw); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/core/Test_zmobius_even_odd.cc b/tests/core/Test_zmobius_even_odd.cc index 1150930b..a52e9bc2 100644 --- a/tests/core/Test_zmobius_even_odd.cc +++ b/tests/core/Test_zmobius_even_odd.cc @@ -280,11 +280,11 @@ int main (int argc, char ** argv) SchurDiagMooeeOperator HermOpEO(Ddwf); - HermOpEO.MpcDagMpc(chi_e,dchi_e,t1,t2); - HermOpEO.MpcDagMpc(chi_o,dchi_o,t1,t2); + HermOpEO.MpcDagMpc(chi_e,dchi_e); + HermOpEO.MpcDagMpc(chi_o,dchi_o); - HermOpEO.MpcDagMpc(phi_e,dphi_e,t1,t2); - HermOpEO.MpcDagMpc(phi_o,dphi_o,t1,t2); + HermOpEO.MpcDagMpc(phi_e,dphi_e); + HermOpEO.MpcDagMpc(phi_o,dphi_o); pDce = innerProduct(phi_e,dchi_e); pDco = innerProduct(phi_o,dchi_o); diff --git a/tests/solver/Test_dwf_fpgcr.cc b/tests/solver/Test_dwf_fpgcr.cc index 226bd933..156f678a 100644 --- a/tests/solver/Test_dwf_fpgcr.cc +++ b/tests/solver/Test_dwf_fpgcr.cc @@ -70,9 +70,6 @@ int main (int argc, char ** argv) SU3::HotConfiguration(RNG4,Umu); - TrivialPrecon simple; - - PrecGeneralisedConjugateResidual PGCR(1.0e-6,10000,simple,4,160); ConjugateResidual CR(1.0e-6,10000); @@ -86,15 +83,19 @@ int main (int argc, char ** argv) std::cout< HermOp(Ddwf); + TrivialPrecon simple; + PrecGeneralisedConjugateResidual PGCR(1.0e-6,10000,HermOp,simple,4,160); + result=Zero(); - PGCR(HermOp,src,result); + PGCR(src,result); std::cout< g5HermOp(Ddwf); + PrecGeneralisedConjugateResidual PGCR5(1.0e-6,10000,g5HermOp,simple,4,160); result=Zero(); - PGCR(g5HermOp,src,result); + PGCR5(src,result); std::cout<