diff --git a/Grid/DisableWarnings.h b/Grid/DisableWarnings.h index b3257c62..8ea219fb 100644 --- a/Grid/DisableWarnings.h +++ b/Grid/DisableWarnings.h @@ -30,8 +30,34 @@ directory #ifndef DISABLE_WARNINGS_H #define DISABLE_WARNINGS_H + + +#if defined __GNUC__ && __GNUC__>=6 +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + //disables and intel compiler specific warning (in json.hpp) #pragma warning disable 488 +#ifdef __NVCC__ + //disables nvcc specific warning in json.hpp +#pragma clang diagnostic ignored "-Wdeprecated-register" +#pragma diag_suppress unsigned_compare_with_zero +#pragma diag_suppress cast_to_qualified_type + + //disables nvcc specific warning in many files +#pragma diag_suppress esa_on_defaulted_function_ignored +#pragma diag_suppress extra_semicolon + +//Eigen only +#endif + +// Disable vectorisation in Eigen on the Power8/9 and PowerPC +#ifdef __ALTIVEC__ +#define EIGEN_DONT_VECTORIZE +#endif +#ifdef __VSX__ +#define EIGEN_DONT_VECTORIZE +#endif #endif diff --git a/Grid/GridCore.h b/Grid/GridCore.h index 3f31701a..a48d2d49 100644 --- a/Grid/GridCore.h +++ b/Grid/GridCore.h @@ -38,16 +38,19 @@ Author: paboyle #ifndef GRID_BASE_H #define GRID_BASE_H -#include +#include +#include +#include +#include #include #include +#include #include #include #include -#include #include -#include +#include #include #include #include @@ -57,5 +60,6 @@ Author: paboyle #include #include #include +NAMESPACE_CHECK(GridCore) #endif diff --git a/Grid/GridQCDcore.h b/Grid/GridQCDcore.h index 7f50761f..cae6f43f 100644 --- a/Grid/GridQCDcore.h +++ b/Grid/GridQCDcore.h @@ -38,5 +38,6 @@ Author: paboyle #include #include #include +NAMESPACE_CHECK(GridQCDCore); #endif diff --git a/Grid/GridStd.h b/Grid/GridStd.h index 097e62ab..16cfcf50 100644 --- a/Grid/GridStd.h +++ b/Grid/GridStd.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/Grid/Grid_Eigen_Dense.h b/Grid/Grid_Eigen_Dense.h index dd330dd9..f9bccf2d 100644 --- 
a/Grid/Grid_Eigen_Dense.h +++ b/Grid/Grid_Eigen_Dense.h @@ -1,14 +1,41 @@ +#include #pragma once // Force Eigen to use MKL if Grid has been configured with --enable-mkl #ifdef USE_MKL #define EIGEN_USE_MKL_ALL #endif + #if defined __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif + +/* NVCC save and restore compile environment*/ +#ifdef __NVCC__ +#pragma push +#pragma diag_suppress code_is_unreachable +#pragma push_macro("__CUDA_ARCH__") +#pragma push_macro("__NVCC__") +#pragma push_macro("__CUDACC__") +#undef __NVCC__ +#undef __CUDACC__ +#undef __CUDA_ARCH__ +#define __NVCC__REDEFINE__ +#endif + #include +#include + +/* NVCC restore */ +#ifdef __NVCC__REDEFINE__ +#pragma pop_macro("__CUDACC__") +#pragma pop_macro("__NVCC__") +#pragma pop_macro("__CUDA_ARCH__") +#pragma pop +#endif + #if defined __GNUC__ #pragma GCC diagnostic pop #endif + diff --git a/Grid/Grid_Eigen_Tensor.h b/Grid/Grid_Eigen_Tensor.h new file mode 100644 index 00000000..3a2120c5 --- /dev/null +++ b/Grid/Grid_Eigen_Tensor.h @@ -0,0 +1 @@ +#include diff --git a/Grid/Namespace.h b/Grid/Namespace.h new file mode 100644 index 00000000..29b229fa --- /dev/null +++ b/Grid/Namespace.h @@ -0,0 +1,38 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/Namespace.h + +Copyright (C) 2016 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include +#include + +#define NAMESPACE_BEGIN(A) namespace A { +#define NAMESPACE_END(A) } +#define GRID_NAMESPACE_BEGIN NAMESPACE_BEGIN(Grid) +#define GRID_NAMESPACE_END NAMESPACE_END(Grid) +#define NAMESPACE_CHECK(x) struct namespaceTEST##x {}; static_assert(std::is_same::value,"Not in :: at" ); diff --git a/Grid/algorithms/CoarsenedMatrix.h b/Grid/algorithms/CoarsenedMatrix.h index a6c6c030..a373bc0a 100644 --- a/Grid/algorithms/CoarsenedMatrix.h +++ b/Grid/algorithms/CoarsenedMatrix.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,501 +26,487 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_ALGORITHM_COARSENED_MATRIX_H #define GRID_ALGORITHM_COARSENED_MATRIX_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - class Geometry { - // int dimension; - public: - int npoint; - std::vector directions ; - std::vector displacements; +class Geometry { + // int dimension; +public: + int npoint; + std::vector directions ; + std::vector displacements; Geometry(int _d) { - int base = (_d==5) ? 1:0; + int base = (_d==5) ? 
1:0; - // make coarse grid stencil for 4d , not 5d - if ( _d==5 ) _d=4; + // make coarse grid stencil for 4d , not 5d + if ( _d==5 ) _d=4; - npoint = 2*_d+1; - directions.resize(npoint); - displacements.resize(npoint); - for(int d=0;d<_d;d++){ - directions[2*d ] = d+base; - directions[2*d+1] = d+base; - displacements[2*d ] = +1; - displacements[2*d+1] = -1; - } - directions [2*_d]=0; - displacements[2*_d]=0; + npoint = 2*_d+1; + directions.resize(npoint); + displacements.resize(npoint); + for(int d=0;d<_d;d++){ + directions[2*d ] = d+base; + directions[2*d+1] = d+base; + displacements[2*d ] = +1; + displacements[2*d+1] = -1; + } + directions [2*_d]=0; + displacements[2*_d]=0; - //// report back - std::cout< GetDelta(int point) { - std::vector delta(dimension,0); - delta[directions[point]] = displacements[point]; - return delta; - }; - */ - + /* + // Original cleaner code + Geometry(int _d) : dimension(_d), npoint(2*_d+1), directions(npoint), displacements(npoint) { + for(int d=0;d GetDelta(int point) { + std::vector delta(dimension,0); + delta[directions[point]] = displacements[point]; + return delta; }; + */ + +}; - template - class Aggregation { - public: - typedef iVector siteVector; - typedef Lattice CoarseVector; - typedef Lattice > CoarseMatrix; +template +class Aggregation { +public: + typedef iVector siteVector; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; - typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; - - GridBase *CoarseGrid; - GridBase *FineGrid; - std::vector > subspace; - int checkerboard; + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + GridBase *CoarseGrid; + GridBase *FineGrid; + std::vector > subspace; + int checkerboard; + int Checkerboard(void){return checkerboard;} Aggregation(GridBase *_CoarseGrid,GridBase *_FineGrid,int _checkerboard) : CoarseGrid(_CoarseGrid), - FineGrid(_FineGrid), - 
subspace(nbasis,_FineGrid), - checkerboard(_checkerboard) - { - }; + FineGrid(_FineGrid), + subspace(nbasis,_FineGrid), + checkerboard(_checkerboard) + { + }; - void Orthogonalise(void){ - CoarseScalar InnerProd(CoarseGrid); - std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<oSites();ss++){ - eProj._odata[ss](i)=CComplex(1.0); - } - eProj=eProj - iProj; - std::cout<oSites(),{ + eProj[ss](i)=CComplex(1.0); + }); + eProj=eProj - iProj; + std::cout< &hermop,int nn=nbasis) { - // Run a Lanczos with sloppy convergence - const int Nstop = nn; - const int Nk = nn+20; - const int Np = nn+20; - const int Nm = Nk+Np; - const int MaxIt= 10000; - RealD resid = 1.0e-3; + // Run a Lanczos with sloppy convergence + const int Nstop = nn; + const int Nk = nn+20; + const int Np = nn+20; + const int Nm = Nk+Np; + const int MaxIt= 10000; + RealD resid = 1.0e-3; - Chebyshev Cheb(0.5,64.0,21); - ImplicitlyRestartedLanczos IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt); - // IRL.lock = 1; + Chebyshev Cheb(0.5,64.0,21); + ImplicitlyRestartedLanczos IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt); + // IRL.lock = 1; - FineField noise(FineGrid); gaussian(RNG,noise); - FineField tmp(FineGrid); - std::vector eval(Nm); - std::vector evec(Nm,FineGrid); + FineField noise(FineGrid); gaussian(RNG,noise); + FineField tmp(FineGrid); + std::vector eval(Nm); + std::vector evec(Nm,FineGrid); - int Nconv; - IRL.calc(eval,evec, - noise, - Nconv); + int Nconv; + IRL.calc(eval,evec, + noise, + Nconv); - // pull back nn vectors - for(int b=0;b "< "< "< "< "< "< - class CoarsenedMatrix : public SparseMatrixBase > > { - public: + + Orthogonalise(); + + } +}; +// Fine Object == (per site) type of fine field +// nbasis == number of deflation vectors +template +class CoarsenedMatrix : public SparseMatrixBase > > { +public: - typedef iVector siteVector; - typedef Lattice CoarseVector; - typedef Lattice > CoarseMatrix; + typedef iVector siteVector; + typedef Lattice CoarseVector; + typedef Lattice > CoarseMatrix; - 
typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field - typedef Lattice FineField; + typedef Lattice< CComplex > CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; - //////////////////// - // Data members - //////////////////// - Geometry geom; - GridBase * _grid; - CartesianStencil Stencil; + //////////////////// + // Data members + //////////////////// + Geometry geom; + GridBase * _grid; - std::vector A; + CartesianStencil Stencil; + + std::vector A; - /////////////////////// - // Interface - /////////////////////// - GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know + /////////////////////// + // Interface + /////////////////////// + GridBase * Grid(void) { return _grid; }; // this is all the linalg routines need to know - RealD M (const CoarseVector &in, CoarseVector &out){ + RealD M (const CoarseVector &in, CoarseVector &out){ - conformable(_grid,in._grid); - conformable(in._grid,out._grid); + conformable(_grid,in.Grid()); + conformable(in.Grid(),out.Grid()); - SimpleCompressor compressor; - Stencil.HaloExchange(in,compressor); + SimpleCompressor compressor; + Stencil.HaloExchange(in,compressor); + auto in_v = in.View(); + auto out_v = out.View(); /* result view must alias the output field `out` (as Mdir does), not the const input `in` */ + thread_for(ss,Grid()->oSites(),{ + siteVector res = Zero(); + siteVector nbr; + int ptype; + StencilEntry *SE; + for(int point=0;pointoSites();ss++){ - siteVector res = zero; - siteVector nbr; - int ptype; - StencilEntry *SE; - for(int point=0;point_is_local&&SE->_permute) { - permute(nbr,in._odata[SE->_offset],ptype); - } else if(SE->_is_local) { - nbr = in._odata[SE->_offset]; - } else { - nbr = Stencil.CommBuf()[SE->_offset]; - } - res = res + A[point]._odata[ss]*nbr; + if(SE->_is_local&&SE->_permute) { + permute(nbr,in_v[SE->_offset],ptype); + } else if(SE->_is_local) { + nbr = in_v[SE->_offset]; + } else { + nbr = Stencil.CommBuf()[SE->_offset]; } - vstream(out._odata[ss],res); + auto A_point = A[point].View(); 
+ res = res + A_point[ss]*nbr; } - return norm2(out); - }; + vstream(out_v[ss],res); + }); + return norm2(out); + }; - RealD Mdag (const CoarseVector &in, CoarseVector &out){ - // // corresponds to Petrov-Galerkin coarsening - // return M(in,out); + RealD Mdag (const CoarseVector &in, CoarseVector &out){ + // // corresponds to Petrov-Galerkin coarsening + // return M(in,out); + + // corresponds to Galerkin coarsening + CoarseVector tmp(Grid()); + G5C(tmp, in); + M(tmp, out); + G5C(out, out); + return norm2(out); + }; - // corresponds to Galerkin coarsening - CoarseVector tmp(Grid()); - G5C(tmp, in); - M(tmp, out); - G5C(out, out); - return norm2(out); - }; + void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ + + conformable(_grid,in.Grid()); + conformable(in.Grid(),out.Grid()); + + SimpleCompressor compressor; + Stencil.HaloExchange(in,compressor); + + auto point = [dir, disp](){ + if(dir == 0 and disp == 0) + return 8; + else + return (4 * dir + 1 - disp) / 2; + }(); - void Mdir(const CoarseVector &in, CoarseVector &out, int dir, int disp){ - - conformable(_grid,in._grid); - conformable(in._grid,out._grid); - - SimpleCompressor compressor; - Stencil.HaloExchange(in,compressor); - - auto point = [dir, disp](){ - if(dir == 0 and disp == 0) - return 8; - else - return (4 * dir + 1 - disp) / 2; - }(); - - parallel_for(int ss=0;ssoSites();ss++){ - siteVector res = zero; - siteVector nbr; - int ptype; - StencilEntry *SE; - - SE=Stencil.GetEntry(ptype,point,ss); - - if(SE->_is_local&&SE->_permute) { - permute(nbr,in._odata[SE->_offset],ptype); - } else if(SE->_is_local) { - nbr = in._odata[SE->_offset]; - } else { - nbr = Stencil.CommBuf()[SE->_offset]; - } - - res = res + A[point]._odata[ss]*nbr; - - vstream(out._odata[ss],res); + auto out_v = out.View(); + auto in_v = in.View(); + thread_for(ss,Grid()->oSites(),{ + siteVector res = Zero(); + siteVector nbr; + int ptype; + StencilEntry *SE; + + SE=Stencil.GetEntry(ptype,point,ss); + + 
if(SE->_is_local&&SE->_permute) { + permute(nbr,in_v[SE->_offset],ptype); + } else if(SE->_is_local) { + nbr = in_v[SE->_offset]; + } else { + nbr = Stencil.CommBuf()[SE->_offset]; } - }; - void Mdiag(const CoarseVector &in, CoarseVector &out){ - Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil - }; + auto A_point = A[point].View(); + res = res + A_point[ss]*nbr; + + vstream(out_v[ss],res); + }); + }; - CoarsenedMatrix(GridCartesian &CoarseGrid) : + void Mdiag(const CoarseVector &in, CoarseVector &out){ + Mdir(in, out, 0, 0); // use the self coupling (= last) point of the stencil + }; - _grid(&CoarseGrid), - geom(CoarseGrid._ndimension), - Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements), - A(geom.npoint,&CoarseGrid) - { - }; + + CoarsenedMatrix(GridCartesian &CoarseGrid) : - void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase > &linop, - Aggregation & Subspace){ + _grid(&CoarseGrid), + geom(CoarseGrid._ndimension), + Stencil(&CoarseGrid,geom.npoint,Even,geom.directions,geom.displacements,0), + A(geom.npoint,&CoarseGrid) + { + }; - FineField iblock(FineGrid); // contributions from within this block - FineField oblock(FineGrid); // contributions from outwith this block + void CoarsenOperator(GridBase *FineGrid,LinearOperatorBase > &linop, + Aggregation & Subspace){ - FineField phi(FineGrid); - FineField tmp(FineGrid); - FineField zz(FineGrid); zz=zero; - FineField Mphi(FineGrid); + FineField iblock(FineGrid); // contributions from within this block + FineField oblock(FineGrid); // contributions from outwith this block - Lattice > coor(FineGrid); + FineField phi(FineGrid); + FineField tmp(FineGrid); + FineField zz(FineGrid); zz=Zero(); + FineField Mphi(FineGrid); - CoarseVector iProj(Grid()); - CoarseVector oProj(Grid()); - CoarseScalar InnerProd(Grid()); + Lattice > coor(FineGrid); - // Orthogonalise the subblocks over the basis - blockOrthogonalise(InnerProd,Subspace.subspace); + CoarseVector iProj(Grid()); 
+ CoarseVector oProj(Grid()); + CoarseScalar InnerProd(Grid()); - // Compute the matrix elements of linop between this orthonormal - // set of vectors. - int self_stencil=-1; - for(int p=0;p_rdimensions[dir])/(Grid()->_rdimensions[dir]); + Integer block=(FineGrid->_rdimensions[dir])/(Grid()->_rdimensions[dir]); - LatticeCoordinate(coor,dir); + LatticeCoordinate(coor,dir); - if ( disp==0 ){ - linop.OpDiag(phi,Mphi); - } - else { - linop.OpDir(phi,Mphi,dir,disp); - } - - //////////////////////////////////////////////////////////////////////// - // Pick out contributions coming from this cell and neighbour cell - //////////////////////////////////////////////////////////////////////// - if ( disp==0 ) { - iblock = Mphi; - oblock = zero; - } else if ( disp==1 ) { - oblock = where(mod(coor,block)==(block-1),Mphi,zz); - iblock = where(mod(coor,block)!=(block-1),Mphi,zz); - } else if ( disp==-1 ) { - oblock = where(mod(coor,block)==(Integer)0,Mphi,zz); - iblock = where(mod(coor,block)!=(Integer)0,Mphi,zz); - } else { - assert(0); - } - - Subspace.ProjectToSubspace(iProj,iblock); - Subspace.ProjectToSubspace(oProj,oblock); - // blockProject(iProj,iblock,Subspace.subspace); - // blockProject(oProj,oblock,Subspace.subspace); - parallel_for(int ss=0;ssoSites();ss++){ - for(int j=0;joSites(),{ + for(int j=0;j bc(FineGrid->_ndimension,0); + phi=Subspace.subspace[0]; + std::vector bc(FineGrid->_ndimension,0); - blockPick(Grid(),phi,tmp,bc); // Pick out a block - linop.Op(tmp,Mphi); // Apply big dop - blockProject(iProj,Mphi,Subspace.subspace); // project it and print it - std::cout<({55,72,19,17,34})); - Lattice > val(Grid()); random(RNG,val); - - Complex one(1.0); - - iMatrix ident; ident=one; - - val = val*adj(val); - val = val + 1.0; - - A[8] = val*ident; - - // for(int s=0;soSites();s++) { - // A[8]._odata[s]=val._odata[s]; - // } - } - void ForceHermitian(void) { - for(int d=0;d<4;d++){ - int dd=d+1; - A[2*d] = adj(Cshift(A[2*d+1],dd,1)); - } - // A[8] = 0.5*(A[8] + 
adj(A[8])); - } - void AssertHermitian(void) { - CoarseMatrix AA (Grid()); - CoarseMatrix AAc (Grid()); - CoarseMatrix Diff (Grid()); - for(int d=0;d<4;d++){ + // A[8] = 0.5*(A[8] + adj(A[8])); + } + void AssertHermitian(void) { + CoarseMatrix AA (Grid()); + CoarseMatrix AAc (Grid()); + CoarseMatrix Diff (Grid()); + for(int d=0;d<4;d++){ - int dd=d+1; - AAc = Cshift(A[2*d+1],dd,1); - AA = A[2*d]; + int dd=d+1; + AAc = Cshift(A[2*d+1],dd,1); + AA = A[2*d]; - Diff = AA - adj(AAc); + Diff = AA - adj(AAc); - std::cout< 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef _GRID_FFT_H_ #define _GRID_FFT_H_ @@ -38,64 +38,64 @@ Author: Peter Boyle #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); - template struct FFTW { }; +template struct FFTW { }; #ifdef HAVE_FFTW - template<> struct FFTW { - public: +template<> struct FFTW { +public: - typedef fftw_complex FFTW_scalar; - typedef fftw_plan FFTW_plan; + typedef fftw_complex FFTW_scalar; + typedef fftw_plan FFTW_plan; - static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, - FFTW_scalar *in, const int *inembed, - int istride, int idist, - FFTW_scalar *out, const int *onembed, - int ostride, int odist, - int sign, unsigned flags) { - return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); - } + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } - static void 
fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ - ::fftw_flops(p,add,mul,fmas); - } + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftw_flops(p,add,mul,fmas); + } - inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { - ::fftw_execute_dft(p,in,out); - } - inline static void fftw_destroy_plan(const FFTW_plan p) { - ::fftw_destroy_plan(p); - } - }; + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftw_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftw_destroy_plan(p); + } +}; - template<> struct FFTW { - public: +template<> struct FFTW { +public: - typedef fftwf_complex FFTW_scalar; - typedef fftwf_plan FFTW_plan; + typedef fftwf_complex FFTW_scalar; + typedef fftwf_plan FFTW_plan; - static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, - FFTW_scalar *in, const int *inembed, - int istride, int idist, - FFTW_scalar *out, const int *onembed, - int ostride, int odist, - int sign, unsigned flags) { - return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); - } + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } - static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ - ::fftwf_flops(p,add,mul,fmas); - } + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftwf_flops(p,add,mul,fmas); + } - inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { - ::fftwf_execute_dft(p,in,out); - } - inline static void fftw_destroy_plan(const 
FFTW_plan p) { - ::fftwf_destroy_plan(p); - } - }; + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftwf_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftwf_destroy_plan(p); + } +}; #endif @@ -104,203 +104,188 @@ namespace Grid { #define FFTW_BACKWARD (+1) #endif - class FFT { - private: +class FFT { +private: - GridCartesian *vgrid; - GridCartesian *sgrid; + GridCartesian *vgrid; + GridCartesian *sgrid; - int Nd; - double flops; - double flops_call; - uint64_t usec; + int Nd; + double flops; + double flops_call; + uint64_t usec; - std::vector dimensions; - std::vector processors; - std::vector processor_coor; + Coordinate dimensions; + Coordinate processors; + Coordinate processor_coor; - public: +public: - static const int forward=FFTW_FORWARD; - static const int backward=FFTW_BACKWARD; + static const int forward=FFTW_FORWARD; + static const int backward=FFTW_BACKWARD; - double Flops(void) {return flops;} - double MFlops(void) {return flops/usec;} - double USec(void) {return (double)usec;} + double Flops(void) {return flops;} + double MFlops(void) {return flops/usec;} + double USec(void) {return (double)usec;} - FFT ( GridCartesian * grid ) : + FFT ( GridCartesian * grid ) : vgrid(grid), Nd(grid->_ndimension), dimensions(grid->_fdimensions), processors(grid->_processors), processor_coor(grid->_processor_coor) - { - flops=0; - usec =0; - std::vector layout(Nd,1); - sgrid = new GridCartesian(dimensions,layout,processors); - }; - - ~FFT ( void) { - delete sgrid; - } - - template - void FFT_dim_mask(Lattice &result,const Lattice &source,std::vector mask,int sign){ - - conformable(result._grid,vgrid); - conformable(source._grid,vgrid); - Lattice tmp(vgrid); - tmp = source; - for(int d=0;d - void FFT_all_dim(Lattice &result,const Lattice &source,int sign){ - std::vector mask(Nd,1); - FFT_dim_mask(result,source,mask,sign); - } - - - template - void FFT_dim(Lattice &result,const 
Lattice &source,int dim, int sign){ -#ifndef HAVE_FFTW - assert(0); -#else - conformable(result._grid,vgrid); - conformable(source._grid,vgrid); - - int L = vgrid->_ldimensions[dim]; - int G = vgrid->_fdimensions[dim]; - - std::vector layout(Nd,1); - std::vector pencil_gd(vgrid->_fdimensions); - - pencil_gd[dim] = G*processors[dim]; - - // Pencil global vol LxLxGxLxL per node - GridCartesian pencil_g(pencil_gd,layout,processors); - - // Construct pencils - typedef typename vobj::scalar_object sobj; - typedef typename sobj::scalar_type scalar; - - Lattice pgbuf(&pencil_g); - - - typedef typename FFTW::FFTW_scalar FFTW_scalar; - typedef typename FFTW::FFTW_plan FFTW_plan; - - int Ncomp = sizeof(sobj)/sizeof(scalar); - int Nlow = 1; - for(int d=0;d_ldimensions[d]; - } - - int rank = 1; /* 1d transforms */ - int n[] = {G}; /* 1d transforms of length G */ - int howmany = Ncomp; - int odist,idist,istride,ostride; - idist = odist = 1; /* Distance between consecutive FT's */ - istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */ - int *inembed = n, *onembed = n; - - scalar div; - if ( sign == backward ) div = 1.0/G; - else if ( sign == forward ) div = 1.0; - else assert(0); - - FFTW_plan p; - { - FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[0]; - FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[0]; - p = FFTW::fftw_plan_many_dft(rank,n,howmany, - in,inembed, - istride,idist, - out,onembed, - ostride, odist, - sign,FFTW_ESTIMATE); - } - - // Barrel shift and collect global pencil - std::vector lcoor(Nd), gcoor(Nd); - result = source; - int pc = processor_coor[dim]; - for(int p=0;p cbuf(Nd); - sobj s; - - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idxlSites();idx++) { - sgrid->LocalIndexToLocalCoor(idx,cbuf); - peekLocalSite(s,result,cbuf); - cbuf[dim]+=((pc+p) % processors[dim])*L; - // cbuf[dim]+=p*L; - pokeLocalSite(s,pgbuf,cbuf); - } - } - if (p != processors[dim] - 1) - { - result = Cshift(result,dim,L); - } - } - - // Loop over orthog coords 
- int NN=pencil_g.lSites(); - GridStopWatch timer; - timer.Start(); - PARALLEL_REGION - { - std::vector cbuf(Nd); - - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idx::fftw_execute_dft(p,in,out); - } - } - } - timer.Stop(); - - // performance counting - double add,mul,fma; - FFTW::fftw_flops(p,&add,&mul,&fma); - flops_call = add+mul+2.0*fma; - usec += timer.useconds(); - flops+= flops_call*NN; - - // writing out result - PARALLEL_REGION - { - std::vector clbuf(Nd), cgbuf(Nd); - sobj s; - - PARALLEL_FOR_LOOP_INTERN - for(int idx=0;idxlSites();idx++) { - sgrid->LocalIndexToLocalCoor(idx,clbuf); - cgbuf = clbuf; - cgbuf[dim] = clbuf[dim]+L*pc; - peekLocalSite(s,pgbuf,cgbuf); - pokeLocalSite(s,result,clbuf); - } - } - result = result*div; - - // destroying plan - FFTW::fftw_destroy_plan(p); -#endif - } + { + flops=0; + usec =0; + Coordinate layout(Nd,1); + sgrid = new GridCartesian(dimensions,layout,processors); }; -} + + ~FFT ( void) { + delete sgrid; + } + + template + void FFT_dim_mask(Lattice &result,const Lattice &source,Coordinate mask,int sign){ + + conformable(result.Grid(),vgrid); + conformable(source.Grid(),vgrid); + Lattice tmp(vgrid); + tmp = source; + for(int d=0;d + void FFT_all_dim(Lattice &result,const Lattice &source,int sign){ + Coordinate mask(Nd,1); + FFT_dim_mask(result,source,mask,sign); + } + + + template + void FFT_dim(Lattice &result,const Lattice &source,int dim, int sign){ +#ifndef HAVE_FFTW + assert(0); +#else + conformable(result.Grid(),vgrid); + conformable(source.Grid(),vgrid); + + int L = vgrid->_ldimensions[dim]; + int G = vgrid->_fdimensions[dim]; + + Coordinate layout(Nd,1); + Coordinate pencil_gd(vgrid->_fdimensions); + + pencil_gd[dim] = G*processors[dim]; + + // Pencil global vol LxLxGxLxL per node + GridCartesian pencil_g(pencil_gd,layout,processors); + + // Construct pencils + typedef typename vobj::scalar_object sobj; + typedef typename sobj::scalar_type scalar; + + Lattice pgbuf(&pencil_g); + auto pgbuf_v = pgbuf.View(); + + 
typedef typename FFTW::FFTW_scalar FFTW_scalar; + typedef typename FFTW::FFTW_plan FFTW_plan; + + int Ncomp = sizeof(sobj)/sizeof(scalar); + int Nlow = 1; + for(int d=0;d_ldimensions[d]; + } + + int rank = 1; /* 1d transforms */ + int n[] = {G}; /* 1d transforms of length G */ + int howmany = Ncomp; + int odist,idist,istride,ostride; + idist = odist = 1; /* Distance between consecutive FT's */ + istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */ + int *inembed = n, *onembed = n; + + scalar div; + if ( sign == backward ) div = 1.0/G; + else if ( sign == forward ) div = 1.0; + else assert(0); + + FFTW_plan p; + { + FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0]; + FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[0]; + p = FFTW::fftw_plan_many_dft(rank,n,howmany, + in,inembed, + istride,idist, + out,onembed, + ostride, odist, + sign,FFTW_ESTIMATE); + } + + // Barrel shift and collect global pencil + Coordinate lcoor(Nd), gcoor(Nd); + result = source; + int pc = processor_coor[dim]; + for(int p=0;plSites(),{ + Coordinate cbuf(Nd); + sobj s; + sgrid->LocalIndexToLocalCoor(idx,cbuf); + peekLocalSite(s,result,cbuf); + cbuf[dim]+=((pc+p) % processors[dim])*L; + // cbuf[dim]+=p*L; + pokeLocalSite(s,pgbuf,cbuf); + }); + if (p != processors[dim] - 1) { + result = Cshift(result,dim,L); + } + } + + // Loop over orthog coords + int NN=pencil_g.lSites(); + GridStopWatch timer; + timer.Start(); + thread_for( idx,NN,{ + Coordinate cbuf(Nd); + pencil_g.LocalIndexToLocalCoor(idx, cbuf); + if ( cbuf[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 + FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[idx]; + FFTW_scalar *out= (FFTW_scalar *)&pgbuf_v[idx]; + FFTW::fftw_execute_dft(p,in,out); + } + }); + timer.Stop(); + + // performance counting + double add,mul,fma; + FFTW::fftw_flops(p,&add,&mul,&fma); + flops_call = add+mul+2.0*fma; + usec += timer.useconds(); + flops+= flops_call*NN; + + // writing out result + thread_for(idx,sgrid->lSites(),{ + Coordinate 
clbuf(Nd), cgbuf(Nd); + sobj s; + sgrid->LocalIndexToLocalCoor(idx,clbuf); + cgbuf = clbuf; + cgbuf[dim] = clbuf[dim]+L*pc; + peekLocalSite(s,pgbuf,cgbuf); + pokeLocalSite(s,result,clbuf); + }); + result = result*div; + + // destroying plan + FFTW::fftw_destroy_plan(p); +#endif + } +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/LinearOperator.h b/Grid/algorithms/LinearOperator.h index a1be48f4..c309ad41 100644 --- a/Grid/algorithms/LinearOperator.h +++ b/Grid/algorithms/LinearOperator.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,153 +24,152 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_ALGORITHM_LINEAR_OP_H -#define GRID_ALGORITHM_LINEAR_OP_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////////////////////////////////////////////// - // LinearOperators Take a something and return a something. - ///////////////////////////////////////////////////////////////////////////////////////////// - // - // Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugateugate (transpose if real): - //SBase - // i) F(a x + b y) = aF(x) + b F(y). - // ii) = ^\ast - // - // Would be fun to have a test linearity & Herm Conj function! 
- ///////////////////////////////////////////////////////////////////////////////////////////// - template class LinearOperatorBase { - public: +///////////////////////////////////////////////////////////////////////////////////////////// +// LinearOperators Take a something and return a something. +///////////////////////////////////////////////////////////////////////////////////////////// +// +// Hopefully linearity is satisfied and the AdjOp is indeed the Hermitian conjugate (transpose if real): +//SBase +// i) F(a x + b y) = aF(x) + b F(y). +// ii) <x|Op|y> = <y|AdjOp|x>^\ast +// +// Would be fun to have a test linearity & Herm Conj function! +///////////////////////////////////////////////////////////////////////////////////////////// +template class LinearOperatorBase { +public: - // Support for coarsening to a multigrid - virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base - virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base + // Support for coarsening to a multigrid + virtual void OpDiag (const Field &in, Field &out) = 0; // Abstract base + virtual void OpDir (const Field &in, Field &out,int dir,int disp) = 0; // Abstract base - virtual void Op (const Field &in, Field &out) = 0; // Abstract base - virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base - virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0; - virtual void HermOp(const Field &in, Field &out)=0; - }; + virtual void Op (const Field &in, Field &out) = 0; // Abstract base + virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base + virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2)=0; + virtual void HermOp(const Field &in, Field &out)=0; +}; - ///////////////////////////////////////////////////////////////////////////////////////////// - // By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code - // between RB and non-RB variants. 
Sparse matrix is like the fermion action def, and then - // the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising - // replication of code. - // - // I'm not entirely happy with implementation; to share the Schur code between herm and non-herm - // while still having a "OpAndNorm" in the abstract base I had to implement it in both cases - // with an assert trap in the non-herm. This isn't right; there must be a better C++ way to - // do it, but I fear it required multiple inheritance and mixed in abstract base classes - ///////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////// +// By sharing the class for Sparse Matrix across multiple operator wrappers, we can share code +// between RB and non-RB variants. Sparse matrix is like the fermion action def, and then +// the wrappers implement the specialisation of "Op" and "AdjOp" to the cases minimising +// replication of code. +// +// I'm not entirely happy with implementation; to share the Schur code between herm and non-herm +// while still having a "OpAndNorm" in the abstract base I had to implement it in both cases +// with an assert trap in the non-herm. 
This isn't right; there must be a better C++ way to +// do it, but I fear it required multiple inheritance and mixed in abstract base classes +///////////////////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////// - // Construct herm op from non-herm matrix - //////////////////////////////////////////////////////////////////// - template - class MdagMLinearOperator : public LinearOperatorBase { - Matrix &_Mat; - public: - MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; +//////////////////////////////////////////////////////////////////// +// Construct herm op from non-herm matrix +//////////////////////////////////////////////////////////////////// +template +class MdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; +public: + MdagMLinearOperator(Matrix &Mat): _Mat(Mat){}; - // Support for coarsening to a multigrid - void OpDiag (const Field &in, Field &out) { - _Mat.Mdiag(in,out); - } - void OpDir (const Field &in, Field &out,int dir,int disp) { - _Mat.Mdir(in,out,dir,disp); - } - void Op (const Field &in, Field &out){ - _Mat.M(in,out); - } - void AdjOp (const Field &in, Field &out){ - _Mat.Mdag(in,out); - } - void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); - } - void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); - } - }; + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + } + void Op (const Field &in, Field &out){ + _Mat.M(in,out); + } + void AdjOp (const Field &in, Field &out){ + _Mat.Mdag(in,out); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + _Mat.MdagM(in,out,n1,n2); + } + void HermOp(const Field &in, Field &out){ + RealD n1,n2; + HermOpAndNorm(in,out,n1,n2); + } +}; - 
//////////////////////////////////////////////////////////////////// - // Construct herm op and shift it for mgrid smoother - //////////////////////////////////////////////////////////////////// - template - class ShiftedMdagMLinearOperator : public LinearOperatorBase { - Matrix &_Mat; - RealD _shift; - public: - ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){}; - // Support for coarsening to a multigrid - void OpDiag (const Field &in, Field &out) { - _Mat.Mdiag(in,out); - assert(0); - } - void OpDir (const Field &in, Field &out,int dir,int disp) { - _Mat.Mdir(in,out,dir,disp); - assert(0); - } - void Op (const Field &in, Field &out){ - _Mat.M(in,out); - assert(0); - } - void AdjOp (const Field &in, Field &out){ - _Mat.Mdag(in,out); - assert(0); - } - void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.MdagM(in,out,n1,n2); - out = out + _shift*in; +//////////////////////////////////////////////////////////////////// +// Construct herm op and shift it for mgrid smoother +//////////////////////////////////////////////////////////////////// +template +class ShiftedMdagMLinearOperator : public LinearOperatorBase { + Matrix &_Mat; + RealD _shift; +public: + ShiftedMdagMLinearOperator(Matrix &Mat,RealD shift): _Mat(Mat), _shift(shift){}; + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + assert(0); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + assert(0); + } + void Op (const Field &in, Field &out){ + _Mat.M(in,out); + assert(0); + } + void AdjOp (const Field &in, Field &out){ + _Mat.Mdag(in,out); + assert(0); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + _Mat.MdagM(in,out,n1,n2); + out = out + _shift*in; - ComplexD dot; - dot= innerProduct(in,out); - n1=real(dot); - n2=norm2(out); - } - void HermOp(const Field &in, Field &out){ - RealD n1,n2; - HermOpAndNorm(in,out,n1,n2); - 
} - }; + ComplexD dot; + dot= innerProduct(in,out); + n1=real(dot); + n2=norm2(out); + } + void HermOp(const Field &in, Field &out){ + RealD n1,n2; + HermOpAndNorm(in,out,n1,n2); + } +}; - //////////////////////////////////////////////////////////////////// - // Wrap an already herm matrix - //////////////////////////////////////////////////////////////////// - template - class HermitianLinearOperator : public LinearOperatorBase { - Matrix &_Mat; - public: - HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; - // Support for coarsening to a multigrid - void OpDiag (const Field &in, Field &out) { - _Mat.Mdiag(in,out); - } - void OpDir (const Field &in, Field &out,int dir,int disp) { - _Mat.Mdir(in,out,dir,disp); - } - void Op (const Field &in, Field &out){ - _Mat.M(in,out); - } - void AdjOp (const Field &in, Field &out){ - _Mat.M(in,out); - } - void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - _Mat.M(in,out); +//////////////////////////////////////////////////////////////////// +// Wrap an already herm matrix +//////////////////////////////////////////////////////////////////// +template +class HermitianLinearOperator : public LinearOperatorBase { + Matrix &_Mat; +public: + HermitianLinearOperator(Matrix &Mat): _Mat(Mat){}; + // Support for coarsening to a multigrid + void OpDiag (const Field &in, Field &out) { + _Mat.Mdiag(in,out); + } + void OpDir (const Field &in, Field &out,int dir,int disp) { + _Mat.Mdir(in,out,dir,disp); + } + void Op (const Field &in, Field &out){ + _Mat.M(in,out); + } + void AdjOp (const Field &in, Field &out){ + _Mat.M(in,out); + } + void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ + _Mat.M(in,out); - ComplexD dot= innerProduct(in,out); n1=real(dot); - n2=norm2(out); - } - void HermOp(const Field &in, Field &out){ - _Mat.M(in,out); - } - }; + ComplexD dot= innerProduct(in,out); n1=real(dot); + n2=norm2(out); + } + void HermOp(const Field &in, Field &out){ + _Mat.M(in,out); + } +}; 
////////////////////////////////////////////////////////// // Even Odd Schur decomp operators; there are several @@ -183,13 +182,13 @@ namespace Grid { virtual RealD Mpc (const Field &in, Field &out) =0; virtual RealD MpcDag (const Field &in, Field &out) =0; virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp(in._grid); - tmp.checkerboard = in.checkerboard; + Field tmp(in.Grid()); + tmp.Checkerboard() = in.Checkerboard(); ni=Mpc(in,tmp); no=MpcDag(tmp,out); } virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); MpcDagMpc(in,out,n1,n2); } virtual void HermOp(const Field &in, Field &out){ @@ -216,20 +215,20 @@ namespace Grid { Matrix &_Mat; SchurDiagMooeeOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); - tmp.checkerboard = !in.checkerboard; + Field tmp(in.Grid()); + tmp.Checkerboard() = !in.Checkerboard(); //std::cout <<"grid pointers: in._grid="<< in._grid << " out._grid=" << out._grid << " _Mat.Grid=" << _Mat.Grid() << " _Mat.RedBlackGrid=" << _Mat.RedBlackGrid() << std::endl; _Mat.Meooe(in,tmp); _Mat.MooeeInv(tmp,out); _Mat.Meooe(out,tmp); - //std::cout << "cb in " << in.checkerboard << " cb out " << out.checkerboard << std::endl; + //std::cout << "cb in " << in.Checkerboard() << " cb out " << out.Checkerboard() << std::endl; _Mat.Mooee(in,out); return axpy_norm(out,-1.0,tmp,out); } virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.MeooeDag(in,tmp); _Mat.MooeeInvDag(tmp,out); @@ -247,7 +246,7 @@ namespace Grid { SchurDiagOneOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.Meooe(in,out); _Mat.MooeeInv(out,tmp); @@ -257,7 +256,7 @@ namespace Grid { return axpy_norm(out,-1.0,tmp,in); } virtual RealD MpcDag (const Field &in, Field 
&out){ - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.MooeeInvDag(in,out); _Mat.MeooeDag(out,tmp); @@ -275,7 +274,7 @@ namespace Grid { SchurDiagTwoOperator (Matrix &Mat): _Mat(Mat){}; virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.MooeeInv(in,out); _Mat.Meooe(out,tmp); @@ -285,7 +284,7 @@ namespace Grid { return axpy_norm(out,-1.0,tmp,in); } virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.MeooeDag(in,out); _Mat.MooeeInvDag(out,tmp); @@ -315,7 +314,7 @@ namespace Grid { double tMeo; double taxpby_norm; uint64_t ncall; - public: +public: void Report(void) { std::cout << GridLogMessage << " HermOpAndNorm.Mpc "<< tMpc/ncall<<" usec "< using SchurStagOperator = SchurStaggeredOperator; + } + virtual RealD Mpc (const Field &in, Field &out) + { + + Field tmp(in.Grid()); + Field tmp2(in.Grid()); + + // std::cout << GridLogIterative << " HermOp.Mpc "< using SchurStagOperator = SchurStaggeredOperator; - ///////////////////////////////////////////////////////////// - // Base classes for functions of operators - ///////////////////////////////////////////////////////////// - template class OperatorFunction { - public: - virtual void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) = 0; - virtual void operator() (LinearOperatorBase &Linop, const std::vector &in,std::vector &out) { - assert(in.size()==out.size()); - for(int k=0;k class LinearFunction { - public: - virtual void operator() (const Field &in, Field &out) = 0; - }; - - template class IdentityLinearFunction : public LinearFunction { - public: - void operator() (const Field &in, Field &out){ - out = in; - }; - }; - - - ///////////////////////////////////////////////////////////// - // Base classes for Multishift solvers for operators - ///////////////////////////////////////////////////////////// - template class OperatorMultiFunction { - public: - virtual void operator() 
(LinearOperatorBase &Linop, const Field &in, std::vector &out) = 0; - }; - - // FIXME : To think about - - // Chroma functionality list defining LinearOperator - /* - virtual void operator() (T& chi, const T& psi, enum PlusMinus isign) const = 0; - virtual void operator() (T& chi, const T& psi, enum PlusMinus isign, Real epsilon) const - virtual const Subset& subset() const = 0; - virtual unsigned long nFlops() const { return 0; } - virtual void deriv(P& ds_u, const T& chi, const T& psi, enum PlusMinus isign) const - class UnprecLinearOperator : public DiffLinearOperator - const Subset& subset() const {return all;} - }; - */ - - //////////////////////////////////////////////////////////////////////////////////////////// - // Hermitian operator Linear function and operator function - //////////////////////////////////////////////////////////////////////////////////////////// - template - class HermOpOperatorFunction : public OperatorFunction { - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - Linop.HermOp(in,out); - }; - }; - - template - class PlainHermOp : public LinearFunction { - public: - LinearOperatorBase &_Linop; - - PlainHermOp(LinearOperatorBase& linop) : _Linop(linop) - {} - - void operator()(const Field& in, Field& out) { - _Linop.HermOp(in,out); - } - }; - - template - class FunctionHermOp : public LinearFunction { - public: - OperatorFunction & _poly; - LinearOperatorBase &_Linop; - - FunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop) - : _poly(poly), _Linop(linop) {}; - - void operator()(const Field& in, Field& out) { - _poly(_Linop,in,out); - } - }; - - template - class Polynomial : public OperatorFunction { - private: - std::vector Coeffs; - public: - Polynomial(std::vector &_Coeffs) : Coeffs(_Coeffs) { }; - - // Implement the required interface - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - - Field AtoN(in._grid); - Field Mtmp(in._grid); - AtoN = in; - out = AtoN*Coeffs[0]; 
- for(int n=1;n class OperatorFunction { +public: + virtual void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) = 0; + virtual void operator() (LinearOperatorBase &Linop, const std::vector &in,std::vector &out) { + assert(in.size()==out.size()); + for(int k=0;k class LinearFunction { +public: + virtual void operator() (const Field &in, Field &out) = 0; +}; -#endif +template class IdentityLinearFunction : public LinearFunction { +public: + void operator() (const Field &in, Field &out){ + out = in; + }; +}; + + +///////////////////////////////////////////////////////////// +// Base classes for Multishift solvers for operators +///////////////////////////////////////////////////////////// +template class OperatorMultiFunction { +public: + virtual void operator() (LinearOperatorBase &Linop, const Field &in, std::vector &out) = 0; +}; + +// FIXME : To think about + +// Chroma functionality list defining LinearOperator +/* + virtual void operator() (T& chi, const T& psi, enum PlusMinus isign) const = 0; + virtual void operator() (T& chi, const T& psi, enum PlusMinus isign, Real epsilon) const + virtual const Subset& subset() const = 0; + virtual unsigned long nFlops() const { return 0; } + virtual void deriv(P& ds_u, const T& chi, const T& psi, enum PlusMinus isign) const + class UnprecLinearOperator : public DiffLinearOperator + const Subset& subset() const {return all;} + }; +*/ + +//////////////////////////////////////////////////////////////////////////////////////////// +// Hermitian operator Linear function and operator function +//////////////////////////////////////////////////////////////////////////////////////////// +template +class HermOpOperatorFunction : public OperatorFunction { + void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { + Linop.HermOp(in,out); + }; +}; + +template +class PlainHermOp : public LinearFunction { +public: + LinearOperatorBase &_Linop; + + PlainHermOp(LinearOperatorBase& linop) : 
_Linop(linop) + {} + + void operator()(const Field& in, Field& out) { + _Linop.HermOp(in,out); + } +}; + +template +class FunctionHermOp : public LinearFunction { +public: + OperatorFunction & _poly; + LinearOperatorBase &_Linop; + + FunctionHermOp(OperatorFunction & poly,LinearOperatorBase& linop) + : _poly(poly), _Linop(linop) {}; + + void operator()(const Field& in, Field& out) { + _poly(_Linop,in,out); + } +}; + +template +class Polynomial : public OperatorFunction { +private: + std::vector Coeffs; +public: + using OperatorFunction::operator(); + + Polynomial(std::vector &_Coeffs) : Coeffs(_Coeffs) { }; + + // Implement the required interface + void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { + + Field AtoN(in.Grid()); + Field Mtmp(in.Grid()); + AtoN = in; + out = AtoN*Coeffs[0]; + for(int n=1;n 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_PRECONDITIONER_H #define GRID_PRECONDITIONER_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - template class Preconditioner : public LinearFunction { - virtual void operator()(const Field &src, Field & psi)=0; - }; +template class Preconditioner : public LinearFunction { + virtual void operator()(const Field &src, Field & psi)=0; +}; - template class TrivialPrecon : public Preconditioner { - public: - void operator()(const Field &src, Field & psi){ - psi = src; - } - TrivialPrecon(void){}; - }; +template class TrivialPrecon : public Preconditioner { +public: + void operator()(const Field &src, Field & psi){ + psi = src; + } + TrivialPrecon(void){}; +}; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/SparseMatrix.h b/Grid/algorithms/SparseMatrix.h index 6cc617a6..ffed7527 
100644 --- a/Grid/algorithms/SparseMatrix.h +++ b/Grid/algorithms/SparseMatrix.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,38 +23,38 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_ALGORITHM_SPARSE_MATRIX_H #define GRID_ALGORITHM_SPARSE_MATRIX_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////////////////////////////////////////////// - // Interface defining what I expect of a general sparse matrix, such as a Fermion action - ///////////////////////////////////////////////////////////////////////////////////////////// - template class SparseMatrixBase { - public: - virtual GridBase *Grid(void) =0; - // Full checkerboar operations - virtual RealD M (const Field &in, Field &out)=0; - virtual RealD Mdag (const Field &in, Field &out)=0; - virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) { - Field tmp (in._grid); - ni=M(in,tmp); - no=Mdag(tmp,out); - } - virtual void Mdiag (const Field &in, Field &out)=0; - virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; - }; +///////////////////////////////////////////////////////////////////////////////////////////// +// Interface defining what I expect of a general sparse matrix, such as a Fermion action +///////////////////////////////////////////////////////////////////////////////////////////// +template class SparseMatrixBase { +public: + virtual GridBase *Grid(void) =0; + // Full checkerboar operations 
+ virtual RealD M (const Field &in, Field &out)=0; + virtual RealD Mdag (const Field &in, Field &out)=0; + virtual void MdagM(const Field &in, Field &out,RealD &ni,RealD &no) { + Field tmp (in.Grid()); + ni=M(in,tmp); + no=Mdag(tmp,out); + } + virtual void Mdiag (const Field &in, Field &out)=0; + virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0; +}; - ///////////////////////////////////////////////////////////////////////////////////////////// - // Interface augmented by a red black sparse matrix, such as a Fermion action - ///////////////////////////////////////////////////////////////////////////////////////////// - template class CheckerBoardedSparseMatrixBase : public SparseMatrixBase { - public: - virtual GridBase *RedBlackGrid(void)=0; +///////////////////////////////////////////////////////////////////////////////////////////// +// Interface augmented by a red black sparse matrix, such as a Fermion action +///////////////////////////////////////////////////////////////////////////////////////////// +template class CheckerBoardedSparseMatrixBase : public SparseMatrixBase { +public: + virtual GridBase *RedBlackGrid(void)=0; ////////////////////////////////////////////////////////////////////// // Query the even even properties to make algorithmic decisions @@ -63,17 +63,17 @@ namespace Grid { virtual int ConstEE(void) { return 1; }; // Disable assumptions unless overridden virtual int isTrivialEE(void) { return 0; }; // by a derived class that knows better - // half checkerboard operaions - virtual void Meooe (const Field &in, Field &out)=0; - virtual void Mooee (const Field &in, Field &out)=0; - virtual void MooeeInv (const Field &in, Field &out)=0; + // half checkerboard operaions + virtual void Meooe (const Field &in, Field &out)=0; + virtual void Mooee (const Field &in, Field &out)=0; + virtual void MooeeInv (const Field &in, Field &out)=0; - virtual void MeooeDag (const Field &in, Field &out)=0; - virtual void MooeeDag (const Field &in, 
Field &out)=0; - virtual void MooeeInvDag (const Field &in, Field &out)=0; + virtual void MeooeDag (const Field &in, Field &out)=0; + virtual void MooeeDag (const Field &in, Field &out)=0; + virtual void MooeeInvDag (const Field &in, Field &out)=0; - }; +}; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/approx/Chebyshev.h b/Grid/algorithms/approx/Chebyshev.h index b34fac7f..97e0e807 100644 --- a/Grid/algorithms/approx/Chebyshev.h +++ b/Grid/algorithms/approx/Chebyshev.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,14 +25,14 @@ Author: Christoph Lehner 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CHEBYSHEV_H #define GRID_CHEBYSHEV_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); struct ChebyParams : Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(ChebyParams, @@ -41,337 +41,339 @@ struct ChebyParams : Serializable { int, Npoly); }; - //////////////////////////////////////////////////////////////////////////////////////////// - // Generic Chebyshev approximations - //////////////////////////////////////////////////////////////////////////////////////////// - template - class Chebyshev : public OperatorFunction { - private: - std::vector Coeffs; - int order; - RealD hi; - RealD lo; +//////////////////////////////////////////////////////////////////////////////////////////// +// Generic Chebyshev approximations +//////////////////////////////////////////////////////////////////////////////////////////// +template +class Chebyshev : 
public OperatorFunction { +private: + using OperatorFunction::operator(); - public: - void csv(std::ostream &out){ - RealD diff = hi-lo; - RealD delta = (hi-lo)*1.0e-9; - for (RealD x=lo; x Coeffs; + int order; + RealD hi; + RealD lo; + +public: + void csv(std::ostream &out){ + RealD diff = hi-lo; + RealD delta = diff*1.0e-9; + for (RealD x=lo; x U(M); - std::vector a(M); - std::vector g(M); - for(int n=0;n<=M;n++){ - U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax)); - sumUsq += U[n]*U[n]; - } - sumUsq = std::sqrt(sumUsq); - - for(int i=1;i<=M;i++){ - a[i] = U[i]/sumUsq; - } - g[0] = 1.0; - for(int m=1;m<=M;m++){ - g[m] = 0; - for(int i=0;i<=M-m;i++){ - g[m]+= a[i]*a[m+i]; - } - } - for(int m=1;m<=M;m++){ - Coeffs[m]*=g[m]; - } - } - RealD approx(RealD x) // Convenience for plotting the approximation - { - RealD Tn; - RealD Tnm; - RealD Tnp; - - RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); - - RealD T0=1; - RealD T1=y; - - RealD sum; - sum = 0.5*Coeffs[0]*T0; - sum+= Coeffs[1]*T1; - - Tn =T1; - Tnm=T0; - for(int i=2;i::quiet_NaN(); - } - - // Implement the required interface - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - - GridBase *grid=in._grid; - - // std::cout << "Chevyshef(): in._grid="< - class ChebyshevLanczos : public Chebyshev { - private: - std::vector Coeffs; - int order; - RealD alpha; - RealD beta; - RealD mu; + //////////////////////////////////////////////////////////////////////////////////////////////////// + // c.f. numerical recipes "chebft"/"chebev". This is sec 5.8 "Chebyshev approximation". 
+ //////////////////////////////////////////////////////////////////////////////////////////////////// + // CJ: the one we need for Lanczos + void Init(RealD _lo,RealD _hi,int _order) + { + lo=_lo; + hi=_hi; + order=_order; + + if(order < 2) exit(-1); + Coeffs.resize(order); + Coeffs.assign(0.,order); + Coeffs[order-1] = 1.; + }; - public: - ChebyshevLanczos(RealD _alpha,RealD _beta,RealD _mu,int _order) : + void Init(RealD _lo,RealD _hi,int _order, RealD (* func)(RealD)) + { + lo=_lo; + hi=_hi; + order=_order; + + if(order < 2) exit(-1); + Coeffs.resize(order); + for(int j=0;j U(M); + std::vector a(M); + std::vector g(M); + for(int n=0;n<=M;n++){ + U[n] = std::sin((n+1)*std::acos(lmax))/std::sin(std::acos(lmax)); + sumUsq += U[n]*U[n]; + } + sumUsq = std::sqrt(sumUsq); + + for(int i=1;i<=M;i++){ + a[i] = U[i]/sumUsq; + } + g[0] = 1.0; + for(int m=1;m<=M;m++){ + g[m] = 0; + for(int i=0;i<=M-m;i++){ + g[m]+= a[i]*a[m+i]; + } + } + for(int m=1;m<=M;m++){ + Coeffs[m]*=g[m]; + } + } + RealD approx(RealD x) // Convenience for plotting the approximation + { + RealD Tn; + RealD Tnm; + RealD Tnp; + + RealD y=( x-0.5*(hi+lo))/(0.5*(hi-lo)); + + RealD T0=1; + RealD T1=y; + + RealD sum; + sum = 0.5*Coeffs[0]*T0; + sum+= Coeffs[1]*T1; + + Tn =T1; + Tnm=T0; + for(int i=2;i::quiet_NaN(); + } + + // Implement the required interface + void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { + + GridBase *grid=in.Grid(); + + // std::cout << "Chevyshef(): in.Grid()="< &Linop, const Field &in, Field &out) - { - GridBase *grid=in._grid; - Field tmp(grid); - - RealD aa= alpha*alpha; - RealD bb= beta * beta; - - Linop.HermOp(in,out); - out = out - mu*in; - - Linop.HermOp(out,tmp); - tmp = tmp - mu * out; - - out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in; - }; - // Implement the required interface - void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { - - GridBase *grid=in._grid; - - int vol=grid->gSites(); - - Field T0(grid); T0 = in; - Field 
T1(grid); - Field T2(grid); - Field y(grid); - - Field *Tnm = &T0; - Field *Tn = &T1; - Field *Tnp = &T2; - - // Tn=T1 = (xscale M )*in - AminusMuSq(Linop,T0,T1); - - // sum = .5 c[0] T0 + c[1] T1 - out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1; - for(int n=2;n &Linop, const Field &in, Field &out) + { + GridBase *grid=in.Grid(); + Field tmp(grid); + + RealD aa= alpha*alpha; + RealD bb= beta * beta; + + Linop.HermOp(in,out); + out = out - mu*in; + + Linop.HermOp(out,tmp); + tmp = tmp - mu * out; + + out = (2.0/ (aa-bb) ) * tmp - ((aa+bb)/(aa-bb))*in; + }; + // Implement the required interface + void operator() (LinearOperatorBase &Linop, const Field &in, Field &out) { + + GridBase *grid=in.Grid(); + + int vol=grid->gSites(); + + Field T0(grid); T0 = in; + Field T1(grid); + Field T2(grid); + Field y(grid); + + Field *Tnm = &T0; + Field *Tn = &T1; + Field *Tnp = &T2; + + // Tn=T1 = (xscale M )*in + AminusMuSq(Linop,T0,T1); + + // sum = .5 c[0] T0 + c[1] T1 + out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1; + for(int n=2;n - class Forecast +// Abstract base class. +// Takes a matrix (Mat), a source (phi), and a vector of Fields (chi) +// and returns a forecasted solution to the system D*psi = phi (psi). +template +class Forecast +{ +public: + virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector& chi) = 0; +}; + +// Implementation of Brower et al.'s chronological inverter (arXiv:hep-lat/9509012), +// used to forecast solutions across poles of the EOFA heatbath. 
+// +// Modified from CPS (cps_pp/src/util/dirac_op/d_op_base/comsrc/minresext.C) +template +class ChronoForecast : public Forecast +{ +public: + Field operator()(Matrix &Mat, const Field& phi, const std::vector& prev_solns) { - public: - virtual Field operator()(Matrix &Mat, const Field& phi, const std::vector& chi) = 0; + int degree = prev_solns.size(); + Field chi(phi); // forecasted solution + + // Trivial cases + if(degree == 0){ chi = Zero(); return chi; } + else if(degree == 1){ return prev_solns[0]; } + + // RealD dot; + ComplexD xp; + Field r(phi); // residual + Field Mv(phi); + std::vector v(prev_solns); // orthonormalized previous solutions + std::vector MdagMv(degree,phi); + + // Array to hold the matrix elements + std::vector> G(degree, std::vector(degree)); + + // Solution and source vectors + std::vector a(degree); + std::vector b(degree); + + // Orthonormalize the vector basis + for(int i=0; i abs(G[k][k])){ k = j; } } + if(k != i){ + xp = b[k]; + b[k] = b[i]; + b[i] = xp; + for(int j=0; j=0; i--){ + a[i] = 0.0; + for(int j=i+1; j &Linop, const std::vector &Linop, const Field &B, Field &X) { int Orthog = blockDim; // First dimension is block dim; this is an assumption - Nblock = B._grid->_fdimensions[Orthog]; + Nblock = B.Grid()->_fdimensions[Orthog]; /* FAKE */ Nblock=8; std::cout< &Linop, const Field &B, Field &X) void CGmultiRHSsolve(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { int Orthog = blockDim; // First dimension is block dim - Nblock = Src._grid->_fdimensions[Orthog]; + Nblock = Src.Grid()->_fdimensions[Orthog]; std::cout< &AP, Eigen::MatrixXcd &m , const std::vector< for(int b=0;b &AP, Eigen::MatrixXcd &m , const std::vector< void MulMatrix(std::vector &AP, Eigen::MatrixXcd &m , const std::vector &X){ // Should make this cache friendly with site outermost, parallel_for for(int b=0;b &Linop, const std::vector &Linop, const std::vector class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction { public: + 
using OperatorFunction::operator(); + bool ErrorOnNoConverge; // Throw an assert when CAGMRES fails to converge, // defaults to true @@ -52,10 +54,10 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction< Eigen::MatrixXcd H; - std::vector> y; - std::vector> gamma; - std::vector> c; - std::vector> s; + std::vector y; + std::vector gamma; + std::vector c; + std::vector s; CommunicationAvoidingGeneralisedMinimalResidual(RealD tol, Integer maxit, @@ -76,7 +78,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction< std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular GMRES" << std::endl; - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); RealD guess = norm2(psi); @@ -86,7 +88,7 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction< RealD ssq = norm2(src); RealD rsq = Tolerance * Tolerance * ssq; - Field r(src._grid); + Field r(src.Grid()); std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "CommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl; @@ -142,11 +144,11 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction< RealD cp = 0; - Field w(src._grid); - Field r(src._grid); + Field w(src.Grid()); + Field r(src.Grid()); // this should probably be made a class member so that it is only allocated once, not in every restart - std::vector v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; + std::vector v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero(); MatrixTimer.Start(); LinOp.Op(psi, w); @@ -157,7 +159,9 @@ class CommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction< gamma[0] = sqrt(norm2(r)); - v[0] = (1. 
/ gamma[0]) * r; + ComplexD scale = 1.0/gamma[0]; + v[0] = scale * r; + LinalgTimer.Stop(); for (int i=0; i= 0; i--) { y[i] = gamma[i]; for (int k = i + 1; k <= iter; k++) - y[i] = y[i] - H(k, i) * y[k]; - y[i] = y[i] / H(i, i); + y[i] = y[i] - ComplexD(H(k, i)) * y[k]; + y[i] = y[i] / ComplexD(H(i, i)); } for (int i = 0; i <= iter; i++) diff --git a/Grid/algorithms/iterative/ConjugateGradient.h b/Grid/algorithms/iterative/ConjugateGradient.h index c1717e2a..398f578f 100644 --- a/Grid/algorithms/iterative/ConjugateGradient.h +++ b/Grid/algorithms/iterative/ConjugateGradient.h @@ -27,11 +27,11 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_H #define GRID_CONJUGATE_GRADIENT_H -namespace Grid { +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////// // Base classes for iterative processes based on operators @@ -40,7 +40,10 @@ namespace Grid { template class ConjugateGradient : public OperatorFunction { - public: +public: + + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. // Defaults true. RealD Tolerance; @@ -48,17 +51,18 @@ class ConjugateGradient : public OperatorFunction { Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; + : Tolerance(tol), + MaxIterations(maxit), + ErrorOnNoConverge(err_on_no_conv){}; void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { + psi.Checkerboard() = src.Checkerboard(); - psi.checkerboard = src.checkerboard; conformable(psi, src); - RealD cp, c, a, d, b, ssq, qq, b_pred; + RealD cp, c, a, d, b, ssq, qq; + //RealD b_pred; Field p(src); Field mmp(src); @@ -70,7 +74,7 @@ class ConjugateGradient : public OperatorFunction { Linop.HermOpAndNorm(psi, mmp, d, b); - + r = src - mmp; p = r; @@ -127,10 +131,13 @@ class ConjugateGradient : public OperatorFunction { b = cp / c; LinearCombTimer.Start(); - parallel_for(int ss=0;ssoSites();ss++){ - vstream(psi[ss], a * p[ss] + psi[ss]); - vstream(p [ss], b * p[ss] + r[ss]); - } + auto psi_v = psi.View(); + auto p_v = p.View(); + auto r_v = r.View(); + accelerator_for(ss,p_v.size(), Field::vector_object::Nsimd(),{ + coalescedWrite(psi_v[ss], a * p_v(ss) + psi_v(ss)); + coalescedWrite(p_v[ss] , b * p_v(ss) + r_v (ss)); + }); LinearCombTimer.Stop(); LinalgTimer.Stop(); @@ -143,12 +150,12 @@ class ConjugateGradient : public OperatorFunction { Linop.HermOpAndNorm(psi, mmp, d, qq); p = mmp - src; - RealD srcnorm = sqrt(norm2(src)); - RealD resnorm = sqrt(norm2(p)); + RealD srcnorm = std::sqrt(norm2(src)); + RealD resnorm = std::sqrt(norm2(p)); RealD true_residual = resnorm / srcnorm; std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl; - std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)< { } }; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h index 4c2cf12f..08942097 100644 --- a/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h +++ 
b/Grid/algorithms/iterative/ConjugateGradientMixedPrec.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,13 +23,12 @@ Author: Christopher Kelly 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H #define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H -namespace Grid { - +NAMESPACE_BEGIN(Grid); //Mixed precision restarted defect correction CG template CG_f(inner_tol, MaxInnerIterations); - CG_f.ErrorOnNoConverge = false; + ConjugateGradient CG_f(inner_tol, MaxInnerIterations); + CG_f.ErrorOnNoConverge = false; - GridStopWatch InnerCGtimer; + GridStopWatch InnerCGtimer; - GridStopWatch PrecChangeTimer; + GridStopWatch PrecChangeTimer; - Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count + Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count - for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ - //Compute double precision rsd and also new RHS vector. - Linop_d.HermOp(sol_d, tmp_d); - RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector + for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + //Compute double precision rsd and also new RHS vector. 
+ Linop_d.HermOp(sol_d, tmp_d); + RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector - std::cout< CG_d(Tolerance, MaxInnerIterations); - CG_d(Linop_d, src_d_in, sol_d); - TotalFinalStepIterations = CG_d.IterationsToComplete; + while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ?? - TotalTimer.Stop(); - std::cout< CG_d(Tolerance, MaxInnerIterations); + CG_d(Linop_d, src_d_in, sol_d); + TotalFinalStepIterations = CG_d.IterationsToComplete; + TotalTimer.Stop(); + std::cout< 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H #define GRID_CONJUGATE_MULTI_SHIFT_GRADIENT_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////////////// - // Base classes for iterative processes based on operators - // single input vec, single output vec. - ///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +// Base classes for iterative processes based on operators +// single input vec, single output vec. +///////////////////////////////////////////////////////////// - template - class ConjugateGradientMultiShift : public OperatorMultiFunction, - public OperatorFunction - { +template +class ConjugateGradientMultiShift : public OperatorMultiFunction, + public OperatorFunction +{ public: - RealD Tolerance; - Integer MaxIterations; - Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion - int verbose; - MultiShiftFunction shifts; - ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : - MaxIterations(maxit), - shifts(_shifts) - { - verbose=1; + using OperatorFunction::operator(); + + RealD Tolerance; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion + int verbose; + MultiShiftFunction shifts; + + ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) : + MaxIterations(maxit), + shifts(_shifts) + { + verbose=1; + } + + void operator() (LinearOperatorBase &Linop, const Field &src, Field &psi) + { + GridBase *grid = src.Grid(); + int nshift = shifts.order; + std::vector results(nshift,grid); + (*this)(Linop,src,results,psi); + } + void operator() (LinearOperatorBase &Linop, const Field &src, std::vector &results, Field &psi) + { + int nshift = shifts.order; + + (*this)(Linop,src,results); + + psi = shifts.norm*src; + for(int i=0;i &Linop, const Field &src, Field &psi) -{ - GridBase *grid = src._grid; - int nshift = shifts.order; - std::vector results(nshift,grid); - (*this)(Linop,src,results,psi); -} -void operator() (LinearOperatorBase &Linop, const Field &src, std::vector &results, Field &psi) -{ - int nshift = shifts.order; - - (*this)(Linop,src,results); - - psi = shifts.norm*src; - for(int i=0;i &Linop, const Field &src, std::vector &psi) + { + + GridBase *grid = src.Grid(); + + //////////////////////////////////////////////////////////////////////// + // Convenience references to the info stored in "MultiShiftFunction" + //////////////////////////////////////////////////////////////////////// + int nshift = shifts.order; -void operator() (LinearOperatorBase &Linop, const Field &src, std::vector &psi) -{ - - GridBase *grid = src._grid; - - //////////////////////////////////////////////////////////////////////// - // Convenience references to the info stored in "MultiShiftFunction" - 
//////////////////////////////////////////////////////////////////////// - int nshift = shifts.order; + std::vector &mass(shifts.poles); // Make references to array in "shifts" + std::vector &mresidual(shifts.tolerances); + std::vector alpha(nshift,1.0); + std::vector ps(nshift,grid);// Search directions - std::vector &mass(shifts.poles); // Make references to array in "shifts" - std::vector &mresidual(shifts.tolerances); - std::vector alpha(nshift,1.0); - std::vector ps(nshift,grid);// Search directions - - assert(psi.size()==nshift); - assert(mass.size()==nshift); - assert(mresidual.size()==nshift); + assert(psi.size()==nshift); + assert(mass.size()==nshift); + assert(mresidual.size()==nshift); - // dynamic sized arrays on stack; 2d is a pain with vector - RealD bs[nshift]; - RealD rsq[nshift]; - RealD z[nshift][2]; - int converged[nshift]; + // dynamic sized arrays on stack; 2d is a pain with vector + RealD bs[nshift]; + RealD rsq[nshift]; + RealD z[nshift][2]; + int converged[nshift]; - const int primary =0; + const int primary =0; - //Primary shift fields CG iteration - RealD a,b,c,d; - RealD cp,bp,qq; //prev + //Primary shift fields CG iteration + RealD a,b,c,d; + RealD cp,bp,qq; //prev - // Matrix mult fields - Field r(grid); - Field p(grid); - Field tmp(grid); - Field mmp(grid); + // Matrix mult fields + Field r(grid); + Field p(grid); + Field tmp(grid); + Field mmp(grid); - // Check lightest mass - for(int s=0;s= mass[primary] ); - converged[s]=0; - } + // Check lightest mass + for(int s=0;s= mass[primary] ); + converged[s]=0; + } - // Wire guess to zero - // Residuals "r" are src - // First search direction "p" is also src - cp = norm2(src); - for(int s=0;s &Linop, const Field &src, std::vector GridStopWatch SolverTimer; SolverTimer.Start(); - // Iteration loop - int k; + // Iteration loop + int k; - for (k=1;k<=MaxIterations;k++){ + for (k=1;k<=MaxIterations;k++){ - a = c /cp; + a = c /cp; AXPYTimer.Start(); - axpy(p,a,p,r); + axpy(p,a,p,r); 
AXPYTimer.Stop(); - // Note to self - direction ps is iterated seperately - // for each shift. Does not appear to have any scope - // for avoiding linear algebra in "single" case. - // - // However SAME r is used. Could load "r" and update - // ALL ps[s]. 2/3 Bandwidth saving - // New Kernel: Load r, vector of coeffs, vector of pointers ps + // Note to self - direction ps is iterated seperately + // for each shift. Does not appear to have any scope + // for avoiding linear algebra in "single" case. + // + // However SAME r is used. Could load "r" and update + // ALL ps[s]. 2/3 Bandwidth saving + // New Kernel: Load r, vector of coeffs, vector of pointers ps AXPYTimer.Start(); - for(int s=0;s &Linop, const Field &src, std::vector MatrixTimer.Stop(); AXPYTimer.Start(); - axpy(mmp,mass[0],p,mmp); + axpy(mmp,mass[0],p,mmp); AXPYTimer.Stop(); - RealD rn = norm2(p); - d += rn*mass[0]; + RealD rn = norm2(p); + d += rn*mass[0]; - bp=b; - b=-cp/d; + bp=b; + b=-cp/d; AXPYTimer.Start(); - c=axpy_norm(r,b,mmp,r); + c=axpy_norm(r,b,mmp,r); AXPYTimer.Stop(); - // Toggle the recurrence history - bs[0] = b; - iz = 1-iz; + // Toggle the recurrence history + bs[0] = b; + iz = 1-iz; ShiftTimer.Start(); - for(int s=1;s 2+Ls, so ~ 3x saving - // Pipelined CG gain: - // - // New Kernel: Load r, vector of coeffs, vector of pointers ps - // New Kernel: Load psi[0], vector of coeffs, vector of pointers ps - // If can predict the coefficient bs then we can fuse these and avoid write reread cyce - // on ps[s]. - // Before: 3 x npole + 3 x npole - // After : 2 x npole (ps[s]) => 3x speed up of multishift CG. + for(int s=0;s 2+Ls, so ~ 3x saving + // Pipelined CG gain: + // + // New Kernel: Load r, vector of coeffs, vector of pointers ps + // New Kernel: Load psi[0], vector of coeffs, vector of pointers ps + // If can predict the coefficient bs then we can fuse these and avoid write reread cyce + // on ps[s]. 
+ // Before: 3 x npole + 3 x npole + // After : 2 x npole (ps[s]) => 3x speed up of multishift CG. - if( (!converged[s]) ) { - axpy(psi[ss],-bs[s]*alpha[s],ps[s],psi[ss]); - } - } - - // Convergence checks - int all_converged = 1; - for(int s=0;s &Linop, const Field &src, std::vector IterationsToComplete = k; - return; - } + return; + } + } + // ugly hack + std::cout< 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H #define GRID_CONJUGATE_GRADIENT_RELIABLE_UPDATE_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> - class ConjugateGradientReliableUpdate : public LinearFunction { - public: - bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. - // Defaults true. - RealD Tolerance; - Integer MaxIterations; - Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion - Integer ReliableUpdatesPerformed; +template::value == 2, int>::type = 0, + typename std::enable_if< getPrecision::value == 1, int>::type = 0> +class ConjugateGradientReliableUpdate : public LinearFunction { +public: + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. + // Defaults true. + RealD Tolerance; + Integer MaxIterations; + Integer IterationsToComplete; //Number of iterations the CG took to finish. 
Filled in upon completion + Integer ReliableUpdatesPerformed; - bool DoFinalCleanup; //Final DP cleanup, defaults to true - Integer IterationsToCleanup; //Final DP cleanup step iterations + bool DoFinalCleanup; //Final DP cleanup, defaults to true + Integer IterationsToCleanup; //Final DP cleanup step iterations - LinearOperatorBase &Linop_f; - LinearOperatorBase &Linop_d; - GridBase* SinglePrecGrid; - RealD Delta; //reliable update parameter + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + GridBase* SinglePrecGrid; + RealD Delta; //reliable update parameter - //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single - LinearOperatorBase *Linop_fallback; - RealD fallback_transition_tol; + //Optional ability to switch to a different linear operator once the tolerance reaches a certain point. Useful for single/half -> single/single + LinearOperatorBase *Linop_fallback; + RealD fallback_transition_tol; - ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d, bool err_on_no_conv = true) - : Tolerance(tol), - MaxIterations(maxit), - Delta(_delta), - Linop_f(_Linop_f), - Linop_d(_Linop_d), - SinglePrecGrid(_sp_grid), - ErrorOnNoConverge(err_on_no_conv), - DoFinalCleanup(true), - Linop_fallback(NULL) - {}; + ConjugateGradientReliableUpdate(RealD tol, Integer maxit, RealD _delta, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d, bool err_on_no_conv = true) + : Tolerance(tol), + MaxIterations(maxit), + Delta(_delta), + Linop_f(_Linop_f), + Linop_d(_Linop_d), + SinglePrecGrid(_sp_grid), + ErrorOnNoConverge(err_on_no_conv), + DoFinalCleanup(true), + Linop_fallback(NULL) + {}; - void setFallbackLinop(LinearOperatorBase &_Linop_fallback, const RealD _fallback_transition_tol){ - Linop_fallback = &_Linop_fallback; - fallback_transition_tol = 
_fallback_transition_tol; - } + void setFallbackLinop(LinearOperatorBase &_Linop_fallback, const RealD _fallback_transition_tol){ + Linop_fallback = &_Linop_fallback; + fallback_transition_tol = _fallback_transition_tol; + } - void operator()(const FieldD &src, FieldD &psi) { - LinearOperatorBase *Linop_f_use = &Linop_f; - bool using_fallback = false; + void operator()(const FieldD &src, FieldD &psi) { + LinearOperatorBase *Linop_f_use = &Linop_f; + bool using_fallback = false; - psi.checkerboard = src.checkerboard; - conformable(psi, src); + psi.Checkerboard() = src.Checkerboard(); + conformable(psi, src); - RealD cp, c, a, d, b, ssq, qq, b_pred; + RealD cp, c, a, d, b, ssq, qq, b_pred; - FieldD p(src); - FieldD mmp(src); - FieldD r(src); + FieldD p(src); + FieldD mmp(src); + FieldD r(src); - // Initial residual computation & set up - RealD guess = norm2(psi); - assert(std::isnan(guess) == 0); + // Initial residual computation & set up + RealD guess = norm2(psi); + assert(std::isnan(guess) == 0); - Linop_d.HermOpAndNorm(psi, mmp, d, b); + Linop_d.HermOpAndNorm(psi, mmp, d, b); - r = src - mmp; - p = r; + r = src - mmp; + p = r; - a = norm2(p); - cp = a; - ssq = norm2(src); + a = norm2(p); + cp = a; + ssq = norm2(src); - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl; - std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl; + std::cout << GridLogIterative << 
std::setprecision(4) << "ConjugateGradientReliableUpdate: guess " << guess << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: src " << ssq << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mp " << d << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: mmp " << b << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: cp,r " << cp << std::endl; + std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradientReliableUpdate: p " << a << std::endl; - RealD rsq = Tolerance * Tolerance * ssq; + RealD rsq = Tolerance * Tolerance * ssq; - // Check if guess is really REALLY good :) + // Check if guess is really REALLY good :) + if (cp <= rsq) { + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate guess was REALLY good\n"; + std::cout << GridLogMessage << "\tComputed residual " << std::sqrt(cp / ssq)<HermOpAndNorm(p_f, mmp_f, d, qq); + MatrixTimer.Stop(); + + LinalgTimer.Start(); + + a = c / d; + b_pred = a * (a * qq - d) / c; + + cp = axpy_norm(r_f, -a, mmp_f, r_f); + b = cp / c; + + // Fuse these loops ; should be really easy + psi_f = a * p_f + psi_f; + //p_f = p_f * b + r_f; + + LinalgTimer.Stop(); + + std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k + << " residual " << cp << " target " << rsq << std::endl; + std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl; + std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl; + + if(cp > MaxResidSinceLastRelUp){ + std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl; + MaxResidSinceLastRelUp = cp; + } + + // Stopping condition if (cp <= rsq) { - std::cout << GridLogMessage << 
"ConjugateGradientReliableUpdate guess was REALLY good\n"; - std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)< CG(Tolerance,MaxIterations); + CG.ErrorOnNoConverge = ErrorOnNoConverge; + CG(Linop_d,src,psi); + IterationsToCleanup = CG.IterationsToComplete; + } + else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); + + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n"; return; } + else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate " + << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n"; + precisionChange(mmp, psi_f); + psi = psi + mmp; - //Single prec initialization - FieldF r_f(SinglePrecGrid); - r_f.checkerboard = r.checkerboard; - precisionChange(r_f, r); + Linop_d.HermOpAndNorm(psi, mmp, d, qq); + r = src - mmp; - FieldF psi_f(r_f); - psi_f = zero; + psi_f = Zero(); + precisionChange(r_f, r); + cp = norm2(r); + MaxResidSinceLastRelUp = cp; - FieldF p_f(r_f); - FieldF mmp_f(r_f); - - RealD MaxResidSinceLastRelUp = cp; //initial residual - - std::cout << GridLogIterative << std::setprecision(4) - << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl; - - GridStopWatch LinalgTimer; - GridStopWatch MatrixTimer; - GridStopWatch SolverTimer; - - SolverTimer.Start(); - int k = 0; - int l = 0; - - for (k = 1; k <= MaxIterations; k++) { - c = cp; - - MatrixTimer.Start(); - Linop_f_use->HermOpAndNorm(p_f, mmp_f, d, qq); - MatrixTimer.Stop(); - - LinalgTimer.Start(); - - a = c / d; - b_pred = a * (a * qq - d) / c; - - cp = axpy_norm(r_f, -a, mmp_f, r_f); - b = cp / c; - - // Fuse these loops ; should be really easy - psi_f = a * p_f + psi_f; - //p_f = p_f * b + r_f; - - LinalgTimer.Stop(); - - std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: Iteration " << k - << " residual " 
<< cp << " target " << rsq << std::endl; - std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl; - std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl; - - if(cp > MaxResidSinceLastRelUp){ - std::cout << GridLogIterative << "ConjugateGradientReliableUpdate: updating MaxResidSinceLastRelUp : " << MaxResidSinceLastRelUp << " -> " << cp << std::endl; - MaxResidSinceLastRelUp = cp; - } + b = cp/c; - // Stopping condition - if (cp <= rsq) { - //Although not written in the paper, I assume that I have to add on the final solution - precisionChange(mmp, psi_f); - psi = psi + mmp; - - - SolverTimer.Stop(); - Linop_d.HermOpAndNorm(psi, mmp, d, qq); - p = mmp - src; - - RealD srcnorm = sqrt(norm2(src)); - RealD resnorm = sqrt(norm2(p)); - RealD true_residual = resnorm / srcnorm; - - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate Converged on iteration " << k << " after " << l << " reliable updates" << std::endl; - std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)< CG(Tolerance,MaxIterations); - CG.ErrorOnNoConverge = ErrorOnNoConverge; - CG(Linop_d,src,psi); - IterationsToCleanup = CG.IterationsToComplete; - } - else if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); - - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate complete.\n"; - return; - } - else if(cp < Delta * MaxResidSinceLastRelUp) { //reliable update - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate " - << cp << "(residual) < " << Delta << "(Delta) * " << MaxResidSinceLastRelUp << "(MaxResidSinceLastRelUp) on iteration " << k << " : performing reliable update\n"; - precisionChange(mmp, psi_f); - psi = psi + mmp; - - Linop_d.HermOpAndNorm(psi, mmp, d, qq); - r = src - mmp; - - psi_f = zero; - precisionChange(r_f, r); - cp = norm2(r); - MaxResidSinceLastRelUp = cp; - - b = cp/c; - - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate new residual 
" << cp << std::endl; - - l = l+1; - } - - p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence - - if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){ - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl; - Linop_f_use = Linop_fallback; - using_fallback = true; - } - - + l = l+1; } - std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" - << std::endl; + + p_f = p_f * b + r_f; //update search vector after reliable update appears to help convergence + + if(!using_fallback && Linop_fallback != NULL && cp < fallback_transition_tol){ + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate switching to fallback linear operator on iteration " << k << " at residual " << cp << std::endl; + Linop_f_use = Linop_fallback; + using_fallback = true; + } + + + } + std::cout << GridLogMessage << "ConjugateGradientReliableUpdate did NOT converge" + << std::endl; - if (ErrorOnNoConverge) assert(0); - IterationsToComplete = k; - ReliableUpdatesPerformed = l; - } - }; - - + if (ErrorOnNoConverge) assert(0); + IterationsToComplete = k; + ReliableUpdatesPerformed = l; + } }; +NAMESPACE_END(Grid); + + #endif diff --git a/Grid/algorithms/iterative/ConjugateResidual.h b/Grid/algorithms/iterative/ConjugateResidual.h index b6f99554..e0c3b69d 100644 --- a/Grid/algorithms/iterative/ConjugateResidual.h +++ b/Grid/algorithms/iterative/ConjugateResidual.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,88 +24,90 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_RESIDUAL_H #define GRID_CONJUGATE_RESIDUAL_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////////////// - // Base classes for iterative processes based on operators - // single input vec, single output vec. - ///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +// Base classes for iterative processes based on operators +// single input vec, single output vec. +///////////////////////////////////////////////////////////// - template - class ConjugateResidual : public OperatorFunction { - public: - RealD Tolerance; - Integer MaxIterations; - int verbose; +template +class ConjugateResidual : public OperatorFunction { +public: + using OperatorFunction::operator(); - ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { - verbose=0; - }; + RealD Tolerance; + Integer MaxIterations; + int verbose; - void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + ConjugateResidual(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { + verbose=0; + }; - RealD a, b, c, d; - RealD cp, ssq,rsq; + void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + + RealD a, b; // c, d; + RealD cp, ssq,rsq; - RealD rAr, rAAr, rArp; - RealD pAp, pAAp; + RealD rAr, rAAr, rArp; + RealD pAp, pAAp; - GridBase *grid = src._grid; - psi=zero; - Field r(grid), p(grid), Ap(grid), Ar(grid); + GridBase *grid = src.Grid(); + psi=Zero(); + Field r(grid), p(grid), Ap(grid), Ar(grid); - r=src; - p=src; + r=src; + p=src; + + Linop.HermOpAndNorm(p,Ap,pAp,pAAp); + Linop.HermOpAndNorm(r,Ar,rAr,rAAr); + + 
cp =norm2(r); + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; + + if (verbose) std::cout< class ZeroGuesser: public LinearFunction { public: - virtual void operator()(const Field &src, Field &guess) { guess = zero; }; + virtual void operator()(const Field &src, Field &guess) { guess = Zero(); }; }; template class DoNothingGuesser: public LinearFunction { @@ -60,14 +60,14 @@ public: DeflatedGuesser(const std::vector & _evec,const std::vector & _eval) : evec(_evec), eval(_eval) {}; virtual void operator()(const Field &src,Field &guess) { - guess = zero; + guess = Zero(); assert(evec.size()==eval.size()); auto N = evec.size(); for (int i=0;i class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorFunction { public: + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // Throw an assert when FCAGMRES fails to converge, // defaults to true @@ -53,10 +55,10 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF Eigen::MatrixXcd H; - std::vector> y; - std::vector> gamma; - std::vector> c; - std::vector> s; + std::vector y; + std::vector gamma; + std::vector c; + std::vector s; LinearFunction &Preconditioner; @@ -81,7 +83,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF std::cout << GridLogWarning << "This algorithm currently doesn't differ from regular FGMRES" << std::endl; - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); RealD guess = norm2(psi); @@ -91,7 +93,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF RealD ssq = norm2(src); RealD rsq = Tolerance * Tolerance * ssq; - Field r(src._grid); + Field r(src.Grid()); std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: guess " << guess << std::endl; @@ -149,12 +151,12 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public 
OperatorF RealD cp = 0; - Field w(src._grid); - Field r(src._grid); + Field w(src.Grid()); + Field r(src.Grid()); // these should probably be made class members so that they are only allocated once, not in every restart - std::vector v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; - std::vector z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; + std::vector v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero(); + std::vector z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero(); MatrixTimer.Start(); LinOp.Op(psi, w); @@ -176,7 +178,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF qrUpdate(i); - cp = std::norm(gamma[i+1]); + cp = norm(gamma[i+1]); std::cout << GridLogIterative << "FlexibleCommunicationAvoidingGeneralisedMinimalResidual: Iteration " << IterationCount << " residual " << cp << " target " << rsq << std::endl; @@ -206,11 +208,11 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF LinalgTimer.Start(); for (int i = 0; i <= iter; ++i) { H(iter, i) = innerProduct(v[i], w); - w = w - H(iter, i) * v[i]; + w = w - ComplexD(H(iter, i)) * v[i]; } H(iter, iter + 1) = sqrt(norm2(w)); - v[iter + 1] = (1. / H(iter, iter + 1)) * w; + v[iter + 1] = ComplexD(1. 
/ H(iter, iter + 1)) * w; LinalgTimer.Stop(); } @@ -218,13 +220,13 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF QrTimer.Start(); for (int i = 0; i < iter ; ++i) { - auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); - H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); + auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1)); + H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1)); H(iter, i + 1) = tmp; } // Compute new Givens Rotation - ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); + auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); c[iter] = H(iter, iter) / nu; s[iter] = H(iter, iter + 1) / nu; @@ -233,7 +235,7 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF H(iter, iter + 1) = 0.; gamma[iter + 1] = -s[iter] * gamma[iter]; - gamma[iter] = std::conj(c[iter]) * gamma[iter]; + gamma[iter] = conjugate(c[iter]) * gamma[iter]; QrTimer.Stop(); } @@ -243,8 +245,8 @@ class FlexibleCommunicationAvoidingGeneralisedMinimalResidual : public OperatorF for (int i = iter; i >= 0; i--) { y[i] = gamma[i]; for (int k = i + 1; k <= iter; k++) - y[i] = y[i] - H(k, i) * y[k]; - y[i] = y[i] / H(i, i); + y[i] = y[i] - ComplexD(H(k, i)) * y[k]; + y[i] = y[i] / ComplexD(H(i, i)); } for (int i = 0; i <= iter; i++) diff --git a/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h index efc8c787..cf108846 100644 --- a/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h +++ b/Grid/algorithms/iterative/FlexibleGeneralisedMinimalResidual.h @@ -34,6 +34,8 @@ namespace Grid { template class FlexibleGeneralisedMinimalResidual : public OperatorFunction { public: + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // Throw an assert when FGMRES fails to converge, // defaults to true @@ 
-53,10 +55,10 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { Eigen::MatrixXcd H; - std::vector> y; - std::vector> gamma; - std::vector> c; - std::vector> s; + std::vector y; + std::vector gamma; + std::vector c; + std::vector s; LinearFunction &Preconditioner; @@ -79,7 +81,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { void operator()(LinearOperatorBase &LinOp, const Field &src, Field &psi) { - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); RealD guess = norm2(psi); @@ -89,7 +91,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { RealD ssq = norm2(src); RealD rsq = Tolerance * Tolerance * ssq; - Field r(src._grid); + Field r(src.Grid()); std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: guess " << guess << std::endl; @@ -147,12 +149,12 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { RealD cp = 0; - Field w(src._grid); - Field r(src._grid); + Field w(src.Grid()); + Field r(src.Grid()); // these should probably be made class members so that they are only allocated once, not in every restart - std::vector v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; - std::vector z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; + std::vector v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero(); + std::vector z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero(); MatrixTimer.Start(); LinOp.Op(psi, w); @@ -174,7 +176,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { qrUpdate(i); - cp = std::norm(gamma[i+1]); + cp = norm(gamma[i+1]); std::cout << GridLogIterative << "FlexibleGeneralisedMinimalResidual: Iteration " << IterationCount << " residual " << cp << " target " << rsq << std::endl; @@ -204,11 +206,11 @@ class FlexibleGeneralisedMinimalResidual : public 
OperatorFunction { LinalgTimer.Start(); for (int i = 0; i <= iter; ++i) { H(iter, i) = innerProduct(v[i], w); - w = w - H(iter, i) * v[i]; + w = w - ComplexD(H(iter, i)) * v[i]; } H(iter, iter + 1) = sqrt(norm2(w)); - v[iter + 1] = (1. / H(iter, iter + 1)) * w; + v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w; LinalgTimer.Stop(); } @@ -216,13 +218,13 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { QrTimer.Start(); for (int i = 0; i < iter ; ++i) { - auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); - H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); + auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1)); + H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1)); H(iter, i + 1) = tmp; } // Compute new Givens Rotation - ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); + auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); c[iter] = H(iter, iter) / nu; s[iter] = H(iter, iter + 1) / nu; @@ -231,7 +233,7 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { H(iter, iter + 1) = 0.; gamma[iter + 1] = -s[iter] * gamma[iter]; - gamma[iter] = std::conj(c[iter]) * gamma[iter]; + gamma[iter] = conjugate(c[iter]) * gamma[iter]; QrTimer.Stop(); } @@ -241,8 +243,8 @@ class FlexibleGeneralisedMinimalResidual : public OperatorFunction { for (int i = iter; i >= 0; i--) { y[i] = gamma[i]; for (int k = i + 1; k <= iter; k++) - y[i] = y[i] - H(k, i) * y[k]; - y[i] = y[i] / H(i, i); + y[i] = y[i] - ComplexD(H(k, i)) * y[k]; + y[i] = y[i] / ComplexD(H(i, i)); } for (int i = 0; i <= iter; i++) diff --git a/Grid/algorithms/iterative/GeneralisedMinimalResidual.h b/Grid/algorithms/iterative/GeneralisedMinimalResidual.h index 10636234..0596e91e 100644 --- a/Grid/algorithms/iterative/GeneralisedMinimalResidual.h +++ b/Grid/algorithms/iterative/GeneralisedMinimalResidual.h @@ -34,6 +34,8 @@ namespace Grid { 
template class GeneralisedMinimalResidual : public OperatorFunction { public: + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // Throw an assert when GMRES fails to converge, // defaults to true @@ -52,10 +54,10 @@ class GeneralisedMinimalResidual : public OperatorFunction { Eigen::MatrixXcd H; - std::vector> y; - std::vector> gamma; - std::vector> c; - std::vector> s; + std::vector y; + std::vector gamma; + std::vector c; + std::vector s; GeneralisedMinimalResidual(RealD tol, Integer maxit, @@ -74,7 +76,7 @@ class GeneralisedMinimalResidual : public OperatorFunction { void operator()(LinearOperatorBase &LinOp, const Field &src, Field &psi) { - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); RealD guess = norm2(psi); @@ -84,7 +86,7 @@ class GeneralisedMinimalResidual : public OperatorFunction { RealD ssq = norm2(src); RealD rsq = Tolerance * Tolerance * ssq; - Field r(src._grid); + Field r(src.Grid()); std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "GeneralisedMinimalResidual: guess " << guess << std::endl; @@ -140,11 +142,11 @@ class GeneralisedMinimalResidual : public OperatorFunction { RealD cp = 0; - Field w(src._grid); - Field r(src._grid); + Field w(src.Grid()); + Field r(src.Grid()); // this should probably be made a class member so that it is only allocated once, not in every restart - std::vector v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; + std::vector v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero(); MatrixTimer.Start(); LinOp.Op(psi, w); @@ -166,7 +168,7 @@ class GeneralisedMinimalResidual : public OperatorFunction { qrUpdate(i); - cp = std::norm(gamma[i+1]); + cp = norm(gamma[i+1]); std::cout << GridLogIterative << "GeneralisedMinimalResidual: Iteration " << IterationCount << " residual " << cp << " target " << rsq << std::endl; @@ -192,11 +194,11 @@ class GeneralisedMinimalResidual : public 
OperatorFunction { LinalgTimer.Start(); for (int i = 0; i <= iter; ++i) { H(iter, i) = innerProduct(v[i], w); - w = w - H(iter, i) * v[i]; + w = w - ComplexD(H(iter, i)) * v[i]; } H(iter, iter + 1) = sqrt(norm2(w)); - v[iter + 1] = (1. / H(iter, iter + 1)) * w; + v[iter + 1] = ComplexD(1. / H(iter, iter + 1)) * w; LinalgTimer.Stop(); } @@ -204,13 +206,13 @@ class GeneralisedMinimalResidual : public OperatorFunction { QrTimer.Start(); for (int i = 0; i < iter ; ++i) { - auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); - H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); + auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1)); + H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1)); H(iter, i + 1) = tmp; } // Compute new Givens Rotation - ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); + auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); c[iter] = H(iter, iter) / nu; s[iter] = H(iter, iter + 1) / nu; @@ -219,7 +221,7 @@ class GeneralisedMinimalResidual : public OperatorFunction { H(iter, iter + 1) = 0.; gamma[iter + 1] = -s[iter] * gamma[iter]; - gamma[iter] = std::conj(c[iter]) * gamma[iter]; + gamma[iter] = conjugate(c[iter]) * gamma[iter]; QrTimer.Stop(); } @@ -229,8 +231,8 @@ class GeneralisedMinimalResidual : public OperatorFunction { for (int i = iter; i >= 0; i--) { y[i] = gamma[i]; for (int k = i + 1; k <= iter; k++) - y[i] = y[i] - H(k, i) * y[k]; - y[i] = y[i] / H(i, i); + y[i] = y[i] - ComplexD(H(k, i)) * y[k]; + y[i] = y[i] / ComplexD(H(i, i)); } for (int i = 0; i <= iter; i++) diff --git a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h index 64f1e9a7..47dcee52 100644 --- a/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -35,7 +35,7 @@ Author: Christoph Lehner //#include 
#include -namespace Grid { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////// // Move following 100 LOC to lattice/Lattice_basis.h @@ -52,26 +52,31 @@ void basisOrthogonalize(std::vector &basis,Field &w,int k) template void basisRotate(std::vector &basis,Eigen::MatrixXd& Qt,int j0, int j1, int k0,int k1,int Nm) { + typedef decltype(basis[0].View()) View; + auto tmp_v = basis[0].View(); + std::vector basis_v(basis.size(),tmp_v); typedef typename Field::vector_object vobj; - GridBase* grid = basis[0]._grid; + GridBase* grid = basis[0].Grid(); - parallel_region - { + for(int k=0;k > B(Nm); // Thread private - - parallel_for_internal(int ss=0;ss < grid->oSites();ss++){ + thread_for_in_region(ss, grid->oSites(),{ for(int j=j0; j void basisRotateJ(Field &result,std::vector &basis,Eigen::MatrixXd& Qt,int j, int k0,int k1,int Nm) { typedef typename Field::vector_object vobj; - GridBase* grid = basis[0]._grid; + GridBase* grid = basis[0].Grid(); - result.checkerboard = basis[0].checkerboard; - parallel_for(int ss=0;ss < grid->oSites();ss++){ - vobj B = zero; + result.Checkerboard() = basis[0].Checkerboard(); + auto result_v=result.View(); + thread_for(ss, grid->oSites(),{ + vobj B = Zero(); for(int k=k0; k @@ -119,7 +126,7 @@ void basisReorderInPlace(std::vector &_v,std::vector& sort_vals, s assert(idx[i] > i); assert(j!=idx.size()); assert(idx[j]==i); - std::swap(_v[i]._odata,_v[idx[i]]._odata); // should use vector move constructor, no data copy + swap(_v[i],_v[idx[i]]); // should use vector move constructor, no data copy std::swap(sort_vals[i],sort_vals[idx[i]]); idx[j] = idx[i]; @@ -150,6 +157,19 @@ void basisSortInPlace(std::vector & _v,std::vector& sort_vals, boo basisReorderInPlace(_v,sort_vals,idx); } +// PAB: faster to compute the inner products first then fuse loops. +// If performance critical can improve. 
+template +void basisDeflate(const std::vector &_v,const std::vector& eval,const Field& src_orig,Field& result) { + result = Zero(); + assert(_v.size()==eval.size()); + int N = (int)_v.size(); + for (int i=0;i static RealD normalise(T& v) { RealD nn = norm2(v); - nn = sqrt(nn); + nn = std::sqrt(nn); v = v * (1.0/nn); return nn; } @@ -321,8 +341,8 @@ until convergence */ void calc(std::vector& eval, std::vector& evec, const Field& src, int& Nconv, bool reverse=false) { - GridBase *grid = src._grid; - assert(grid == evec[0]._grid); + GridBase *grid = src.Grid(); + assert(grid == evec[0].Grid()); GridLogIRL.TimingMode(1); std::cout << GridLogIRL <<"**************************************************************************"<< std::endl; @@ -446,7 +466,7 @@ until convergence assert(k20); basisRotate(evec,Qt,k1-1,k2+1,0,Nm,Nm); /// big constraint on the basis - std::cout<& lmd, std::vector& lme, // determination of 2x2 leading submatrix RealD dsub = lmd[kmax-1]-lmd[kmax-2]; - RealD dd = sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); + RealD dd = std::sqrt(dsub*dsub + 4.0*lme[kmax-2]*lme[kmax-2]); RealD Dsh = 0.5*(lmd[kmax-2]+lmd[kmax-1] +dd*(dsub/fabs(dsub))); // (Dsh: shift) @@ -838,5 +858,6 @@ void diagonalize_QR(std::vector& lmd, std::vector& lme, abort(); } }; -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/iterative/LocalCoherenceLanczos.h b/Grid/algorithms/iterative/LocalCoherenceLanczos.h index c771d1cb..9c945565 100644 --- a/Grid/algorithms/iterative/LocalCoherenceLanczos.h +++ b/Grid/algorithms/iterative/LocalCoherenceLanczos.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,16 +24,15 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LOCAL_COHERENCE_IRL_H #define GRID_LOCAL_COHERENCE_IRL_H -namespace Grid { - +NAMESPACE_BEGIN(Grid); struct LanczosParams : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParams, ChebyParams, Cheby,/*Chebyshev*/ int, Nstop, /*Vecs in Lanczos must converge Nstop < Nk < Nm*/ @@ -46,7 +45,7 @@ struct LanczosParams : Serializable { }; struct LocalCoherenceLanczosParams : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, bool, saveEvecs, bool, doFine, @@ -59,7 +58,7 @@ struct LocalCoherenceLanczosParams : Serializable { RealD , coarse_relax_tol, std::vector, blockSize, std::string, config, - std::vector < std::complex >, omega, + std::vector < ComplexD >, omega, RealD, mass, RealD, M5); }; @@ -83,14 +82,14 @@ public: }; void operator()(const CoarseField& in, CoarseField& out) { - GridBase *FineGrid = subspace[0]._grid; - int checkerboard = subspace[0].checkerboard; - - FineField fin (FineGrid); fin.checkerboard= checkerboard; - FineField fout(FineGrid); fout.checkerboard = checkerboard; + GridBase *FineGrid = subspace[0].Grid(); + int checkerboard = subspace[0].Checkerboard(); + + FineField fin (FineGrid); fin.Checkerboard()= checkerboard; + FineField fout(FineGrid); fout.Checkerboard() = checkerboard; blockPromote(in,fin,subspace); std::cout< class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanczosTester > > { - public: +public: typedef iVector CoarseSiteVector; typedef Lattice CoarseField; typedef Lattice CoarseScalar; // used for inner products on fine field @@ -142,7 +141,7 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc 
LinearFunction & _Poly; OperatorFunction & _smoother; LinearOperatorBase &_Linop; - RealD _coarse_relax_tol; + RealD _coarse_relax_tol; std::vector &_subspace; ImplicitlyRestartedLanczosSmoothedTester(LinearFunction &Poly, @@ -182,10 +181,10 @@ class ImplicitlyRestartedLanczosSmoothedTester : public ImplicitlyRestartedLanc } int ReconstructEval(int j,RealD eresid,CoarseField &B, RealD &eval,RealD evalMaxApprox) { - GridBase *FineGrid = _subspace[0]._grid; - int checkerboard = _subspace[0].checkerboard; - FineField fB(FineGrid);fB.checkerboard =checkerboard; - FineField fv(FineGrid);fv.checkerboard =checkerboard; + GridBase *FineGrid = _subspace[0].Grid(); + int checkerboard = _subspace[0].Checkerboard(); + FineField fB(FineGrid);fB.Checkerboard() =checkerboard; + FineField fv(FineGrid);fv.Checkerboard() =checkerboard; blockPromote(B,fv,_subspace); @@ -305,11 +304,11 @@ public: int Nk = nbasis; subspace.resize(Nk,_FineGrid); subspace[0]=1.0; - subspace[0].checkerboard=_checkerboard; + subspace[0].Checkerboard()=_checkerboard; normalise(subspace[0]); PlainHermOp Op(_FineOp); for(int k=1;k IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,betastp,MinRes); - FineField src(_FineGrid); src=1.0; src.checkerboard = _checkerboard; + FineField src(_FineGrid); + typedef typename FineField::scalar_type Scalar; + // src=1.0; + src=Scalar(1.0); + src.Checkerboard() = _checkerboard; int Nconv; IRL.calc(evals_fine,subspace,src,Nconv,false); @@ -402,5 +405,5 @@ public: } }; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/iterative/MinimalResidual.h b/Grid/algorithms/iterative/MinimalResidual.h index fa1912cf..33b79ac2 100644 --- a/Grid/algorithms/iterative/MinimalResidual.h +++ b/Grid/algorithms/iterative/MinimalResidual.h @@ -33,6 +33,8 @@ namespace Grid { template class MinimalResidual : public OperatorFunction { public: + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // throw an assert when the MR fails to converge. // Defaults true. 
RealD Tolerance; @@ -46,11 +48,11 @@ template class MinimalResidual : public OperatorFunction { void operator()(LinearOperatorBase &Linop, const Field &src, Field &psi) { - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); - Complex a, c; - Real d; + ComplexD a, c; + RealD d; Field Mr(src); Field r(src); @@ -71,7 +73,6 @@ template class MinimalResidual : public OperatorFunction { std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "MinimalResidual: guess " << guess << std::endl; std::cout << GridLogIterative << "MinimalResidual: src " << ssq << std::endl; - std::cout << GridLogIterative << "MinimalResidual: mp " << d << std::endl; std::cout << GridLogIterative << "MinimalResidual: cp,r " << cp << std::endl; if (cp <= rsq) { diff --git a/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h b/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h index 04113684..d75fdb63 100644 --- a/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h +++ b/Grid/algorithms/iterative/MixedPrecisionFlexibleGeneralisedMinimalResidual.h @@ -34,6 +34,9 @@ namespace Grid { template::value == 2, int>::type = 0, typename std::enable_if< getPrecision::value == 1, int>::type = 0> class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction { public: + + using OperatorFunction::operator(); + bool ErrorOnNoConverge; // Throw an assert when MPFGMRES fails to converge, // defaults to true @@ -54,10 +57,10 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction Eigen::MatrixXcd H; - std::vector> y; - std::vector> gamma; - std::vector> c; - std::vector> s; + std::vector y; + std::vector gamma; + std::vector c; + std::vector s; GridBase* SinglePrecGrid; @@ -84,7 +87,7 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction void operator()(LinearOperatorBase &LinOp, 
const FieldD &src, FieldD &psi) { - psi.checkerboard = src.checkerboard; + psi.Checkerboard() = src.Checkerboard(); conformable(psi, src); RealD guess = norm2(psi); @@ -94,7 +97,7 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction RealD ssq = norm2(src); RealD rsq = Tolerance * Tolerance * ssq; - FieldD r(src._grid); + FieldD r(src.Grid()); std::cout << std::setprecision(4) << std::scientific; std::cout << GridLogIterative << "MPFGMRES: guess " << guess << std::endl; @@ -154,12 +157,12 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction RealD cp = 0; - FieldD w(src._grid); - FieldD r(src._grid); + FieldD w(src.Grid()); + FieldD r(src.Grid()); // these should probably be made class members so that they are only allocated once, not in every restart - std::vector v(RestartLength + 1, src._grid); for (auto &elem : v) elem = zero; - std::vector z(RestartLength + 1, src._grid); for (auto &elem : z) elem = zero; + std::vector v(RestartLength + 1, src.Grid()); for (auto &elem : v) elem = Zero(); + std::vector z(RestartLength + 1, src.Grid()); for (auto &elem : z) elem = Zero(); MatrixTimer.Start(); LinOp.Op(psi, w); @@ -181,7 +184,7 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction qrUpdate(i); - cp = std::norm(gamma[i+1]); + cp = norm(gamma[i+1]); std::cout << GridLogIterative << "MPFGMRES: Iteration " << IterationCount << " residual " << cp << " target " << rsq << std::endl; @@ -223,11 +226,11 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction LinalgTimer.Start(); for (int i = 0; i <= iter; ++i) { H(iter, i) = innerProduct(v[i], w); - w = w - H(iter, i) * v[i]; + w = w - ComplexD(H(iter, i)) * v[i]; } H(iter, iter + 1) = sqrt(norm2(w)); - v[iter + 1] = (1. / H(iter, iter + 1)) * w; + v[iter + 1] = ComplexD(1. 
/ H(iter, iter + 1)) * w; LinalgTimer.Stop(); } @@ -235,13 +238,13 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction QrTimer.Start(); for (int i = 0; i < iter ; ++i) { - auto tmp = -s[i] * H(iter, i) + c[i] * H(iter, i + 1); - H(iter, i) = std::conj(c[i]) * H(iter, i) + std::conj(s[i]) * H(iter, i + 1); + auto tmp = -s[i] * ComplexD(H(iter, i)) + c[i] * ComplexD(H(iter, i + 1)); + H(iter, i) = conjugate(c[i]) * ComplexD(H(iter, i)) + conjugate(s[i]) * ComplexD(H(iter, i + 1)); H(iter, i + 1) = tmp; } // Compute new Givens Rotation - ComplexD nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); + auto nu = sqrt(std::norm(H(iter, iter)) + std::norm(H(iter, iter + 1))); c[iter] = H(iter, iter) / nu; s[iter] = H(iter, iter + 1) / nu; @@ -250,7 +253,7 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction H(iter, iter + 1) = 0.; gamma[iter + 1] = -s[iter] * gamma[iter]; - gamma[iter] = std::conj(c[iter]) * gamma[iter]; + gamma[iter] = conjugate(c[iter]) * gamma[iter]; QrTimer.Stop(); } @@ -260,8 +263,8 @@ class MixedPrecisionFlexibleGeneralisedMinimalResidual : public OperatorFunction for (int i = iter; i >= 0; i--) { y[i] = gamma[i]; for (int k = i + 1; k <= iter; k++) - y[i] = y[i] - H(k, i) * y[k]; - y[i] = y[i] / H(i, i); + y[i] = y[i] - ComplexD(H(k, i)) * y[k]; + y[i] = y[i] / ComplexD(H(i, i)); } for (int i = 0; i <= iter; i++) diff --git a/Grid/algorithms/iterative/NormalEquations.h b/Grid/algorithms/iterative/NormalEquations.h index 55a84b4b..270b0115 100644 --- a/Grid/algorithms/iterative/NormalEquations.h +++ b/Grid/algorithms/iterative/NormalEquations.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,38 +23,38 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 
02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_NORMAL_EQUATIONS_H #define GRID_NORMAL_EQUATIONS_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - /////////////////////////////////////////////////////////////////////////////////////////////////////// - // Take a matrix and form an NE solver calling a Herm solver - /////////////////////////////////////////////////////////////////////////////////////////////////////// - template class NormalEquations : public OperatorFunction{ - private: - SparseMatrixBase & _Matrix; - OperatorFunction & _HermitianSolver; +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// Take a matrix and form an NE solver calling a Herm solver +/////////////////////////////////////////////////////////////////////////////////////////////////////// +template class NormalEquations : public OperatorFunction{ +private: + SparseMatrixBase & _Matrix; + OperatorFunction & _HermitianSolver; - public: +public: - ///////////////////////////////////////////////////// - // Wrap the usual normal equations trick - ///////////////////////////////////////////////////// + ///////////////////////////////////////////////////// + // Wrap the usual normal equations trick + ///////////////////////////////////////////////////// NormalEquations(SparseMatrixBase &Matrix, OperatorFunction &HermitianSolver) : _Matrix(Matrix), _HermitianSolver(HermitianSolver) {}; - void operator() (const Field &in, Field &out){ + void operator() (const Field &in, Field &out){ - Field src(in._grid); + Field src(in.Grid()); - _Matrix.Mdag(in,src); - _HermitianSolver(src,out); // Mdag M out = Mdag in + _Matrix.Mdag(in,src); + _HermitianSolver(src,out); // Mdag M out = 
Mdag in - } - }; + } +}; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/iterative/PowerMethod.h b/Grid/algorithms/iterative/PowerMethod.h index e85f258c..8a238ea6 100644 --- a/Grid/algorithms/iterative/PowerMethod.h +++ b/Grid/algorithms/iterative/PowerMethod.h @@ -14,7 +14,7 @@ template class PowerMethod RealD operator()(LinearOperatorBase &HermOp, const Field &src) { - GridBase *grid = src._grid; + GridBase *grid = src.Grid(); // quickly get an idea of the largest eigenvalue to more properly normalize the residuum RealD evalMaxApprox = 0.0; diff --git a/Grid/algorithms/iterative/PrecConjugateResidual.h b/Grid/algorithms/iterative/PrecConjugateResidual.h index a149d093..b6178833 100644 --- a/Grid/algorithms/iterative/PrecConjugateResidual.h +++ b/Grid/algorithms/iterative/PrecConjugateResidual.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,97 +23,97 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_PREC_CONJUGATE_RESIDUAL_H #define GRID_PREC_CONJUGATE_RESIDUAL_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////////////// - // Base classes for iterative processes based on operators - // single input vec, single output vec. - ///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +// Base classes for iterative processes based on operators +// single input vec, single output vec. 
+///////////////////////////////////////////////////////////// - template - class PrecConjugateResidual : public OperatorFunction { - public: - RealD Tolerance; - Integer MaxIterations; - int verbose; - LinearFunction &Preconditioner; +template +class PrecConjugateResidual : public OperatorFunction { +public: + RealD Tolerance; + Integer MaxIterations; + int verbose; + LinearFunction &Preconditioner; - PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec) - { - verbose=1; - }; + PrecConjugateResidual(RealD tol,Integer maxit,LinearFunction &Prec) : Tolerance(tol), MaxIterations(maxit), Preconditioner(Prec) + { + verbose=1; + }; - void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ - RealD a, b, c, d; - RealD cp, ssq,rsq; + RealD a, b, c, d; + RealD cp, ssq,rsq; - RealD rAr, rAAr, rArp; - RealD pAp, pAAp; + RealD rAr, rAAr, rArp; + RealD pAp, pAAp; - GridBase *grid = src._grid; - Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid); + GridBase *grid = src.Grid(); + Field r(grid), p(grid), Ap(grid), Ar(grid), z(grid); - psi=zero; - r = src; - Preconditioner(r,p); + psi=zero; + r = src; + Preconditioner(r,p); - Linop.HermOpAndNorm(p,Ap,pAp,pAAp); - Ar=Ap; - rAr=pAp; - rAAr=pAAp; + Linop.HermOpAndNorm(p,Ap,pAp,pAAp); + Ar=Ap; + rAr=pAp; + rAAr=pAAp; - cp =norm2(r); - ssq=norm2(src); - rsq=Tolerance*Tolerance*ssq; + cp =norm2(r); + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; - if (verbose) std::cout< 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_PREC_GCR_H #define GRID_PREC_GCR_H @@ -36,206 +36,208 @@ Author: Peter Boyle //NB. Likely not original reference since they are focussing on a preconditioner variant. // but VPGCR was nicely written up in their paper /////////////////////////////////////////////////////////////////////////////////////////////////////// -namespace Grid { +NAMESPACE_BEGIN(Grid); - template - class PrecGeneralisedConjugateResidual : public OperatorFunction { - public: - RealD Tolerance; - Integer MaxIterations; - int verbose; - int mmax; - int nstep; - int steps; - GridStopWatch PrecTimer; - GridStopWatch MatTimer; - GridStopWatch LinalgTimer; +template +class PrecGeneralisedConjugateResidual : public OperatorFunction { +public: + using OperatorFunction::operator(); - LinearFunction &Preconditioner; + RealD Tolerance; + Integer MaxIterations; + int verbose; + int mmax; + int nstep; + int steps; + GridStopWatch PrecTimer; + GridStopWatch MatTimer; + GridStopWatch LinalgTimer; - PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction &Prec,int _mmax,int _nstep) : - Tolerance(tol), - MaxIterations(maxit), - Preconditioner(Prec), - mmax(_mmax), - nstep(_nstep) - { - verbose=1; - }; + LinearFunction &Preconditioner; - void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + PrecGeneralisedConjugateResidual(RealD tol,Integer maxit,LinearFunction &Prec,int _mmax,int _nstep) : + Tolerance(tol), + MaxIterations(maxit), + Preconditioner(Prec), + mmax(_mmax), + nstep(_nstep) + { + verbose=1; + }; - psi=zero; - RealD cp, ssq,rsq; - ssq=norm2(src); - rsq=Tolerance*Tolerance*ssq; + void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + + 
psi=Zero(); + RealD cp, ssq,rsq; + ssq=norm2(src); + rsq=Tolerance*Tolerance*ssq; - Field r(src._grid); + Field r(src.Grid()); - PrecTimer.Reset(); - MatTimer.Reset(); - LinalgTimer.Reset(); + PrecTimer.Reset(); + MatTimer.Reset(); + LinalgTimer.Reset(); - GridStopWatch SolverTimer; - SolverTimer.Start(); + GridStopWatch SolverTimer; + SolverTimer.Start(); - steps=0; - for(int k=0;k &Linop,const Field &src, Field &psi,RealD rsq){ + RealD GCRnStep(LinearOperatorBase &Linop,const Field &src, Field &psi,RealD rsq){ - RealD cp; - RealD a, b, c, d; - RealD zAz, zAAz; - RealD rAq, rq; + RealD cp; + RealD a, b; + RealD zAz, zAAz; + RealD rq; - GridBase *grid = src._grid; + GridBase *grid = src.Grid(); - Field r(grid); - Field z(grid); - Field tmp(grid); - Field ttmp(grid); - Field Az(grid); + Field r(grid); + Field z(grid); + Field tmp(grid); + Field ttmp(grid); + Field Az(grid); - //////////////////////////////// - // history for flexible orthog - //////////////////////////////// - std::vector q(mmax,grid); - std::vector p(mmax,grid); - std::vector qq(mmax); + //////////////////////////////// + // history for flexible orthog + //////////////////////////////// + std::vector q(mmax,grid); + std::vector p(mmax,grid); + std::vector qq(mmax); - ////////////////////////////////// - // initial guess x0 is taken as nonzero. - // r0=src-A x0 = src - ////////////////////////////////// - MatTimer.Start(); - Linop.HermOpAndNorm(psi,Az,zAz,zAAz); - MatTimer.Stop(); - LinalgTimer.Start(); - r=src-Az; - LinalgTimer.Stop(); + ////////////////////////////////// + // initial guess x0 is taken as nonzero. 
+ // r0=src-A x0 = src + ////////////////////////////////// + MatTimer.Start(); + Linop.HermOpAndNorm(psi,Az,zAz,zAAz); + MatTimer.Stop(); + + LinalgTimer.Start(); + r=src-Az; + LinalgTimer.Stop(); + + ///////////////////// + // p = Prec(r) + ///////////////////// + PrecTimer.Start(); + Preconditioner(r,z); + PrecTimer.Stop(); - ///////////////////// - // p = Prec(r) - ///////////////////// - PrecTimer.Start(); - Preconditioner(r,z); - PrecTimer.Stop(); + MatTimer.Start(); + Linop.HermOp(z,tmp); + MatTimer.Stop(); - MatTimer.Start(); - Linop.HermOp(z,tmp); - MatTimer.Stop(); + LinalgTimer.Start(); + ttmp=tmp; + tmp=tmp-r; + LinalgTimer.Stop(); - LinalgTimer.Start(); - ttmp=tmp; - tmp=tmp-r; - LinalgTimer.Stop(); - - /* + /* std::cout<(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history. + for(int back=0;back=0); - int kp = k+1; - int peri_k = k %mmax; - int peri_kp= kp%mmax; + b=-real(innerProduct(q[peri_back],Az))/qq[peri_back]; + p[peri_kp]=p[peri_kp]+b*p[peri_back]; + q[peri_kp]=q[peri_kp]+b*q[peri_back]; - LinalgTimer.Start(); - rq= real(innerProduct(r,q[peri_k])); // what if rAr not real? - a = rq/qq[peri_k]; - - axpy(psi,a,p[peri_k],psi); - - cp = axpy_norm(r,-a,q[peri_k],r); - LinalgTimer.Stop(); - - if((k==nstep-1)||(cp(mmax-1))?(mmax-1):(kp); // if more than mmax done, we orthog all mmax history. 
- for(int back=0;back=0); - - b=-real(innerProduct(q[peri_back],Az))/qq[peri_back]; - p[peri_kp]=p[peri_kp]+b*p[peri_back]; - q[peri_kp]=q[peri_kp]+b*q[peri_back]; - - } - qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm - LinalgTimer.Stop(); } - - assert(0); // never reached - return cp; + qq[peri_kp]=norm2(q[peri_kp]); // could use axpy_norm + LinalgTimer.Stop(); } - }; -} + assert(0); // never reached + return cp; + } +}; +NAMESPACE_END(Grid); #endif diff --git a/Grid/algorithms/iterative/SchurRedBlack.h b/Grid/algorithms/iterative/SchurRedBlack.h index 9f63e8c0..dd8a14b6 100644 --- a/Grid/algorithms/iterative/SchurRedBlack.h +++ b/Grid/algorithms/iterative/SchurRedBlack.h @@ -297,9 +297,9 @@ namespace Grid { ///////////////////////////////////////////////////// // src_o = (source_o - Moe MeeInv source_e) ///////////////////////////////////////////////////// - _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); - _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); - tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); _Matrix.Mooee(tmp,src_o); // Extra factor of "m" in source from dumb choice of matrix norm. } @@ -317,17 +317,17 @@ namespace Grid { /////////////////////////////////////////////////// // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... 
/////////////////////////////////////////////////// - _Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even); - src_e = src_e-tmp; assert( src_e.checkerboard ==Even); - _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even); + _Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even); + src_e = src_e-tmp; assert( src_e.Checkerboard() ==Even); + _Matrix.MooeeInv(src_e,sol_e); assert( sol_e.Checkerboard() ==Even); - setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even); - setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd ); + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd ); } virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) { SchurStaggeredOperator _HermOpEO(_Matrix); - this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd); }; virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) { @@ -366,13 +366,13 @@ namespace Grid { ///////////////////////////////////////////////////// // src_o = Mdag * (source_o - Moe MeeInv source_e) ///////////////////////////////////////////////////// - _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); - _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); - tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); // get the right MpcDag SchurDiagMooeeOperator _HermOpEO(_Matrix); - _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd); + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd); } virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const Field &src_e,Field &sol) @@ -386,17 +386,17 @@ namespace Grid { 
/////////////////////////////////////////////////// // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... /////////////////////////////////////////////////// - _Matrix.Meooe(sol_o,tmp); assert( tmp.checkerboard ==Even); - src_e_i = src_e-tmp; assert( src_e_i.checkerboard ==Even); - _Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.checkerboard ==Even); + _Matrix.Meooe(sol_o,tmp); assert( tmp.Checkerboard() ==Even); + src_e_i = src_e-tmp; assert( src_e_i.Checkerboard() ==Even); + _Matrix.MooeeInv(src_e_i,sol_e); assert( sol_e.Checkerboard() ==Even); - setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even); - setCheckerboard(sol,sol_o); assert( sol_o.checkerboard ==Odd ); + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o); assert( sol_o.Checkerboard() ==Odd ); } virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) { SchurDiagMooeeOperator _HermOpEO(_Matrix); - this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd); + this->_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.Checkerboard()==Odd); }; virtual void RedBlackSolve (Matrix & _Matrix,const std::vector &src_o, std::vector &sol_o) { @@ -437,12 +437,12 @@ namespace Grid { ///////////////////////////////////////////////////// // src_o = Mdag * (source_o - Moe MeeInv source_e) ///////////////////////////////////////////////////// - _Matrix.MooeeInv(src_e,tmp); assert( tmp.checkerboard ==Even); - _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.checkerboard ==Odd); - tmp=src_o-Mtmp; assert( tmp.checkerboard ==Odd); + _Matrix.MooeeInv(src_e,tmp); assert( tmp.Checkerboard() ==Even); + _Matrix.Meooe (tmp,Mtmp); assert( Mtmp.Checkerboard() ==Odd); + tmp=src_o-Mtmp; assert( tmp.Checkerboard() ==Odd); // get the right MpcDag - _HermOpEO.MpcDag(tmp,src_o); assert(src_o.checkerboard ==Odd); + _HermOpEO.MpcDag(tmp,src_o); assert(src_o.Checkerboard() ==Odd); } virtual void RedBlackSolution(Matrix & _Matrix,const Field &sol_o, const 
Field &src_e,Field &sol) @@ -463,12 +463,12 @@ namespace Grid { /////////////////////////////////////////////////// // sol_e = M_ee^-1 * ( src_e - Meo sol_o )... /////////////////////////////////////////////////// - _Matrix.Meooe(sol_o_i,tmp); assert( tmp.checkerboard ==Even); - tmp = src_e-tmp; assert( src_e.checkerboard ==Even); - _Matrix.MooeeInv(tmp,sol_e); assert( sol_e.checkerboard ==Even); + _Matrix.Meooe(sol_o_i,tmp); assert( tmp.Checkerboard() ==Even); + tmp = src_e-tmp; assert( src_e.Checkerboard() ==Even); + _Matrix.MooeeInv(tmp,sol_e); assert( sol_e.Checkerboard() ==Even); - setCheckerboard(sol,sol_e); assert( sol_e.checkerboard ==Even); - setCheckerboard(sol,sol_o_i); assert( sol_o_i.checkerboard ==Odd ); + setCheckerboard(sol,sol_e); assert( sol_e.Checkerboard() ==Even); + setCheckerboard(sol,sol_o_i); assert( sol_o_i.Checkerboard() ==Odd ); }; virtual void RedBlackSolve (Matrix & _Matrix,const Field &src_o, Field &sol_o) diff --git a/Grid/allocator/AlignedAllocator.cc b/Grid/allocator/AlignedAllocator.cc index 10b49f4b..ed27a8bd 100644 --- a/Grid/allocator/AlignedAllocator.cc +++ b/Grid/allocator/AlignedAllocator.cc @@ -1,11 +1,12 @@ #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); MemoryStats *MemoryProfiler::stats = nullptr; bool MemoryProfiler::debug = false; +#ifdef POINTER_CACHE int PointerCache::victim; PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache]; @@ -49,9 +50,9 @@ void *PointerCache::Insert(void *ptr,size_t bytes) { void *PointerCache::Lookup(size_t bytes) { - if (bytes < 4096 ) return NULL; + if (bytes < 4096 ) return NULL; -#ifdef _OPENMP +#ifdef GRID_OMP assert(omp_in_parallel()==0); #endif @@ -63,7 +64,7 @@ void *PointerCache::Lookup(size_t bytes) { } return NULL; } - +#endif void check_huge_pages(void *Buf,uint64_t BYTES) { @@ -90,7 +91,7 @@ void check_huge_pages(void *Buf,uint64_t BYTES) ++n4ktotal; if (pageaddr != baseaddr + j * page_size) ++nnothuge; - } + } } int rank = 
CartesianCommunicator::RankWorld(); printf("rank %d Allocated %d 4k pages, %d not in huge pages\n", rank, n4ktotal, nnothuge); @@ -106,20 +107,21 @@ std::string sizeString(const size_t bytes) double count = bytes; while (count >= 1024 && s < 7) - { + { s++; count /= 1024; - } + } if (count - floor(count) == 0.0) - { + { snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]); - } + } else - { + { snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]); - } + } return std::string(buf); } -} +NAMESPACE_END(Grid); + diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index b0f7e206..2aa7d82b 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -24,8 +24,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_ALIGNED_ALLOCATOR_H #define GRID_ALIGNED_ALLOCATOR_H @@ -40,89 +40,95 @@ Author: Peter Boyle #include #endif -namespace Grid { +#define POINTER_CACHE +#define GRID_ALLOC_ALIGN (2*1024*1024) - class PointerCache { - private: +NAMESPACE_BEGIN(Grid); - static const int Ncache=8; - static int victim; +// Move control to configure.ac and Config.h? 
+#ifdef POINTER_CACHE +class PointerCache { +private: - typedef struct { - void *address; - size_t bytes; - int valid; - } PointerCacheEntry; + static const int Ncache=8; + static int victim; + + typedef struct { + void *address; + size_t bytes; + int valid; + } PointerCacheEntry; - static PointerCacheEntry Entries[Ncache]; + static PointerCacheEntry Entries[Ncache]; - public: +public: - static void *Insert(void *ptr,size_t bytes) ; - static void *Lookup(size_t bytes) ; + static void *Insert(void *ptr,size_t bytes) ; + static void *Lookup(size_t bytes) ; - }; - - std::string sizeString(size_t bytes); +}; +#endif - struct MemoryStats - { - size_t totalAllocated{0}, maxAllocated{0}, - currentlyAllocated{0}, totalFreed{0}; - }; +std::string sizeString(size_t bytes); + +struct MemoryStats +{ + size_t totalAllocated{0}, maxAllocated{0}, + currentlyAllocated{0}, totalFreed{0}; +}; - class MemoryProfiler - { - public: - static MemoryStats *stats; - static bool debug; - }; +class MemoryProfiler +{ +public: + static MemoryStats *stats; + static bool debug; +}; - #define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" - #define profilerDebugPrint \ - if (MemoryProfiler::stats)\ - {\ - auto s = MemoryProfiler::stats;\ - std::cout << GridLogDebug << "[Memory debug] Stats " << MemoryProfiler::stats << std::endl;\ - std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ - << std::endl;\ - std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ - << std::endl;\ - std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ - << std::endl;\ - std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ - << std::endl;\ - } +#define memString(bytes) std::to_string(bytes) + " (" + sizeString(bytes) + ")" +#define profilerDebugPrint \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + std::cout << GridLogDebug << 
"[Memory debug] Stats " << MemoryProfiler::stats << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] total : " << memString(s->totalAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] max : " << memString(s->maxAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] current: " << memString(s->currentlyAllocated) \ + << std::endl; \ + std::cout << GridLogDebug << "[Memory debug] freed : " << memString(s->totalFreed) \ + << std::endl; \ + } - #define profilerAllocate(bytes)\ - if (MemoryProfiler::stats)\ - {\ - auto s = MemoryProfiler::stats;\ - s->totalAllocated += (bytes);\ - s->currentlyAllocated += (bytes);\ - s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated);\ - }\ - if (MemoryProfiler::debug)\ - {\ - std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl;\ - profilerDebugPrint;\ - } +#define profilerAllocate(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalAllocated += (bytes); \ + s->currentlyAllocated += (bytes); \ + s->maxAllocated = std::max(s->maxAllocated, s->currentlyAllocated); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] allocating " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } - #define profilerFree(bytes)\ - if (MemoryProfiler::stats)\ - {\ - auto s = MemoryProfiler::stats;\ - s->totalFreed += (bytes);\ - s->currentlyAllocated -= (bytes);\ - }\ - if (MemoryProfiler::debug)\ - {\ - std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl;\ - profilerDebugPrint;\ - } +#define profilerFree(bytes) \ + if (MemoryProfiler::stats) \ + { \ + auto s = MemoryProfiler::stats; \ + s->totalFreed += (bytes); \ + s->currentlyAllocated -= (bytes); \ + } \ + if (MemoryProfiler::debug) \ + { \ + std::cout << GridLogDebug << "[Memory debug] freeing " << memString(bytes) << std::endl; \ + profilerDebugPrint; \ + } - void 
check_huge_pages(void *Buf,uint64_t BYTES); +void check_huge_pages(void *Buf,uint64_t BYTES); //////////////////////////////////////////////////////////////////// // A lattice of something, but assume the something is SIMDized. @@ -152,29 +158,45 @@ public: size_type bytes = __n*sizeof(_Tp); profilerAllocate(bytes); - _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes); - // if ( ptr != NULL ) - // std::cout << "alignedAllocator "<<__n << " cache hit "<< std::hex << ptr < inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } -////////////////////////////////////////////////////////////////////////////////////////// -// MPI3 : comms must use shm region -// SHMEM: comms must use symmetric heap -////////////////////////////////////////////////////////////////////////////////////////// -#ifdef GRID_COMMS_SHMEM -extern "C" { -#include -extern void * shmem_align(size_t, size_t); -extern void shmem_free(void *); -} -#define PARANOID_SYMMETRIC_HEAP -#endif - -template -class commAllocator { -public: - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - typedef _Tp* pointer; - typedef const _Tp* const_pointer; - typedef _Tp& reference; - typedef const _Tp& const_reference; - typedef _Tp value_type; - - template struct rebind { typedef commAllocator<_Tp1> other; }; - commAllocator() throw() { } - commAllocator(const commAllocator&) throw() { } - template commAllocator(const commAllocator<_Tp1>&) throw() { } - ~commAllocator() throw() { } - pointer address(reference __x) const { return &__x; } - size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } - -#ifdef GRID_COMMS_SHMEM - pointer allocate(size_type __n, const void* _p= 0) - { - size_type bytes = __n*sizeof(_Tp); - - profilerAllocate(bytes); -#ifdef CRAY - _Tp *ptr = (_Tp *) shmem_align(bytes,64); -#else - _Tp *ptr = (_Tp *) 
shmem_align(64,bytes); -#endif -#ifdef PARANOID_SYMMETRIC_HEAP - static void * bcast; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; - - bcast = (void *) ptr; - shmem_broadcast32((void *)&bcast,(void *)&bcast,sizeof(void *)/4,0,0,0,shmem_n_pes(),psync); - - if ( bcast != ptr ) { - std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout); - // BACKTRACEFILE(); - exit(0); - } - assert( bcast == (void *) ptr); -#endif - return ptr; - } - void deallocate(pointer __p, size_type __n) { - size_type bytes = __n*sizeof(_Tp); - - profilerFree(bytes); - shmem_free((void *)__p); - } -#else - pointer allocate(size_type __n, const void* _p= 0) - { - size_type bytes = __n*sizeof(_Tp); - - profilerAllocate(bytes); -#ifdef HAVE_MM_MALLOC_H - _Tp * ptr = (_Tp *) _mm_malloc(bytes, GRID_ALLOC_ALIGN); -#else - _Tp * ptr = (_Tp *) memalign(GRID_ALLOC_ALIGN, bytes); -#endif - uint8_t *cp = (uint8_t *)ptr; - if ( ptr ) { - // One touch per 4k page, static OMP loop to catch same loop order -#ifdef GRID_OMP -#pragma omp parallel for schedule(static) -#endif - for(size_type n=0;n inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; } -template inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; } - //////////////////////////////////////////////////////////////////////////////// // Template typedefs //////////////////////////////////////////////////////////////////////////////// +template using commAllocator = alignedAllocator; template using Vector = std::vector >; -template using commVector = std::vector >; +template using commVector = std::vector >; template using Matrix = std::vector > >; - -}; // namespace Grid + +NAMESPACE_END(Grid); + #endif diff --git a/Grid/cartesian/Cartesian.h b/Grid/cartesian/Cartesian.h index f3710a48..070cad95 100644 --- a/Grid/cartesian/Cartesian.h +++ b/Grid/cartesian/Cartesian.h @@ -1,4 +1,4 @@ - 
/************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,8 +23,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CARTESIAN_H #define GRID_CARTESIAN_H diff --git a/Grid/cartesian/Cartesian_base.h b/Grid/cartesian/Cartesian_base.h index 05a8a3da..76abe0ee 100644 --- a/Grid/cartesian/Cartesian_base.h +++ b/Grid/cartesian/Cartesian_base.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,268 +25,266 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CARTESIAN_BASE_H #define GRID_CARTESIAN_BASE_H +NAMESPACE_BEGIN(Grid); -namespace Grid{ - - ////////////////////////////////////////////////////////////////////// - // Commicator provides information on the processor grid - ////////////////////////////////////////////////////////////////////// - // unsigned long _ndimension; - // std::vector _processors; // processor grid - // int _processor; // linear processor rank - // std::vector _processor_coor; // linear processor rank - ////////////////////////////////////////////////////////////////////// - class GridBase : public CartesianCommunicator , public GridThread { +////////////////////////////////////////////////////////////////////// +// Commicator provides information on the processor grid +////////////////////////////////////////////////////////////////////// +// unsigned long _ndimension; +// Coordinate _processors; // processor grid +// int _processor; // linear processor rank +// Coordinate _processor_coor; // linear processor rank +////////////////////////////////////////////////////////////////////// +class GridBase : public CartesianCommunicator , public GridThread { public: - int dummy; - // Give Lattice access - template friend class Lattice; + int dummy; + // Give Lattice access + template friend class Lattice; - GridBase(const std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; - GridBase(const std::vector & processor_grid, - const CartesianCommunicator &parent, - int &split_rank) - : CartesianCommunicator(processor_grid,parent,split_rank) {}; - GridBase(const std::vector & processor_grid, - const CartesianCommunicator &parent) - : 
CartesianCommunicator(processor_grid,parent,dummy) {}; + GridBase(const Coordinate & processor_grid) : CartesianCommunicator(processor_grid) {}; - virtual ~GridBase() = default; + GridBase(const Coordinate & processor_grid, + const CartesianCommunicator &parent, + int &split_rank) + : CartesianCommunicator(processor_grid,parent,split_rank) {}; + + GridBase(const Coordinate & processor_grid, + const CartesianCommunicator &parent) + : CartesianCommunicator(processor_grid,parent,dummy) {}; + + virtual ~GridBase() = default; - // Physics Grid information. - std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. - std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal - std::vector _gdimensions;// Global dimensions of array after cb removal - std::vector _ldimensions;// local dimensions of array with processor images removed - std::vector _rdimensions;// Reduced local dimensions with simd lane images and processor images removed - std::vector _ostride; // Outer stride for each dimension - std::vector _istride; // Inner stride i.e. within simd lane - int _osites; // _isites*_osites = product(dimensions). - int _isites; - int _fsites; // _isites*_osites = product(dimensions). - int _gsites; - std::vector _slice_block;// subslice information - std::vector _slice_stride; - std::vector _slice_nblock; + // Physics Grid information. + Coordinate _simd_layout;// Which dimensions get relayed out over simd lanes. + Coordinate _fdimensions;// (full) Global dimensions of array prior to cb removal + Coordinate _gdimensions;// Global dimensions of array after cb removal + Coordinate _ldimensions;// local dimensions of array with processor images removed + Coordinate _rdimensions;// Reduced local dimensions with simd lane images and processor images removed + Coordinate _ostride; // Outer stride for each dimension + Coordinate _istride; // Inner stride i.e. within simd lane + int _osites; // _isites*_osites = product(dimensions). 
+ int _isites; + int _fsites; // _isites*_osites = product(dimensions). + int _gsites; + Coordinate _slice_block;// subslice information + Coordinate _slice_stride; + Coordinate _slice_nblock; - std::vector _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] - std::vector _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 + Coordinate _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] + Coordinate _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 bool _isCheckerBoarded; public: - //////////////////////////////////////////////////////////////// - // Checkerboarding interface is virtual and overridden by - // GridCartesian / GridRedBlackCartesian - //////////////////////////////////////////////////////////////// - virtual int CheckerBoarded(int dim)=0; - virtual int CheckerBoard(const std::vector &site)=0; - virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; - virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; - virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; - virtual int CheckerBoardFromOindex (int Oindex)=0; - virtual int CheckerBoardFromOindexTable (int Oindex)=0; + //////////////////////////////////////////////////////////////// + // Checkerboarding interface is virtual and overridden by + // GridCartesian / GridRedBlackCartesian + //////////////////////////////////////////////////////////////// + virtual int CheckerBoarded(int dim)=0; + virtual int CheckerBoard(const Coordinate &site)=0; + virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; + virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; + virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; + virtual int CheckerBoardFromOindex (int Oindex)=0; + virtual int CheckerBoardFromOindexTable (int Oindex)=0; - 
////////////////////////////////////////////////////////////////////////////////////////////// - // Local layout calculations - ////////////////////////////////////////////////////////////////////////////////////////////// - // These routines are key. Subdivide the linearised cartesian index into - // "inner" index identifying which simd lane of object is associated with coord - // "outer" index identifying which element of _odata in class "Lattice" is associated with coord. - // - // Compared to, say, Blitz++ we simply need to store BOTH an inner stride and an outer - // stride per dimension. The cost of evaluating the indexing information is doubled for an n-dimensional - // coordinate. Note, however, for data parallel operations the "inner" indexing cost is not paid and all - // lanes are operated upon simultaneously. + ////////////////////////////////////////////////////////////////////////////////////////////// + // Local layout calculations + ////////////////////////////////////////////////////////////////////////////////////////////// + // These routines are key. Subdivide the linearised cartesian index into + // "inner" index identifying which simd lane of object is associated with coord + // "outer" index identifying which element of _odata in class "Lattice" is associated with coord. + // + // Compared to, say, Blitz++ we simply need to store BOTH an inner stride and an outer + // stride per dimension. The cost of evaluating the indexing information is doubled for an n-dimensional + // coordinate. Note, however, for data parallel operations the "inner" indexing cost is not paid and all + // lanes are operated upon simultaneously. 
- virtual int oIndex(std::vector &coor) - { - int idx=0; - // Works with either global or local coordinates - for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); - return idx; - } - virtual int iIndex(std::vector &lcoor) - { - int idx=0; - for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); - return idx; - } - inline int oIndexReduced(std::vector &ocoor) - { - int idx=0; - // ocoor is already reduced so can eliminate the modulo operation - // for fast indexing and inline the routine - for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d]; - return idx; - } - inline void oCoorFromOindex (std::vector& coor,int Oindex){ - Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); - } + virtual int oIndex(Coordinate &coor) + { + int idx=0; + // Works with either global or local coordinates + for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); + return idx; + } + virtual int iIndex(Coordinate &lcoor) + { + int idx=0; + for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); + return idx; + } + inline int oIndexReduced(Coordinate &ocoor) + { + int idx=0; + // ocoor is already reduced so can eliminate the modulo operation + // for fast indexing and inline the routine + for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d]; + return idx; + } + inline void oCoorFromOindex (Coordinate& coor,int Oindex){ + Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); + } - inline void InOutCoorToLocalCoor (std::vector &ocoor, std::vector &icoor, std::vector &lcoor) { - lcoor.resize(_ndimension); - for (int d = 0; d < _ndimension; d++) - lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; - } + inline void InOutCoorToLocalCoor (Coordinate &ocoor, Coordinate &icoor, Coordinate &lcoor) { + lcoor.resize(_ndimension); + for (int d = 0; d < _ndimension; d++) + lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d]; + } - ////////////////////////////////////////////////////////// - // SIMD lane addressing 
- ////////////////////////////////////////////////////////// - inline void iCoorFromIindex(std::vector &coor,int lane) - { - Lexicographic::CoorFromIndex(coor,lane,_simd_layout); - } + ////////////////////////////////////////////////////////// + // SIMD lane addressing + ////////////////////////////////////////////////////////// + inline void iCoorFromIindex(Coordinate &coor,int lane) + { + Lexicographic::CoorFromIndex(coor,lane,_simd_layout); + } - inline int PermuteDim(int dimension){ - return _simd_layout[dimension]>1; - } - inline int PermuteType(int dimension){ - int permute_type=0; - // - // FIXME: - // - // Best way to encode this would be to present a mask - // for which simd dimensions are rotated, and the rotation - // size. If there is only one simd dimension rotated, this is just - // a permute. - // - // Cases: PermuteType == 1,2,4,8 - // Distance should be either 0,1,2.. - // - if ( _simd_layout[dimension] > 2 ) { - for(int d=0;d<_ndimension;d++){ - if ( d != dimension ) assert ( (_simd_layout[d]==1) ); - } - permute_type = RotateBit; // How to specify distance; this is not just direction. - return permute_type; - } - - for(int d=_ndimension-1;d>dimension;d--){ - if (_simd_layout[d]>1 ) permute_type++; + inline int PermuteDim(int dimension){ + return _simd_layout[dimension]>1; + } + inline int PermuteType(int dimension){ + int permute_type=0; + // + // Best way to encode this would be to present a mask + // for which simd dimensions are rotated, and the rotation + // size. If there is only one simd dimension rotated, this is just + // a permute. + // + // Cases: PermuteType == 1,2,4,8 + // Distance should be either 0,1,2.. + // + if ( _simd_layout[dimension] > 2 ) { + for(int d=0;d<_ndimension;d++){ + if ( d != dimension ) assert ( (_simd_layout[d]==1) ); } + permute_type = RotateBit; // How to specify distance; this is not just direction. 
return permute_type; } - //////////////////////////////////////////////////////////////// - // Array sizing queries - //////////////////////////////////////////////////////////////// - inline int iSites(void) const { return _isites; }; - inline int Nsimd(void) const { return _isites; };// Synonymous with iSites - inline int oSites(void) const { return _osites; }; - inline int lSites(void) const { return _isites*_osites; }; - inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; - inline int Nd (void) const { return _ndimension;}; - - inline const std::vector LocalStarts(void) { return _lstart; }; - inline const std::vector &FullDimensions(void) { return _fdimensions;}; - inline const std::vector &GlobalDimensions(void) { return _gdimensions;}; - inline const std::vector &LocalDimensions(void) { return _ldimensions;}; - inline const std::vector &VirtualLocalDimensions(void) { return _ldimensions;}; - - //////////////////////////////////////////////////////////////// - // Utility to print the full decomposition details - //////////////////////////////////////////////////////////////// - - void show_decomposition(){ - std::cout << GridLogMessage << "\tFull Dimensions : " << _fdimensions << std::endl; - std::cout << GridLogMessage << "\tSIMD layout : " << _simd_layout << std::endl; - std::cout << GridLogMessage << "\tGlobal Dimensions : " << _gdimensions << std::endl; - std::cout << GridLogMessage << "\tLocal Dimensions : " << _ldimensions << std::endl; - std::cout << GridLogMessage << "\tReduced Dimensions : " << _rdimensions << std::endl; - std::cout << GridLogMessage << "\tOuter strides : " << _ostride << std::endl; - std::cout << GridLogMessage << "\tInner strides : " << _istride << std::endl; - std::cout << GridLogMessage << "\tiSites : " << _isites << std::endl; - std::cout << GridLogMessage << "\toSites : " << _osites << std::endl; - std::cout << GridLogMessage << "\tlSites : " << lSites() << std::endl; - std::cout << GridLogMessage << 
"\tgSites : " << gSites() << std::endl; - std::cout << GridLogMessage << "\tNd : " << _ndimension << std::endl; - } - - //////////////////////////////////////////////////////////////// - // Global addressing - //////////////////////////////////////////////////////////////// - void GlobalIndexToGlobalCoor(int gidx,std::vector &gcoor){ - assert(gidx< gSites()); - Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions); + for(int d=_ndimension-1;d>dimension;d--){ + if (_simd_layout[d]>1 ) permute_type++; } - void LocalIndexToLocalCoor(int lidx,std::vector &lcoor){ - assert(lidx & gcoor,int & gidx){ - gidx=0; - int mult=1; - for(int mu=0;mu<_ndimension;mu++) { - gidx+=mult*gcoor[mu]; - mult*=_gdimensions[mu]; - } + } + void GlobalCoorToProcessorCoorLocalCoor(Coordinate &pcoor,Coordinate &lcoor,const Coordinate &gcoor) + { + pcoor.resize(_ndimension); + lcoor.resize(_ndimension); + for(int mu=0;mu<_ndimension;mu++){ + int _fld = _fdimensions[mu]/_processors[mu]; + pcoor[mu] = gcoor[mu]/_fld; + lcoor[mu] = gcoor[mu]%_fld; } - void GlobalCoorToProcessorCoorLocalCoor(std::vector &pcoor,std::vector &lcoor,const std::vector &gcoor) - { - pcoor.resize(_ndimension); - lcoor.resize(_ndimension); - for(int mu=0;mu<_ndimension;mu++){ - int _fld = _fdimensions[mu]/_processors[mu]; - pcoor[mu] = gcoor[mu]/_fld; - lcoor[mu] = gcoor[mu]%_fld; - } - } - void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector &gcoor) - { - std::vector pcoor; - std::vector lcoor; - GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor); - rank = RankFromProcessorCoor(pcoor); - /* - std::vector cblcoor(lcoor); + } + void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const Coordinate &gcoor) + { + Coordinate pcoor; + Coordinate lcoor; + GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor); + rank = RankFromProcessorCoor(pcoor); + /* + Coordinate cblcoor(lcoor); for(int d=0;dCheckerBoarded(d) ) { - cblcoor[d] = lcoor[d]/2; - } + if( this->CheckerBoarded(d) ) { + cblcoor[d] 
= lcoor[d]/2; } - */ - i_idx= iIndex(lcoor); - o_idx= oIndex(lcoor); - } + } + */ + i_idx= iIndex(lcoor); + o_idx= oIndex(lcoor); + } - void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector &gcoor) - { - gcoor.resize(_ndimension); - std::vector coor(_ndimension); + void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , Coordinate &gcoor) + { + gcoor.resize(_ndimension); + Coordinate coor(_ndimension); - ProcessorCoorFromRank(rank,coor); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu]; + ProcessorCoorFromRank(rank,coor); + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu]; - iCoorFromIindex(coor,i_idx); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu]; + iCoorFromIindex(coor,i_idx); + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu]; - oCoorFromOindex (coor,o_idx); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu]; + oCoorFromOindex (coor,o_idx); + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu]; + } + void RankIndexCbToFullGlobalCoor(int rank, int o_idx, int i_idx, int cb,Coordinate &fcoor) + { + RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); + if(CheckerBoarded(0)){ + fcoor[0] = fcoor[0]*2+cb; } - void RankIndexCbToFullGlobalCoor(int rank, int o_idx, int i_idx, int cb,std::vector &fcoor) - { - RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); - if(CheckerBoarded(0)){ - fcoor[0] = fcoor[0]*2+cb; - } - } - void ProcessorCoorLocalCoorToGlobalCoor(std::vector &Pcoor,std::vector &Lcoor,std::vector &gcoor) - { - gcoor.resize(_ndimension); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu]; - } + } + void ProcessorCoorLocalCoorToGlobalCoor(Coordinate &Pcoor,Coordinate &Lcoor,Coordinate &gcoor) + { + gcoor.resize(_ndimension); + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu]; + } }; - -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/cartesian/Cartesian_full.h 
b/Grid/cartesian/Cartesian_full.h index b6297d3d..c083817b 100644 --- a/Grid/cartesian/Cartesian_full.h +++ b/Grid/cartesian/Cartesian_full.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,97 +23,96 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CARTESIAN_FULL_H #define GRID_CARTESIAN_FULL_H -namespace Grid{ +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////////////// // Grid Support. ///////////////////////////////////////////////////////////////////////////////////////// - class GridCartesian: public GridBase { public: - int dummy; - virtual int CheckerBoardFromOindexTable (int Oindex) { - return 0; - } - virtual int CheckerBoardFromOindex (int Oindex) - { - return 0; - } - virtual int CheckerBoarded(int dim){ - return 0; - } - virtual int CheckerBoard(const std::vector &site){ - return 0; - } - virtual int CheckerBoardDestination(int cb,int shift,int dim){ - return 0; - } - virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){ - return shift; - } - virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ - return shift; - } - ///////////////////////////////////////////////////////////////////////// - // Constructor takes a parent grid and possibly subdivides communicator. 
- ///////////////////////////////////////////////////////////////////////// - GridCartesian(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid, - const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) - { - Init(dimensions,simd_layout,processor_grid); - } - GridCartesian(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid, - const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) - { - Init(dimensions,simd_layout,processor_grid); - } - ///////////////////////////////////////////////////////////////////////// - // Construct from comm world - ///////////////////////////////////////////////////////////////////////// - GridCartesian(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid) : GridBase(processor_grid) - { - Init(dimensions,simd_layout,processor_grid); - } + int dummy; + virtual int CheckerBoardFromOindexTable (int Oindex) { + return 0; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + return 0; + } + virtual int CheckerBoarded(int dim){ + return 0; + } + virtual int CheckerBoard(const Coordinate &site){ + return 0; + } + virtual int CheckerBoardDestination(int cb,int shift,int dim){ + return 0; + } + virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift, int ocb){ + return shift; + } + virtual int CheckerBoardShift(int source_cb,int dim,int shift, int osite){ + return shift; + } + ///////////////////////////////////////////////////////////////////////// + // Constructor takes a parent grid and possibly subdivides communicator. 
+ ///////////////////////////////////////////////////////////////////////// + GridCartesian(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + const GridCartesian &parent) : GridBase(processor_grid,parent,dummy) + { + Init(dimensions,simd_layout,processor_grid); + } + GridCartesian(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank) + { + Init(dimensions,simd_layout,processor_grid); + } + ///////////////////////////////////////////////////////////////////////// + // Construct from comm world + ///////////////////////////////////////////////////////////////////////// + GridCartesian(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid) : GridBase(processor_grid) + { + Init(dimensions,simd_layout,processor_grid); + } - virtual ~GridCartesian() = default; + virtual ~GridCartesian() = default; - void Init(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid) - { - /////////////////////// - // Grid information - /////////////////////// + void Init(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid) + { + /////////////////////// + // Grid information + /////////////////////// _isCheckerBoarded = false; - _ndimension = dimensions.size(); + _ndimension = dimensions.size(); - _fdimensions.resize(_ndimension); - _gdimensions.resize(_ndimension); - _ldimensions.resize(_ndimension); - _rdimensions.resize(_ndimension); - _simd_layout.resize(_ndimension); - _lstart.resize(_ndimension); - _lend.resize(_ndimension); + _fdimensions.resize(_ndimension); + _gdimensions.resize(_ndimension); + _ldimensions.resize(_ndimension); + _rdimensions.resize(_ndimension); + _simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); - 
_ostride.resize(_ndimension); - _istride.resize(_ndimension); + _ostride.resize(_ndimension); + _istride.resize(_ndimension); - _fsites = _gsites = _osites = _isites = 1; + _fsites = _gsites = _osites = _isites = 1; - for (int d = 0; d < _ndimension; d++) + for (int d = 0; d < _ndimension; d++) { _fdimensions[d] = dimensions[d]; // Global dimensions _gdimensions[d] = _fdimensions[d]; // Global dimensions @@ -136,30 +135,30 @@ public: // Addressing support if (d == 0) - { - _ostride[d] = 1; - _istride[d] = 1; - } + { + _ostride[d] = 1; + _istride[d] = 1; + } else - { - _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; - _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; - } + { + _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; + _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; + } } - /////////////////////// - // subplane information - /////////////////////// - _slice_block.resize(_ndimension); - _slice_stride.resize(_ndimension); - _slice_nblock.resize(_ndimension); + /////////////////////// + // subplane information + /////////////////////// + _slice_block.resize(_ndimension); + _slice_stride.resize(_ndimension); + _slice_nblock.resize(_ndimension); - int block = 1; - int nblock = 1; - for (int d = 0; d < _ndimension; d++) - nblock *= _rdimensions[d]; + int block = 1; + int nblock = 1; + for (int d = 0; d < _ndimension; d++) + nblock *= _rdimensions[d]; - for (int d = 0; d < _ndimension; d++) + for (int d = 0; d < _ndimension; d++) { nblock /= _rdimensions[d]; _slice_block[d] = block; @@ -167,8 +166,9 @@ public: _slice_nblock[d] = nblock; block = block * _rdimensions[d]; } - }; + }; }; -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/cartesian/Cartesian_red_black.h b/Grid/cartesian/Cartesian_red_black.h index 5a041f65..34f763d2 100644 --- a/Grid/cartesian/Cartesian_red_black.h +++ b/Grid/cartesian/Cartesian_red_black.h @@ -1,4 +1,4 @@ - /************************************************************************************* 
+/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,178 +24,147 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CARTESIAN_RED_BLACK_H #define GRID_CARTESIAN_RED_BLACK_H +NAMESPACE_BEGIN(Grid); -namespace Grid { - - static const int CbRed =0; - static const int CbBlack=1; - static const int Even =CbRed; - static const int Odd =CbBlack; +static const int CbRed =0; +static const int CbBlack=1; +static const int Even =CbRed; +static const int Odd =CbBlack; // Specialise this for red black grids storing half the data like a chess board. class GridRedBlackCartesian : public GridBase { public: - std::vector _checker_dim_mask; - int _checker_dim; - std::vector _checker_board; + Coordinate _checker_dim_mask; + int _checker_dim; + std::vector _checker_board; - virtual int CheckerBoarded(int dim){ - if( dim==_checker_dim) return 1; - else return 0; - } - virtual int CheckerBoard(const std::vector &site){ - int linear=0; - assert(site.size()==_ndimension); - for(int d=0;d<_ndimension;d++){ - if(_checker_dim_mask[d]) - linear=linear+site[d]; - } - return (linear&0x1); + virtual int CheckerBoarded(int dim){ + if( dim==_checker_dim) return 1; + else return 0; + } + virtual int CheckerBoard(const Coordinate &site){ + int linear=0; + assert(site.size()==_ndimension); + for(int d=0;d<_ndimension;d++){ + if(_checker_dim_mask[d]) + linear=linear+site[d]; } + return (linear&0x1); + } + // Depending on the cb of site, we toggle source cb. 
+ // for block #b, element #e = (b, e) + // we need + virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int ocb){ + if(dim != _checker_dim) return shift; - // Depending on the cb of site, we toggle source cb. - // for block #b, element #e = (b, e) - // we need - virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int ocb){ - if(dim != _checker_dim) return shift; + int fulldim =_fdimensions[dim]; + shift = (shift+fulldim)%fulldim; - int fulldim =_fdimensions[dim]; - shift = (shift+fulldim)%fulldim; - - // Probably faster with table lookup; - // or by looping over x,y,z and multiply rather than computing checkerboard. + // Probably faster with table lookup; + // or by looping over x,y,z and multiply rather than computing checkerboard. - if ( (source_cb+ocb)&1 ) { - return (shift)/2; - } else { - return (shift+1)/2; - } + if ( (source_cb+ocb)&1 ) { + return (shift)/2; + } else { + return (shift+1)/2; } - virtual int CheckerBoardFromOindexTable (int Oindex) { - return _checker_board[Oindex]; - } - virtual int CheckerBoardFromOindex (int Oindex) - { - std::vector ocoor; - oCoorFromOindex(ocoor,Oindex); - return CheckerBoard(ocoor); - } - virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ + } + virtual int CheckerBoardFromOindexTable (int Oindex) { + return _checker_board[Oindex]; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + Coordinate ocoor; + oCoorFromOindex(ocoor,Oindex); + return CheckerBoard(ocoor); + } + virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ - if(dim != _checker_dim) return shift; + if(dim != _checker_dim) return shift; - int ocb=CheckerBoardFromOindex(osite); + int ocb=CheckerBoardFromOindex(osite); - return CheckerBoardShiftForCB(source_cb,dim,shift,ocb); - } + return CheckerBoardShiftForCB(source_cb,dim,shift,ocb); + } - virtual int CheckerBoardDestination(int source_cb,int shift,int dim){ - if ( _checker_dim_mask[dim] ) { - // If _fdimensions[checker_dim] 
is odd, then shifting by 1 in other dims - // does NOT cause a parity hop. - int add=(dim==_checker_dim) ? 0 : _fdimensions[_checker_dim]; - if ( (shift+add) &0x1) { - return 1-source_cb; - } else { - return source_cb; - } + virtual int CheckerBoardDestination(int source_cb,int shift,int dim){ + if ( _checker_dim_mask[dim] ) { + // If _fdimensions[checker_dim] is odd, then shifting by 1 in other dims + // does NOT cause a parity hop. + int add=(dim==_checker_dim) ? 0 : _fdimensions[_checker_dim]; + if ( (shift+add) &0x1) { + return 1-source_cb; } else { return source_cb; - } - }; + } else { + return source_cb; - //////////////////////////////////////////////////////////// - // Create Redblack from original grid; require full grid pointer ? - //////////////////////////////////////////////////////////// - GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base) - { - int dims = base->_ndimension; - std::vector checker_dim_mask(dims,1); - int checker_dim = 0; - Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim); - }; - - //////////////////////////////////////////////////////////// - // Create redblack from original grid, with non-trivial checker dim mask - //////////////////////////////////////////////////////////// - GridRedBlackCartesian(const GridBase *base, - const std::vector &checker_dim_mask, - int checker_dim - ) : GridBase(base->_processors,*base) - { - Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ; } + }; - virtual ~GridRedBlackCartesian() = default; -#if 0 - //////////////////////////////////////////////////////////// - // Create redblack grid ;; deprecate these. 
Should not - // need direct creation of redblack without a full grid to base on - //////////////////////////////////////////////////////////// - GridRedBlackCartesian(const GridBase *base, - const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid, - const std::vector &checker_dim_mask, - int checker_dim - ) : GridBase(processor_grid,*base) - { - Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); - } + //////////////////////////////////////////////////////////// + // Create Redblack from original grid; require full grid pointer ? + //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base) : GridBase(base->_processors,*base) + { + int dims = base->_ndimension; + Coordinate checker_dim_mask(dims,1); + int checker_dim = 0; + Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim); + }; - //////////////////////////////////////////////////////////// - // Create redblack grid - //////////////////////////////////////////////////////////// - GridRedBlackCartesian(const GridBase *base, - const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector &processor_grid) : GridBase(processor_grid,*base) - { - std::vector checker_dim_mask(dimensions.size(),1); - int checker_dim = 0; - Init(dimensions,simd_layout,processor_grid,checker_dim_mask,checker_dim); - } -#endif + //////////////////////////////////////////////////////////// + // Create redblack from original grid, with non-trivial checker dim mask + //////////////////////////////////////////////////////////// + GridRedBlackCartesian(const GridBase *base, + const Coordinate &checker_dim_mask, + int checker_dim + ) : GridBase(base->_processors,*base) + { + Init(base->_fdimensions,base->_simd_layout,base->_processors,checker_dim_mask,checker_dim) ; + } - void Init(const std::vector &dimensions, - const std::vector &simd_layout, - const std::vector 
&processor_grid, - const std::vector &checker_dim_mask, - int checker_dim) - { + virtual ~GridRedBlackCartesian() = default; + + void Init(const Coordinate &dimensions, + const Coordinate &simd_layout, + const Coordinate &processor_grid, + const Coordinate &checker_dim_mask, + int checker_dim) + { _isCheckerBoarded = true; - _checker_dim = checker_dim; - assert(checker_dim_mask[checker_dim] == 1); - _ndimension = dimensions.size(); - assert(checker_dim_mask.size() == _ndimension); - assert(processor_grid.size() == _ndimension); - assert(simd_layout.size() == _ndimension); + _checker_dim = checker_dim; + assert(checker_dim_mask[checker_dim] == 1); + _ndimension = dimensions.size(); + assert(checker_dim_mask.size() == _ndimension); + assert(processor_grid.size() == _ndimension); + assert(simd_layout.size() == _ndimension); - _fdimensions.resize(_ndimension); - _gdimensions.resize(_ndimension); - _ldimensions.resize(_ndimension); - _rdimensions.resize(_ndimension); - _simd_layout.resize(_ndimension); - _lstart.resize(_ndimension); - _lend.resize(_ndimension); + _fdimensions.resize(_ndimension); + _gdimensions.resize(_ndimension); + _ldimensions.resize(_ndimension); + _rdimensions.resize(_ndimension); + _simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); - _ostride.resize(_ndimension); - _istride.resize(_ndimension); + _ostride.resize(_ndimension); + _istride.resize(_ndimension); - _fsites = _gsites = _osites = _isites = 1; + _fsites = _gsites = _osites = _isites = 1; - _checker_dim_mask = checker_dim_mask; + _checker_dim_mask = checker_dim_mask; - for (int d = 0; d < _ndimension; d++) + for (int d = 0; d < _ndimension; d++) { _fdimensions[d] = dimensions[d]; _gdimensions[d] = _fdimensions[d]; @@ -203,11 +172,11 @@ public: _gsites = _gsites * _gdimensions[d]; if (d == _checker_dim) - { - assert((_gdimensions[d] & 0x1) == 0); - _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard - _gsites /= 2; - } + { + 
assert((_gdimensions[d] & 0x1) == 0); + _gdimensions[d] = _gdimensions[d] / 2; // Remove a checkerboard + _gsites /= 2; + } _ldimensions[d] = _gdimensions[d] / _processors[d]; assert(_ldimensions[d] * _processors[d] == _gdimensions[d]); _lstart[d] = _processor_coor[d] * _ldimensions[d]; @@ -222,42 +191,42 @@ public: // all elements of a simd vector must have same checkerboard. // If Ls vectorised, this must still be the case; e.g. dwf rb5d if (_simd_layout[d] > 1) - { - if (checker_dim_mask[d]) - { - assert((_rdimensions[d] & 0x1) == 0); - } - } + { + if (checker_dim_mask[d]) + { + assert((_rdimensions[d] & 0x1) == 0); + } + } _osites *= _rdimensions[d]; _isites *= _simd_layout[d]; // Addressing support if (d == 0) - { - _ostride[d] = 1; - _istride[d] = 1; - } + { + _ostride[d] = 1; + _istride[d] = 1; + } else - { - _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; - _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; - } + { + _ostride[d] = _ostride[d - 1] * _rdimensions[d - 1]; + _istride[d] = _istride[d - 1] * _simd_layout[d - 1]; + } } - //////////////////////////////////////////////////////////////////////////////////////////// - // subplane information - //////////////////////////////////////////////////////////////////////////////////////////// - _slice_block.resize(_ndimension); - _slice_stride.resize(_ndimension); - _slice_nblock.resize(_ndimension); + //////////////////////////////////////////////////////////////////////////////////////////// + // subplane information + //////////////////////////////////////////////////////////////////////////////////////////// + _slice_block.resize(_ndimension); + _slice_stride.resize(_ndimension); + _slice_nblock.resize(_ndimension); - int block = 1; - int nblock = 1; - for (int d = 0; d < _ndimension; d++) - nblock *= _rdimensions[d]; + int block = 1; + int nblock = 1; + for (int d = 0; d < _ndimension; d++) + nblock *= _rdimensions[d]; - for (int d = 0; d < _ndimension; d++) + for (int d = 0; d < _ndimension; d++) 
{ nblock /= _rdimensions[d]; _slice_block[d] = block; @@ -266,55 +235,55 @@ public: block = block * _rdimensions[d]; } - //////////////////////////////////////////////// - // Create a checkerboard lookup table - //////////////////////////////////////////////// - int rvol = 1; - for (int d = 0; d < _ndimension; d++) + //////////////////////////////////////////////// + // Create a checkerboard lookup table + //////////////////////////////////////////////// + int rvol = 1; + for (int d = 0; d < _ndimension; d++) { rvol = rvol * _rdimensions[d]; } - _checker_board.resize(rvol); - for (int osite = 0; osite < _osites; osite++) + _checker_board.resize(rvol); + for (int osite = 0; osite < _osites; osite++) { _checker_board[osite] = CheckerBoardFromOindex(osite); } - }; + }; - protected: - virtual int oIndex(std::vector &coor) - { - int idx = 0; - for (int d = 0; d < _ndimension; d++) +protected: + virtual int oIndex(Coordinate &coor) + { + int idx = 0; + for (int d = 0; d < _ndimension; d++) { if (d == _checker_dim) - { - idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); - } + { + idx += _ostride[d] * ((coor[d] / 2) % _rdimensions[d]); + } else - { - idx += _ostride[d] * (coor[d] % _rdimensions[d]); - } + { + idx += _ostride[d] * (coor[d] % _rdimensions[d]); + } } - return idx; - }; + return idx; + }; - virtual int iIndex(std::vector &lcoor) - { - int idx = 0; - for (int d = 0; d < _ndimension; d++) + virtual int iIndex(Coordinate &lcoor) + { + int idx = 0; + for (int d = 0; d < _ndimension; d++) { if (d == _checker_dim) - { - idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); - } + { + idx += _istride[d] * (lcoor[d] / (2 * _rdimensions[d])); + } else - { - idx += _istride[d] * (lcoor[d] / _rdimensions[d]); - } + { + idx += _istride[d] * (lcoor[d] / _rdimensions[d]); + } } - return idx; - } + return idx; + } }; -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/communicator/Communicator.h b/Grid/communicator/Communicator.h index d4ec5a13..76d6f37a 100644 --- 
a/Grid/communicator/Communicator.h +++ b/Grid/communicator/Communicator.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,11 +23,12 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H +#include #include #include diff --git a/Grid/communicator/Communicator_base.cc b/Grid/communicator/Communicator_base.cc index edbf26af..dfa4846b 100644 --- a/Grid/communicator/Communicator_base.cc +++ b/Grid/communicator/Communicator_base.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,15 +23,15 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include #include #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout @@ -47,8 +47,8 @@ int CartesianCommunicator::Dimensions(void) { return int CartesianCommunicator::IsBoss(void) { return _processor==0; }; int CartesianCommunicator::BossRank(void) { return 0; }; int CartesianCommunicator::ThisRank(void) { return _processor; }; -const std::vector & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; }; -const std::vector & CartesianCommunicator::ProcessorGrid(void) { return _processors; }; +const Coordinate & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; }; +const Coordinate & CartesianCommunicator::ProcessorGrid(void) { return _processors; }; int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; }; //////////////////////////////////////////////////////////////////////////////// @@ -72,5 +72,6 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) GlobalSumVector((double *)c,2*N); } -} +NAMESPACE_END(Grid); + diff --git a/Grid/communicator/Communicator_base.h b/Grid/communicator/Communicator_base.h index 7d6911d3..11dbfcbb 100644 --- a/Grid/communicator/Communicator_base.h +++ b/Grid/communicator/Communicator_base.h @@ -1,5 +1,5 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,8 +24,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_COMMUNICATOR_BASE_H #define GRID_COMMUNICATOR_BASE_H @@ -34,7 +34,7 @@ Author: Peter Boyle /////////////////////////////////// #include -namespace Grid { +NAMESPACE_BEGIN(Grid); class CartesianCommunicator : public SharedMemory { @@ -52,9 +52,9 @@ public: // Communicator should know nothing of the physics grid, only processor grid. //////////////////////////////////////////// int _Nprocessors; // How many in all - std::vector _processors; // Which dimensions get relayed out over processors lanes. + Coordinate _processors; // Which dimensions get relayed out over processors lanes. int _processor; // linear processor rank - std::vector _processor_coor; // linear processor coordinate + Coordinate _processor_coor; // linear processor coordinate unsigned long _ndimension; static Grid_MPI_Comm communicator_world; Grid_MPI_Comm communicator; @@ -69,34 +69,34 @@ public: // Constructors to sub-divide a parent communicator // and default to comm world //////////////////////////////////////////////// - CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank); - CartesianCommunicator(const std::vector &pdimensions_in); + CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank); + CartesianCommunicator(const Coordinate &pdimensions_in); virtual ~CartesianCommunicator(); - private: +private: //////////////////////////////////////////////// // Private initialise from an MPI communicator // Can use after an MPI_Comm_split, but hidden from user so private //////////////////////////////////////////////// - void InitFromMPICommunicator(const std::vector &processors, Grid_MPI_Comm 
communicator_base); - - public: + void InitFromMPICommunicator(const Coordinate &processors, Grid_MPI_Comm communicator_base); +public: + //////////////////////////////////////////////////////////////////////////////////////// // Wraps MPI_Cart routines, or implements equivalent on other impls //////////////////////////////////////////////////////////////////////////////////////// void ShiftedRanks(int dim,int shift,int & source, int & dest); - int RankFromProcessorCoor(std::vector &coor); - void ProcessorCoorFromRank(int rank,std::vector &coor); + int RankFromProcessorCoor(Coordinate &coor); + void ProcessorCoorFromRank(int rank,Coordinate &coor); int Dimensions(void) ; int IsBoss(void) ; int BossRank(void) ; int ThisRank(void) ; - const std::vector & ThisProcessorCoor(void) ; - const std::vector & ProcessorGrid(void) ; + const Coordinate & ThisProcessorCoor(void) ; + const Coordinate & ProcessorGrid(void) ; int ProcessorCount(void) ; //////////////////////////////////////////////////////////////////////////////// @@ -197,11 +197,12 @@ public: void AllToAll(void *in,void *out,uint64_t words ,uint64_t bytes); template void Broadcast(int root,obj &data) - { - Broadcast(root,(void *)&data,sizeof(data)); - }; + { + Broadcast(root,(void *)&data,sizeof(data)); + } }; -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/communicator/Communicator_mpi3.cc b/Grid/communicator/Communicator_mpi3.cc index 5f46cf18..2576b1fa 100644 --- a/Grid/communicator/Communicator_mpi3.cc +++ b/Grid/communicator/Communicator_mpi3.cc @@ -23,12 +23,12 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); Grid_MPI_Comm CartesianCommunicator::communicator_world; @@ -44,10 +44,15 @@ void CartesianCommunicator::Init(int *argc, char ***argv) MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided); + //If only 1 comms thread we require any threading mode other than SINGLE, but for multiple comms threads we need MULTIPLE - if( (nCommThreads == 1 && provided == MPI_THREAD_SINGLE) || - (nCommThreads > 1 && provided != MPI_THREAD_MULTIPLE) ) + if( (nCommThreads == 1) && (provided == MPI_THREAD_SINGLE) ) { assert(0); + } + + if( (nCommThreads > 1) && (provided != MPI_THREAD_MULTIPLE) ) { + assert(0); + } } // Never clean up as done once. 
@@ -69,14 +74,14 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); assert(ierr==0); } -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) +int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { int rank; int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); assert(ierr==0); return rank; } -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) +void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor) { coor.resize(_ndimension); int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); @@ -86,7 +91,7 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &c //////////////////////////////////////////////////////////////////////////////////////////////////////// // Initialises from communicator_world //////////////////////////////////////////////////////////////////////////////////////////////////////// -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +CartesianCommunicator::CartesianCommunicator(const Coordinate &processors) { MPI_Comm optimal_comm; //////////////////////////////////////////////////// @@ -105,12 +110,12 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) ////////////////////////////////// // Try to subdivide communicator ////////////////////////////////// -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) +CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank) { _ndimension = processors.size(); assert(_ndimension>=1); int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension); - std::vector parent_processor_coor(_ndimension,0); - std::vector parent_processors (_ndimension,1); + Coordinate parent_processor_coor(_ndimension,0); + 
Coordinate parent_processors (_ndimension,1); // Can make 5d grid from 4d etc... int pad = _ndimension-parent_ndimension; @@ -133,9 +138,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, int Nchild = Nparent/childsize; assert (childsize * Nchild == Nparent); - std::vector ccoor(_ndimension); // coor within subcommunicator - std::vector scoor(_ndimension); // coor of split within parent - std::vector ssize(_ndimension); // coor of split within parent + Coordinate ccoor(_ndimension); // coor within subcommunicator + Coordinate scoor(_ndimension); // coor of split within parent + Coordinate ssize(_ndimension); // coor of split within parent for(int d=0;d<_ndimension;d++){ ccoor[d] = parent_processor_coor[d] % processors[d]; @@ -152,36 +157,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, MPI_Comm comm_split; if ( Nchild > 1 ) { - if(0){ - std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec< &processors, // Take the right SHM buffers ////////////////////////////////////////////////////////////////////////////////////////////////////// SetCommunicator(comm_split); - + /////////////////////////////////////////////// // Free the temp communicator /////////////////////////////////////////////// @@ -220,7 +195,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors, } } -void CartesianCommunicator::InitFromMPICommunicator(const std::vector &processors, MPI_Comm communicator_base) +void CartesianCommunicator::InitFromMPICommunicator(const Coordinate &processors, MPI_Comm communicator_base) { //////////////////////////////////////////////////// // Creates communicator, and the communicator_halo @@ -237,7 +212,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector &proc _Nprocessors*=_processors[i]; } - std::vector periodic(_ndimension,1); + Coordinate periodic(_ndimension,1); MPI_Cart_create(communicator_base, 
_ndimension,&_processors[0],&periodic[0],0,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); @@ -474,7 +449,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) { - std::vector row(_ndimension,1); + Coordinate row(_ndimension,1); assert(dim>=0 && dim<_ndimension); // Split the communicator @@ -503,7 +478,6 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t MPI_Type_free(&object); } +NAMESPACE_END(Grid); -} - diff --git a/Grid/communicator/Communicator_none.cc b/Grid/communicator/Communicator_none.cc index 2369b9b5..b8a15a0e 100644 --- a/Grid/communicator/Communicator_none.cc +++ b/Grid/communicator/Communicator_none.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,11 +23,11 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include -namespace Grid { +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////////////////////////////////////////// // Info that is setup once and indept of cartesian layout @@ -38,18 +38,18 @@ void CartesianCommunicator::Init(int *argc, char *** arv) { GlobalSharedMemory::Init(communicator_world); GlobalSharedMemory::SharedMemoryAllocate( - GlobalSharedMemory::MAX_MPI_SHM_BYTES, - GlobalSharedMemory::Hugepages); + GlobalSharedMemory::MAX_MPI_SHM_BYTES, + GlobalSharedMemory::Hugepages); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors,const CartesianCommunicator &parent,int &srank) +CartesianCommunicator::CartesianCommunicator(const Coordinate &processors,const CartesianCommunicator &parent,int &srank) : CartesianCommunicator(processors) { srank=0; SetCommunicator(communicator_world); } -CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +CartesianCommunicator::CartesianCommunicator(const Coordinate &processors) { _processors = processors; _ndimension = processors.size(); assert(_ndimension>=1); @@ -122,8 +122,8 @@ int CartesianCommunicator::RankWorld(void){return 0;} void CartesianCommunicator::Barrier(void){} void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {} void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { } -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { return 0;} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor){ coor = _processor_coor; } +int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, 
Coordinate &coor){ coor = _processor_coor; } void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) { source =0; @@ -160,6 +160,6 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector #include -namespace Grid { +NAMESPACE_BEGIN(Grid); // static data +int GlobalSharedMemory::HPEhypercube = 1; uint64_t GlobalSharedMemory::MAX_MPI_SHM_BYTES = 1024LL*1024LL*1024LL; int GlobalSharedMemory::Hugepages = 0; int GlobalSharedMemory::_ShmSetup; @@ -76,6 +77,7 @@ void *SharedMemory::ShmBufferMalloc(size_t bytes){ std::cout<< " Current value is " << (heap_size/(1024*1024)) < See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ - - -// TODO -// 1) move includes into SharedMemory.cc -// -// 2) split shared memory into a) optimal communicator creation from comm world -// -// b) shared memory buffers container -// -- static globally shared; init once -// -- per instance set of buffers. 
-// - #pragma once #include @@ -57,26 +45,32 @@ Author: Peter Boyle #include #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); #if defined (GRID_COMMS_MPI3) - typedef MPI_Comm Grid_MPI_Comm; - typedef MPI_Request CommsRequest_t; +typedef MPI_Comm Grid_MPI_Comm; +typedef MPI_Request CommsRequest_t; #else - typedef int CommsRequest_t; - typedef int Grid_MPI_Comm; +typedef int CommsRequest_t; +typedef int Grid_MPI_Comm; #endif class GlobalSharedMemory { - private: +private: static const int MAXLOG2RANKSPERNODE = 16; + // Init once lock on the buffer allocation static int _ShmSetup; static int _ShmAlloc; static uint64_t _ShmAllocBytes; - public: +public: + /////////////////////////////////////// + // HPE 8600 hypercube optimisation + /////////////////////////////////////// + static int HPEhypercube; + static int ShmSetup(void) { return _ShmSetup; } static int ShmAlloc(void) { return _ShmAlloc; } static uint64_t ShmAllocBytes(void) { return _ShmAllocBytes; } @@ -102,14 +96,16 @@ class GlobalSharedMemory { // Create an optimal reordered communicator that makes MPI_Cart_create get it right ////////////////////////////////////////////////////////////////////////////////////// static void Init(Grid_MPI_Comm comm); // Typically MPI_COMM_WORLD - static void OptimalCommunicator(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian - static void OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian - static void OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicator (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorHypercube (const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns 
MPI_COMM_WORLD into right layout for Cartesian + static void OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm); // Turns MPI_COMM_WORLD into right layout for Cartesian /////////////////////////////////////////////////// // Provide shared memory facilities off comm world /////////////////////////////////////////////////// static void SharedMemoryAllocate(uint64_t bytes, int flags); static void SharedMemoryFree(void); + static void SharedMemoryCopy(void *dest,const void *src,size_t bytes); + static void SharedMemoryZero(void *dest,size_t bytes); }; @@ -118,14 +114,14 @@ class GlobalSharedMemory { ////////////////////////////// class SharedMemory { - private: +private: static const int MAXLOG2RANKSPERNODE = 16; size_t heap_top; size_t heap_bytes; size_t heap_size; - protected: +protected: Grid_MPI_Comm ShmComm; // for barriers int ShmRank; @@ -133,7 +129,7 @@ class SharedMemory std::vector ShmCommBufs; std::vector ShmRanks;// Mapping comm ranks to Shm ranks - public: +public: SharedMemory() {}; ~SharedMemory(); /////////////////////////////////////////////////////////////////////////////////////// @@ -150,6 +146,7 @@ class SharedMemory // Call on any instance /////////////////////////////////////////////////// void SharedMemoryTest(void); + void *ShmBufferSelf(void); void *ShmBuffer (int rank); void *ShmBufferTranslate(int rank,void * local_p); @@ -164,4 +161,5 @@ class SharedMemory }; -} +NAMESPACE_END(Grid); + diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index b2a6ab53..b2896bda 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -29,8 +29,12 @@ Author: Peter Boyle #include #include -namespace Grid { +#ifdef GRID_NVCC +#include +#endif +NAMESPACE_BEGIN(Grid); +#define header "SharedMemoryMpi: " /*Construct from an MPI communicator*/ void GlobalSharedMemory::Init(Grid_MPI_Comm comm) { @@ -46,6 +50,11 @@ void 
GlobalSharedMemory::Init(Grid_MPI_Comm comm) MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&WorldShmComm); MPI_Comm_rank(WorldShmComm ,&WorldShmRank); MPI_Comm_size(WorldShmComm ,&WorldShmSize); + + if ( WorldRank == 0) { + std::cout << header " World communicator of size " < &processors,Grid_MPI_Comm & optimal_comm) +void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_MPI_Comm & optimal_comm) { ////////////////////////////////////////////////////////////////////////////// // Look and see if it looks like an HPE 8600 based on hostname conventions @@ -143,10 +152,10 @@ void GlobalSharedMemory::OptimalCommunicator(const std::vector &processors, gethostname(name,namelen); int nscan = sscanf(name,"r%di%dn%d",&R,&I,&N) ; - if(nscan==3) OptimalCommunicatorHypercube(processors,optimal_comm); - else OptimalCommunicatorSharedMemory(processors,optimal_comm); + if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm); + else OptimalCommunicatorSharedMemory(processors,optimal_comm); } -void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm) { //////////////////////////////////////////////////////////////// // Assert power of two shm_size. 
@@ -188,9 +197,9 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &pr } std::string hname(name); - std::cout << "hostname "< &pr //////////////////////////////////////////////////////////////// int ndimension = processors.size(); std::vector processor_coor(ndimension); - std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); + std::vector WorldDims = processors.toVector(); + std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); std::vector HyperCoor(ndimension); int dim = 0; @@ -221,7 +231,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &pr while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%ndimension; ShmDims[dim]*=2; dim=(dim+1)%ndimension; - } + } //////////////////////////////////////////////////////////////// // Establish torus of processes and nodes with sub-blockings @@ -240,7 +250,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &pr HyperCoor[d]=hcoor & msk; HyperCoor[d]=BinaryToGray(HyperCoor[d]); // Space filling curve magic hcoor = hcoor >> bits; - } + } //////////////////////////////////////////////////////////////// // Check processor counts match //////////////////////////////////////////////////////////////// @@ -269,7 +279,7 @@ void GlobalSharedMemory::OptimalCommunicatorHypercube(const std::vector &pr int ierr= MPI_Comm_split(WorldComm,0,rank,&optimal_comm); assert(ierr==0); } -void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector &processors,Grid_MPI_Comm & optimal_comm) +void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const Coordinate &processors,Grid_MPI_Comm & optimal_comm) { //////////////////////////////////////////////////////////////// // Assert power of two shm_size. 
@@ -282,9 +292,9 @@ void GlobalSharedMemory::OptimalCommunicatorSharedMemory(const std::vector // in a maximally symmetrical way //////////////////////////////////////////////////////////////// int ndimension = processors.size(); - std::vector processor_coor(ndimension); - std::vector WorldDims = processors; std::vector ShmDims (ndimension,1); std::vector NodeDims (ndimension); - std::vector ShmCoor (ndimension); std::vector NodeCoor (ndimension); std::vector WorldCoor(ndimension); + Coordinate processor_coor(ndimension); + Coordinate WorldDims = processors; Coordinate ShmDims(ndimension,1); Coordinate NodeDims (ndimension); + Coordinate ShmCoor(ndimension); Coordinate NodeCoor(ndimension); Coordinate WorldCoor(ndimension); int dim = 0; for(int l2=0;l2 #ifdef GRID_MPI3_SHMGET void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) { - std::cout << "SharedMemoryAllocate "<< bytes<< " shmget implementation "< See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef _GRID_CSHIFT_COMMON_H_ -#define _GRID_CSHIFT_COMMON_H_ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split @@ -36,20 +35,21 @@ namespace Grid { template void Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimension,int plane,int cbmask, int off=0) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask = 0x3; } - int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; - int e2=rhs._grid->_slice_block[dimension]; + int so=plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane + int 
e1=rhs.Grid()->_slice_nblock[dimension]; + int e2=rhs.Grid()->_slice_block[dimension]; int ent = 0; - static std::vector > table; table.resize(e1*e2); + static Vector > table; table.resize(e1*e2); + int stride=rhs.Grid()->_slice_stride[dimension]; - int stride=rhs._grid->_slice_stride[dimension]; + auto rhs_v = rhs.View(); if ( cbmask == 0x3 ) { for(int n=0;n &rhs,commVector &buffer,int dimen for(int n=0;nCheckerBoardFromOindex(o+b); + int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { table[ent++]=std::pair (off+bo++,so+o+b); } } } } - parallel_for(int i=0;i void -Gather_plane_extract(const Lattice &rhs,std::vector pointers,int dimension,int plane,int cbmask) +Gather_plane_extract(const Lattice &rhs, + ExtractPointerArray pointers, + int dimension,int plane,int cbmask) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask = 0x3; } - int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane + int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; - int e2=rhs._grid->_slice_block[dimension]; - int n1=rhs._grid->_slice_stride[dimension]; + int e1=rhs.Grid()->_slice_nblock[dimension]; + int e2=rhs.Grid()->_slice_block[dimension]; + int n1=rhs.Grid()->_slice_stride[dimension]; + auto rhs_v = rhs.View(); if ( cbmask ==0x3){ - parallel_for_nest2(int n=0;n(temp,pointers,offset); - } - } + }); } else { // Case of SIMD split AND checker dim cannot currently be hit, except in // Test_cshift_red_black code. 
std::cout << " Dense packed buffer WARNING " <CheckerBoardFromOindex(o+b); + int ocb=1<CheckerBoardFromOindex(o+b); int offset = b+n*e2; if ( ocb & cbmask ) { - vobj temp =rhs._odata[so+o+b]; + vobj temp =rhs_v[so+o+b]; extract(temp,pointers,offset); } } - } + }); } } @@ -131,17 +133,17 @@ Gather_plane_extract(const Lattice &rhs,std::vector void Scatter_plane_simple (Lattice &rhs,commVector &buffer, int dimension,int plane,int cbmask) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask=0x3; } - int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane + int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; - int e2=rhs._grid->_slice_block[dimension]; - int stride=rhs._grid->_slice_stride[dimension]; + int e1=rhs.Grid()->_slice_nblock[dimension]; + int e2=rhs.Grid()->_slice_block[dimension]; + int stride=rhs.Grid()->_slice_stride[dimension]; static std::vector > table; table.resize(e1*e2); int ent =0; @@ -150,8 +152,8 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; - int bo =n*rhs._grid->_slice_block[dimension]; + int o =n*rhs.Grid()->_slice_stride[dimension]; + int bo =n*rhs.Grid()->_slice_block[dimension]; table[ent++] = std::pair(so+o+b,bo+b); } } @@ -160,57 +162,60 @@ template void Scatter_plane_simple (Lattice &rhs,commVector_slice_stride[dimension]; - int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup + int o =n*rhs.Grid()->_slice_stride[dimension]; + int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { table[ent++]=std::pair (so+o+b,bo++); } } } } - - parallel_for(int i=0;i void Scatter_plane_merge(Lattice &rhs,std::vector pointers,int dimension,int plane,int cbmask) +template void Scatter_plane_merge(Lattice 
&rhs,ExtractPointerArray pointers,int dimension,int plane,int cbmask) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask=0x3; } - int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane + int so = plane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; - int e2=rhs._grid->_slice_block[dimension]; + int e1=rhs.Grid()->_slice_nblock[dimension]; + int e2=rhs.Grid()->_slice_block[dimension]; if(cbmask ==0x3 ) { - parallel_for_nest2(int n=0;n_slice_stride[dimension]; - int offset = b+n*rhs._grid->_slice_block[dimension]; - merge(rhs._odata[so+o+b],pointers,offset); + int o = n*rhs.Grid()->_slice_stride[dimension]; + int offset = b+n*rhs.Grid()->_slice_block[dimension]; + merge(rhs_v[so+o+b],pointers,offset); } - } + }); } else { // Case of SIMD split AND checker dim cannot currently be hit, except in // Test_cshift_red_black code. 
// std::cout << "Scatter_plane merge assert(0); think this is buggy FIXME "<< std::endl;// think this is buggy FIXME std::cout<<" Unthreaded warning -- buffer is not densely packed ??"<_slice_stride[dimension]; - int offset = b+n*rhs._grid->_slice_block[dimension]; - int ocb=1<CheckerBoardFromOindex(o+b); + int o = n*rhs.Grid()->_slice_stride[dimension]; + int offset = b+n*rhs.Grid()->_slice_block[dimension]; + int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb&cbmask ) { - merge(rhs._odata[so+o+b],pointers,offset); + merge(rhs_v[so+o+b],pointers,offset); } } } @@ -222,18 +227,18 @@ template void Scatter_plane_merge(Lattice &rhs,std::vector void Copy_plane(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask=0x3; } - int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane - int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane + int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane + int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc - int e2=rhs._grid->_slice_block[dimension]; - int stride = rhs._grid->_slice_stride[dimension]; + int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc + int e2=rhs.Grid()->_slice_block[dimension]; + int stride = rhs.Grid()->_slice_stride[dimension]; static std::vector > table; table.resize(e1*e2); int ent=0; @@ -248,7 +253,7 @@ template void Copy_plane(Lattice& lhs,const Lattice &rhs for(int n=0;nCheckerBoardFromOindex(o); + int ocb=1<CheckerBoardFromOindex(o); if ( ocb&cbmask ) { table[ent++] = std::pair(lo+o,ro+o); } @@ -256,32 +261,33 @@ template void Copy_plane(Lattice& lhs,const Lattice 
&rhs } } - parallel_for(int i=0;i void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) { - int rd = rhs._grid->_rdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; - if ( !rhs._grid->CheckerBoarded(dimension) ) { + if ( !rhs.Grid()->CheckerBoarded(dimension) ) { cbmask=0x3; } - int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane - int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane + int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane + int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane - int e1=rhs._grid->_slice_nblock[dimension]; - int e2=rhs._grid->_slice_block [dimension]; - int stride = rhs._grid->_slice_stride[dimension]; + int e1=rhs.Grid()->_slice_nblock[dimension]; + int e2=rhs.Grid()->_slice_block [dimension]; + int stride = rhs.Grid()->_slice_stride[dimension]; static std::vector > table; table.resize(e1*e2); int ent=0; - double t_tab,t_perm; if ( cbmask == 0x3 ) { for(int n=0;n void Copy_plane_permute(Lattice& lhs,const LatticeCheckerBoardFromOindex(o+b); + int ocb=1<CheckerBoardFromOindex(o+b); if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); }} } - parallel_for(int i=0;i void Cshift_local(Lattice& ret,const Lattice &r { int sshift[2]; - sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); - sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even); + sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd); - double t_local; - if ( sshift[0] == sshift[1] ) { Cshift_local(ret,rhs,dimension,shift,0x3); } else { @@ -324,7 +330,7 @@ template void Cshift_local(Lattice& ret,const Lattice &r template void Cshift_local(Lattice &ret,const Lattice &rhs,int 
dimension,int shift,int cbmask) { - GridBase *grid = rhs._grid; + GridBase *grid = rhs.Grid(); int fd = grid->_fdimensions[dimension]; int rd = grid->_rdimensions[dimension]; int ld = grid->_ldimensions[dimension]; @@ -335,18 +341,18 @@ template void Cshift_local(Lattice &ret,const Lattice &r shift = (shift+fd)%fd; // the permute type - ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension); + ret.Checkerboard() = grid->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); int permute_dim =grid->PermuteDim(dimension); int permute_type=grid->PermuteType(dimension); int permute_type_dist; for(int x=0;x_ostride[dimension]; int cb= (cbmask==0x2)? Odd : Even; - int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); int sx = (x+sshift)%rd; // wrap is whether sshift > rd. @@ -387,5 +393,5 @@ template void Cshift_local(Lattice &ret,const Lattice &r } } -} -#endif +NAMESPACE_END(Grid); + diff --git a/Grid/cshift/Cshift_mpi.h b/Grid/cshift/Cshift_mpi.h index 98972135..0f0e80b1 100644 --- a/Grid/cshift/Cshift_mpi.h +++ b/Grid/cshift/Cshift_mpi.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,33 +24,33 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef _GRID_CSHIFT_MPI_H_ #define _GRID_CSHIFT_MPI_H_ -namespace Grid { +NAMESPACE_BEGIN(Grid); template Lattice Cshift(const Lattice &rhs,int dimension,int shift) { typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; - Lattice ret(rhs._grid); + Lattice ret(rhs.Grid()); - int fd = rhs._grid->_fdimensions[dimension]; - int rd = rhs._grid->_rdimensions[dimension]; + int fd = rhs.Grid()->_fdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; // Map to always positive shift modulo global full dimension. shift = (shift+fd)%fd; - ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension); + ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); // the permute type - int simd_layout = rhs._grid->_simd_layout[dimension]; - int comm_dim = rhs._grid->_processors[dimension] >1 ; - int splice_dim = rhs._grid->_simd_layout[dimension]>1 && (comm_dim); + int simd_layout = rhs.Grid()->_simd_layout[dimension]; + int comm_dim = rhs.Grid()->_processors[dimension] >1 ; + int splice_dim = rhs.Grid()->_simd_layout[dimension]>1 && (comm_dim); if ( !comm_dim ) { @@ -70,10 +70,10 @@ template void Cshift_comms(Lattice& ret,const Lattice &r { int sshift[2]; - sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); - sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even); + sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd); - // std::cout << "Cshift_comms dim "< void 
Cshift_comms_simd(Lattice& ret,const LatticeCheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); - sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + sshift[0] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Even); + sshift[1] = rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,Odd); //std::cout << "Cshift_comms_simd dim "< void Cshift_comms(Lattice &ret,const Lattice &r typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; - GridBase *grid=rhs._grid; - Lattice temp(rhs._grid); + GridBase *grid=rhs.Grid(); + Lattice temp(rhs.Grid()); - int fd = rhs._grid->_fdimensions[dimension]; - int rd = rhs._grid->_rdimensions[dimension]; - int pd = rhs._grid->_processors[dimension]; - int simd_layout = rhs._grid->_simd_layout[dimension]; - int comm_dim = rhs._grid->_processors[dimension] >1 ; + int fd = rhs.Grid()->_fdimensions[dimension]; + int rd = rhs.Grid()->_rdimensions[dimension]; + int pd = rhs.Grid()->_processors[dimension]; + int simd_layout = rhs.Grid()->_simd_layout[dimension]; + int comm_dim = rhs.Grid()->_processors[dimension] >1 ; assert(simd_layout==1); assert(comm_dim==1); assert(shift>=0); assert(shift_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; + int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension]; commVector send_buf(buffer_size); commVector recv_buf(buffer_size); int cb= (cbmask==0x2)? 
Odd : Even; - int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); for(int x=0;x void Cshift_comms(Lattice &ret,const Lattice &r Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask); - int rank = grid->_processor; + // int rank = grid->_processor; int recv_from_rank; int xmit_to_rank; grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); @@ -165,7 +165,7 @@ template void Cshift_comms(Lattice &ret,const Lattice &r template void Cshift_comms_simd(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) { - GridBase *grid=rhs._grid; + GridBase *grid=rhs.Grid(); const int Nsimd = grid->Nsimd(); typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_object scalar_object; @@ -193,21 +193,21 @@ template void Cshift_comms_simd(Lattice &ret,const Lattice_slice_nblock[dimension]*grid->_slice_block[dimension]; - int words = sizeof(vobj)/sizeof(vector_type); + // int words = sizeof(vobj)/sizeof(vector_type); std::vector > send_buf_extract(Nsimd,commVector(buffer_size) ); std::vector > recv_buf_extract(Nsimd,commVector(buffer_size) ); int bytes = buffer_size*sizeof(scalar_object); - std::vector pointers(Nsimd); // - std::vector rpointers(Nsimd); // received pointers + ExtractPointerArray pointers(Nsimd); // + ExtractPointerArray rpointers(Nsimd); // received pointers /////////////////////////////////////////// // Work out what to send where /////////////////////////////////////////// int cb = (cbmask==0x2)? Odd : Even; - int sshift= grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + int sshift= grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb); // loop over outer coord planes orthog to dim for(int x=0;x void Cshift_comms_simd(Lattice &ret,const Lattice 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef _GRID_CSHIFT_NONE_H_ #define _GRID_CSHIFT_NONE_H_ -namespace Grid { +NAMESPACE_BEGIN(Grid); template Lattice Cshift(const Lattice &rhs,int dimension,int shift) { - Lattice ret(rhs._grid); - ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift,dimension); + Lattice ret(rhs.Grid()); + ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension); Cshift_local(ret,rhs,dimension,shift); return ret; } -} +NAMESPACE_END(Grid); + #endif diff --git a/Grid/json/json.hpp b/Grid/json/json.hpp index c8b0cc9e..618aa7a1 100644 --- a/Grid/json/json.hpp +++ b/Grid/json/json.hpp @@ -1,3 +1,4 @@ +#ifndef __NVCC__ /* __ _____ _____ _____ __| | __| | | | JSON for Modern C++ @@ -18918,3 +18919,4 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std #endif +#endif diff --git a/Grid/lattice/Lattice.h b/Grid/lattice/Lattice.h index e2bb2a82..1eea98ed 100644 --- a/Grid/lattice/Lattice.h +++ b/Grid/lattice/Lattice.h @@ -25,9 +25,22 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_LATTICE_H -#define GRID_LATTICE_H - +#pragma once #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include -#endif diff --git a/Grid/lattice/Lattice_ET.h b/Grid/lattice/Lattice_ET.h index 1bb83901..cf7147b9 100644 --- a/Grid/lattice/Lattice_ET.h +++ b/Grid/lattice/Lattice_ET.h @@ -27,7 +27,7 @@ with this program; if not, write to the Free Software 
Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_LATTICE_ET_H #define GRID_LATTICE_ET_H @@ -36,13 +36,13 @@ directory #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////// // Predicated where support //////////////////////////////////////////////////// template -inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, +accelerator_inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, const robj &iffalse) { typename std::remove_const::type ret; @@ -51,11 +51,10 @@ inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, typedef typename vobj::vector_type vector_type; const int Nsimd = vobj::vector_type::Nsimd(); - const int words = sizeof(vobj) / sizeof(vector_type); - std::vector mask(Nsimd); - std::vector truevals(Nsimd); - std::vector falsevals(Nsimd); + ExtractBuffer mask(Nsimd); + ExtractBuffer truevals(Nsimd); + ExtractBuffer falsevals(Nsimd); extract(iftrue, truevals); extract(iffalse, falsevals); @@ -69,158 +68,148 @@ inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, return ret; } -//////////////////////////////////////////// -// recursive evaluation of expressions; Could -// switch to generic approach with variadics, a la -// Antonin's Lat Sim but the repack to variadic with popped -// from tuple is hideous; C++14 introduces std::make_index_sequence for this -//////////////////////////////////////////// - -// leaf eval of lattice ; should enable if protect using traits - -template -using is_lattice = std::is_base_of; - -template -using is_lattice_expr = std::is_base_of; - -template using is_lattice_expr = std::is_base_of; - +///////////////////////////////////////////////////// //Specialization of getVectorType for lattices 
+///////////////////////////////////////////////////// template struct getVectorType >{ typedef typename Lattice::vector_object type; }; - -template -inline sobj eval(const unsigned int ss, const sobj &arg) + +//////////////////////////////////////////// +//-- recursive evaluation of expressions; -- +// handle leaves of syntax tree +/////////////////////////////////////////////////// +template accelerator_inline +sobj eval(const uint64_t ss, const sobj &arg) { return arg; } -template -inline const lobj &eval(const unsigned int ss, const Lattice &arg) { - return arg._odata[ss]; + +template accelerator_inline +const lobj & eval(const uint64_t ss, const LatticeView &arg) +{ + return arg[ss]; +} +template accelerator_inline +const lobj & eval(const uint64_t ss, const Lattice &arg) +{ + auto view = arg.View(); + return view[ss]; } -// handle nodes in syntax tree -template -auto inline eval( - const unsigned int ss, - const LatticeUnaryExpression &expr) // eval one operand - -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) { - return expr.first.func(eval(ss, std::get<0>(expr.second))); +/////////////////////////////////////////////////// +// handle nodes in syntax tree- eval one operand +/////////////////////////////////////////////////// +template accelerator_inline +auto eval(const uint64_t ss, const LatticeUnaryExpression &expr) + -> decltype(expr.op.func( eval(ss, expr.arg1))) +{ + return expr.op.func( eval(ss, expr.arg1) ); } - -template -auto inline eval( - const unsigned int ss, - const LatticeBinaryExpression &expr) // eval two operands - -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), - eval(ss, std::get<1>(expr.second)))) { - return expr.first.func(eval(ss, std::get<0>(expr.second)), - eval(ss, std::get<1>(expr.second))); +/////////////////////// +// eval two operands +/////////////////////// +template accelerator_inline +auto eval(const uint64_t ss, const LatticeBinaryExpression &expr) + -> decltype(expr.op.func( 
eval(ss,expr.arg1),eval(ss,expr.arg2))) +{ + return expr.op.func( eval(ss,expr.arg1), eval(ss,expr.arg2) ); } - -template -auto inline eval(const unsigned int ss, - const LatticeTrinaryExpression - &expr) // eval three operands - -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), - eval(ss, std::get<1>(expr.second)), - eval(ss, std::get<2>(expr.second)))) { - return expr.first.func(eval(ss, std::get<0>(expr.second)), - eval(ss, std::get<1>(expr.second)), - eval(ss, std::get<2>(expr.second))); +/////////////////////// +// eval three operands +/////////////////////// +template accelerator_inline +auto eval(const uint64_t ss, const LatticeTrinaryExpression &expr) + -> decltype(expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3))) +{ + return expr.op.func(eval(ss, expr.arg1), eval(ss, expr.arg2), eval(ss, expr.arg3)); } ////////////////////////////////////////////////////////////////////////// // Obtain the grid from an expression, ensuring conformable. This must follow a -// tree recursion +// tree recursion; must retain grid pointer in the LatticeView class which sucks +// Use a different method, and make it void *. +// Perhaps a conformable method. 
////////////////////////////////////////////////////////////////////////// -template ::value, T1>::type * = nullptr> -inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf +template ::value, T1>::type * = nullptr> +accelerator_inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf { - if (grid) { - conformable(grid, lat._grid); - } - grid = lat._grid; + lat.Conformable(grid); } -template ::value, T1>::type * = nullptr> -inline void GridFromExpression(GridBase *&grid, - const T1 ¬lat) // non-lattice leaf + +template ::value, T1>::type * = nullptr> +accelerator_inline +void GridFromExpression(GridBase *&grid,const T1 ¬lat) // non-lattice leaf {} + template -inline void GridFromExpression(GridBase *&grid, - const LatticeUnaryExpression &expr) { - GridFromExpression(grid, std::get<0>(expr.second)); // recurse +accelerator_inline +void GridFromExpression(GridBase *&grid,const LatticeUnaryExpression &expr) +{ + GridFromExpression(grid, expr.arg1); // recurse } template -inline void GridFromExpression( - GridBase *&grid, const LatticeBinaryExpression &expr) { - GridFromExpression(grid, std::get<0>(expr.second)); // recurse - GridFromExpression(grid, std::get<1>(expr.second)); +accelerator_inline +void GridFromExpression(GridBase *&grid, const LatticeBinaryExpression &expr) +{ + GridFromExpression(grid, expr.arg1); // recurse + GridFromExpression(grid, expr.arg2); } template -inline void GridFromExpression( - GridBase *&grid, const LatticeTrinaryExpression &expr) { - GridFromExpression(grid, std::get<0>(expr.second)); // recurse - GridFromExpression(grid, std::get<1>(expr.second)); - GridFromExpression(grid, std::get<2>(expr.second)); +accelerator_inline +void GridFromExpression(GridBase *&grid, const LatticeTrinaryExpression &expr) +{ + GridFromExpression(grid, expr.arg1); // recurse + GridFromExpression(grid, expr.arg2); // recurse + GridFromExpression(grid, expr.arg3); // recurse } 
////////////////////////////////////////////////////////////////////////// // Obtain the CB from an expression, ensuring conformable. This must follow a // tree recursion ////////////////////////////////////////////////////////////////////////// -template ::value, T1>::type * = nullptr> +template ::value, T1>::type * = nullptr> inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf { if ((cb == Odd) || (cb == Even)) { - assert(cb == lat.checkerboard); + assert(cb == lat.Checkerboard()); } - cb = lat.checkerboard; - // std::cout<::value, T1>::type * = nullptr> +template ::value, T1>::type * = nullptr> inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf { - // std::cout< -inline void CBFromExpression(int &cb, - const LatticeUnaryExpression &expr) { - CBFromExpression(cb, std::get<0>(expr.second)); // recurse - // std::cout< -inline void CBFromExpression(int &cb, - const LatticeBinaryExpression &expr) { - CBFromExpression(cb, std::get<0>(expr.second)); // recurse - CBFromExpression(cb, std::get<1>(expr.second)); - // std::cout< inline +void CBFromExpression(int &cb,const LatticeUnaryExpression &expr) +{ + CBFromExpression(cb, expr.arg1); // recurse AST +} + +template inline +void CBFromExpression(int &cb,const LatticeBinaryExpression &expr) +{ + CBFromExpression(cb, expr.arg1); // recurse AST + CBFromExpression(cb, expr.arg2); // recurse AST } template -inline void CBFromExpression( - int &cb, const LatticeTrinaryExpression &expr) { - CBFromExpression(cb, std::get<0>(expr.second)); // recurse - CBFromExpression(cb, std::get<1>(expr.second)); - CBFromExpression(cb, std::get<2>(expr.second)); - // std::cout< &expr) +{ + CBFromExpression(cb, expr.arg1); // recurse AST + CBFromExpression(cb, expr.arg2); // recurse AST + CBFromExpression(cb, expr.arg3); // recurse AST } //////////////////////////////////////////// // Unary operators and funcs //////////////////////////////////////////// -#define GridUnopClass(name, ret) \ - template \ - 
struct name { \ - static auto inline func(const arg a) -> decltype(ret) { return ret; } \ +#define GridUnopClass(name, ret) \ + template \ + struct name { \ + static auto accelerator_inline func(const arg a) -> decltype(ret) { return ret; } \ }; GridUnopClass(UnarySub, -a); @@ -250,19 +239,21 @@ GridUnopClass(UnaryExp, exp(a)); //////////////////////////////////////////// // Binary operators //////////////////////////////////////////// -#define GridBinOpClass(name, combination) \ - template \ - struct name { \ - static auto inline func(const left &lhs, const right &rhs) \ - -> decltype(combination) const { \ - return combination; \ - } \ - } +#define GridBinOpClass(name, combination) \ + template \ + struct name { \ + static auto accelerator_inline \ + func(const left &lhs, const right &rhs) \ + -> decltype(combination) const \ + { \ + return combination; \ + } \ + }; + GridBinOpClass(BinaryAdd, lhs + rhs); GridBinOpClass(BinarySub, lhs - rhs); GridBinOpClass(BinaryMul, lhs *rhs); GridBinOpClass(BinaryDiv, lhs /rhs); - GridBinOpClass(BinaryAnd, lhs &rhs); GridBinOpClass(BinaryOr, lhs | rhs); GridBinOpClass(BinaryAndAnd, lhs &&rhs); @@ -271,92 +262,71 @@ GridBinOpClass(BinaryOrOr, lhs || rhs); //////////////////////////////////////////////////// // Trinary conditional op //////////////////////////////////////////////////// -#define GridTrinOpClass(name, combination) \ - template \ - struct name { \ - static auto inline func(const predicate &pred, const left &lhs, \ - const right &rhs) -> decltype(combination) const { \ - return combination; \ - } \ - } +#define GridTrinOpClass(name, combination) \ + template \ + struct name { \ + static auto accelerator_inline \ + func(const predicate &pred, const left &lhs, const right &rhs) \ + -> decltype(combination) const \ + { \ + return combination; \ + } \ + }; -GridTrinOpClass( - TrinaryWhere, - (predicatedWhere::type, - typename std::remove_reference::type>(pred, lhs, - rhs))); +GridTrinOpClass(TrinaryWhere, + 
(predicatedWhere::type, + typename std::remove_reference::type>(pred, lhs,rhs))); //////////////////////////////////////////// // Operator syntactical glue //////////////////////////////////////////// -#define GRID_UNOP(name) name -#define GRID_BINOP(name) name -#define GRID_TRINOP(name) \ - name +#define GRID_UNOP(name) name +#define GRID_BINOP(name) name +#define GRID_TRINOP(name) name -#define GRID_DEF_UNOP(op, name) \ - template ::value || \ - is_lattice_expr::value, \ - T1>::type * = nullptr> \ - inline auto op(const T1 &arg) \ - ->decltype(LatticeUnaryExpression( \ - std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \ - return LatticeUnaryExpression( \ - std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \ +#define GRID_DEF_UNOP(op, name) \ + template ::value||is_lattice_expr::value,T1>::type * = nullptr> \ + inline auto op(const T1 &arg) ->decltype(LatticeUnaryExpression(GRID_UNOP(name)(), arg)) \ + { \ + return LatticeUnaryExpression(GRID_UNOP(name)(), arg); \ } -#define GRID_BINOP_LEFT(op, name) \ - template ::value || \ - is_lattice_expr::value, \ - T1>::type * = nullptr> \ - inline auto op(const T1 &lhs, const T2 &rhs) \ - ->decltype( \ - LatticeBinaryExpression( \ - std::make_pair(GRID_BINOP(name)(), \ - std::forward_as_tuple(lhs, rhs)))) { \ - return LatticeBinaryExpression( \ - std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ +#define GRID_BINOP_LEFT(op, name) \ + template ::value||is_lattice_expr::value,T1>::type * = nullptr> \ + inline auto op(const T1 &lhs, const T2 &rhs) \ + ->decltype(LatticeBinaryExpression(GRID_BINOP(name)(),lhs,rhs)) \ + { \ + return LatticeBinaryExpression(GRID_BINOP(name)(),lhs,rhs);\ } -#define GRID_BINOP_RIGHT(op, name) \ - template ::value && \ - !is_lattice_expr::value, \ - T1>::type * = nullptr, \ - typename std::enable_if::value || \ - is_lattice_expr::value, \ - T2>::type * = nullptr> \ - inline auto op(const T1 &lhs, const T2 &rhs) \ - ->decltype( \ - 
LatticeBinaryExpression( \ - std::make_pair(GRID_BINOP(name)(), \ - std::forward_as_tuple(lhs, rhs)))) { \ - return LatticeBinaryExpression( \ - std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ +#define GRID_BINOP_RIGHT(op, name) \ + template ::value&&!is_lattice_expr::value,T1>::type * = nullptr, \ + typename std::enable_if< is_lattice::value|| is_lattice_expr::value,T2>::type * = nullptr> \ + inline auto op(const T1 &lhs, const T2 &rhs) \ + ->decltype(LatticeBinaryExpression(GRID_BINOP(name)(),lhs, rhs)) \ + { \ + return LatticeBinaryExpression(GRID_BINOP(name)(),lhs, rhs); \ } -#define GRID_DEF_BINOP(op, name) \ - GRID_BINOP_LEFT(op, name); \ +#define GRID_DEF_BINOP(op, name) \ + GRID_BINOP_LEFT(op, name); \ GRID_BINOP_RIGHT(op, name); -#define GRID_DEF_TRINOP(op, name) \ - template \ - inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \ - ->decltype( \ - LatticeTrinaryExpression(std::make_pair( \ - GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \ - return LatticeTrinaryExpression(std::make_pair( \ - GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \ +#define GRID_DEF_TRINOP(op, name) \ + template \ + inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \ + ->decltype(LatticeTrinaryExpression(GRID_TRINOP(name)(),pred, lhs, rhs)) \ + { \ + return LatticeTrinaryExpression(GRID_TRINOP(name)(),pred, lhs, rhs); \ } + //////////////////////// // Operator definitions //////////////////////// - GRID_DEF_UNOP(operator-, UnarySub); GRID_DEF_UNOP(Not, UnaryNot); GRID_DEF_UNOP(operator!, UnaryNot); @@ -400,29 +370,27 @@ GRID_DEF_TRINOP(where, TrinaryWhere); ///////////////////////////////////////////////////////////// template auto closure(const LatticeUnaryExpression &expr) - -> Lattice(expr.second))))> { - Lattice(expr.second))))> ret( - expr); + -> Lattice +{ + Lattice ret(expr); return ret; } template auto closure(const LatticeBinaryExpression &expr) - -> Lattice(expr.second)), - eval(0, 
std::get<1>(expr.second))))> { - Lattice(expr.second)), - eval(0, std::get<1>(expr.second))))> - ret(expr); + -> Lattice +{ + Lattice ret(expr); return ret; } template auto closure(const LatticeTrinaryExpression &expr) - -> Lattice(expr.second)), - eval(0, std::get<1>(expr.second)), - eval(0, std::get<2>(expr.second))))> { - Lattice(expr.second)), - eval(0, std::get<1>(expr.second)), - eval(0, std::get<2>(expr.second))))> - ret(expr); + -> Lattice +{ + Lattice ret(expr); return ret; } @@ -433,34 +401,7 @@ auto closure(const LatticeTrinaryExpression &expr) #undef GRID_DEF_UNOP #undef GRID_DEF_BINOP #undef GRID_DEF_TRINOP -} -#if 0 -using namespace Grid; - - int main(int argc,char **argv){ - - Lattice v1(16); - Lattice v2(16); - Lattice v3(16); - - BinaryAdd tmp; - LatticeBinaryExpression,Lattice &,Lattice &> - expr(std::make_pair(tmp, - std::forward_as_tuple(v1,v2))); - tmp.func(eval(0,v1),eval(0,v2)); - - auto var = v1+v2; - std::cout< &v1,Lattice &v2,Lattice &v3) -{ - v3=v1+v2+v1*v2; -} -#endif +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_arith.h b/Grid/lattice/Lattice_arith.h index d1cbc84a..3543d6aa 100644 --- a/Grid/lattice/Lattice_arith.h +++ b/Grid/lattice/Lattice_arith.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,233 +23,235 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_ARITH_H #define GRID_LATTICE_ARITH_H -namespace Grid { - - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // avoid copy back routines for mult, mac, sub, add - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template strong_inline - void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(ret,rhs); - conformable(lhs,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); -#endif - } - } - - template strong_inline - void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(ret,rhs); - conformable(lhs,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); -#endif - } - } - - template strong_inline - void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(ret,rhs); - conformable(lhs,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); -#endif - } - } - template strong_inline - void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ - ret.checkerboard = 
lhs.checkerboard; - conformable(ret,rhs); - conformable(lhs,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - add(&tmp,&lhs._odata[ss],&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); -#endif - } - } - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // avoid copy back routines for mult, mac, sub, add - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template strong_inline - void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(lhs,ret); - parallel_for(int ss=0;ssoSites();ss++){ - obj1 tmp; - mult(&tmp,&lhs._odata[ss],&rhs); - vstream(ret._odata[ss],tmp); - } - } - - template strong_inline - void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(ret,lhs); - parallel_for(int ss=0;ssoSites();ss++){ - obj1 tmp; - mac(&tmp,&lhs._odata[ss],&rhs); - vstream(ret._odata[ss],tmp); - } - } - - template strong_inline - void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(ret,lhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - sub(&tmp,&lhs._odata[ss],&rhs); - vstream(ret._odata[ss],tmp); -#else - sub(&ret._odata[ss],&lhs._odata[ss],&rhs); -#endif - } - } - template strong_inline - void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ - ret.checkerboard = lhs.checkerboard; - conformable(lhs,ret); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - add(&tmp,&lhs._odata[ss],&rhs); - vstream(ret._odata[ss],tmp); -#else - add(&ret._odata[ss],&lhs._odata[ss],&rhs); -#endif - } - } - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // avoid copy back routines for mult, mac, 
sub, add - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template strong_inline - void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - mult(&tmp,&lhs,&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - mult(&ret._odata[ss],&lhs,&rhs._odata[ss]); -#endif - } - } - - template strong_inline - void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - mac(&tmp,&lhs,&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - mac(&ret._odata[ss],&lhs,&rhs._odata[ss]); -#endif - } - } - - template strong_inline - void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - sub(&tmp,&lhs,&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - sub(&ret._odata[ss],&lhs,&rhs._odata[ss]); -#endif - } - } - template strong_inline - void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - obj1 tmp; - add(&tmp,&lhs,&rhs._odata[ss]); - vstream(ret._odata[ss],tmp); -#else - add(&ret._odata[ss],&lhs,&rhs._odata[ss]); -#endif - } - } - - template strong_inline - void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice &y){ - ret.checkerboard = x.checkerboard; - conformable(ret,x); - conformable(x,y); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = a*x._odata[ss]+y._odata[ss]; - vstream(ret._odata[ss],tmp); -#else - ret._odata[ss]=a*x._odata[ss]+y._odata[ss]; -#endif - } - } - template strong_inline - void axpby(Lattice 
&ret,sobj a,sobj b,const Lattice &x,const Lattice &y){ - ret.checkerboard = x.checkerboard; - conformable(ret,x); - conformable(x,y); - parallel_for(int ss=0;ssoSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = a*x._odata[ss]+b*y._odata[ss]; - vstream(ret._odata[ss],tmp); -#else - ret._odata[ss]=a*x._odata[ss]+b*y._odata[ss]; -#endif - } - } - - template strong_inline - RealD axpy_norm(Lattice &ret,sobj a,const Lattice &x,const Lattice &y){ - return axpy_norm_fast(ret,a,x,y); - } - template strong_inline - RealD axpby_norm(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y){ - return axpby_norm_fast(ret,a,b,x,y); - } +NAMESPACE_BEGIN(Grid); +////////////////////////////////////////////////////////////////////////////////////////////////////// +// avoid copy back routines for mult, mac, sub, add +////////////////////////////////////////////////////////////////////////////////////////////////////// +template inline +void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + conformable(ret,rhs); + conformable(lhs,rhs); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t = lhs_v(ss); + auto rhs_t = rhs_v(ss); + mult(&tmp,&lhs_t,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); } + +template inline +void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(ret,rhs); + conformable(lhs,rhs); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + auto rhs_t=rhs_v(ss); + mac(&tmp,&lhs_t,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(ret,rhs); + 
conformable(lhs,rhs); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + auto rhs_t=rhs_v(ss); + sub(&tmp,&lhs_t,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} +template inline +void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(ret,rhs); + conformable(lhs,rhs); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + auto rhs_t=rhs_v(ss); + add(&tmp,&lhs_t,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// avoid copy back routines for mult, mac, sub, add +////////////////////////////////////////////////////////////////////////////////////////////////////// +template inline +void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(lhs,ret); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + mult(&tmp,&lhs_v(ss),&rhs); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(ret,lhs); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + mac(&tmp,&lhs_t,&rhs); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(ret,lhs); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + 
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + sub(&tmp,&lhs_t,&rhs); + coalescedWrite(ret_v[ss],tmp); + }); +} +template inline +void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ + ret.Checkerboard() = lhs.Checkerboard(); + conformable(lhs,ret); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto lhs_t=lhs_v(ss); + add(&tmp,&lhs_t,&rhs); + coalescedWrite(ret_v[ss],tmp); + }); +} + +////////////////////////////////////////////////////////////////////////////////////////////////////// +// avoid copy back routines for mult, mac, sub, add +////////////////////////////////////////////////////////////////////////////////////////////////////// +template inline +void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + ret.Checkerboard() = rhs.Checkerboard(); + conformable(ret,rhs); + auto ret_v = ret.View(); + auto rhs_v = lhs.View(); + accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto rhs_t=rhs_v(ss); + mult(&tmp,&lhs,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + ret.Checkerboard() = rhs.Checkerboard(); + conformable(ret,rhs); + auto ret_v = ret.View(); + auto rhs_v = lhs.View(); + accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto rhs_t=rhs_v(ss); + mac(&tmp,&lhs,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ + ret.Checkerboard() = rhs.Checkerboard(); + conformable(ret,rhs); + auto ret_v = ret.View(); + auto rhs_v = lhs.View(); + accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto rhs_t=rhs_v(ss); + sub(&tmp,&lhs,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} +template inline +void add(Lattice 
&ret,const obj2 &lhs,const Lattice &rhs){ + ret.Checkerboard() = rhs.Checkerboard(); + conformable(ret,rhs); + auto ret_v = ret.View(); + auto rhs_v = lhs.View(); + accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ + decltype(coalescedRead(obj1())) tmp; + auto rhs_t=rhs_v(ss); + add(&tmp,&lhs,&rhs_t); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +void axpy(Lattice &ret,sobj a,const Lattice &x,const Lattice &y){ + ret.Checkerboard() = x.Checkerboard(); + conformable(ret,x); + conformable(x,y); + auto ret_v = ret.View(); + auto x_v = x.View(); + auto y_v = y.View(); + accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ + auto tmp = a*x_v(ss)+y_v(ss); + coalescedWrite(ret_v[ss],tmp); + }); +} +template inline +void axpby(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y){ + ret.Checkerboard() = x.Checkerboard(); + conformable(ret,x); + conformable(x,y); + auto ret_v = ret.View(); + auto x_v = x.View(); + auto y_v = y.View(); + accelerator_for(ss,x_v.size(),vobj::Nsimd(),{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(ret_v[ss],tmp); + }); +} + +template inline +RealD axpy_norm(Lattice &ret,sobj a,const Lattice &x,const Lattice &y) +{ + return axpy_norm_fast(ret,a,x,y); +} +template inline +RealD axpby_norm(Lattice &ret,sobj a,sobj b,const Lattice &x,const Lattice &y) +{ + return axpby_norm_fast(ret,a,b,x,y); +} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_base.h b/Grid/lattice/Lattice_base.h index d0ef0f55..d48e5090 100644 --- a/Grid/lattice/Lattice_base.h +++ b/Grid/lattice/Lattice_base.h @@ -27,349 +27,440 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_LATTICE_BASE_H -#define GRID_LATTICE_BASE_H + /* END LEGAL */ +#pragma once #define STREAMING_STORES -namespace Grid { - -// TODO: -// mac,real,imag 
- -// Functionality: -// -=,+=,*=,() -// add,+,sub,-,mult,mac,* -// adj,conjugate -// real,imag -// transpose,transposeIndex -// trace,traceIndex -// peekIndex -// innerProduct,outerProduct, -// localNorm2 -// localInnerProduct +NAMESPACE_BEGIN(Grid); extern int GridCshiftPermuteMap[4][16]; -//////////////////////////////////////////////// -// Basic expressions used in Expression Template -//////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////// +// Base class which can be used by traits to pick up behaviour +/////////////////////////////////////////////////////////////////// +class LatticeBase {}; -class LatticeBase +///////////////////////////////////////////////////////////////////////////////////////// +// Conformable checks; same instance of Grid required +///////////////////////////////////////////////////////////////////////////////////////// +void accelerator_inline conformable(GridBase *lhs,GridBase *rhs) { -public: - virtual ~LatticeBase(void) = default; - GridBase *_grid; -}; - -class LatticeExpressionBase {}; - -template -class LatticeUnaryExpression : public std::pair > , public LatticeExpressionBase { - public: - LatticeUnaryExpression(const std::pair > &arg): std::pair >(arg) {}; -}; - -template -class LatticeBinaryExpression : public std::pair > , public LatticeExpressionBase { - public: - LatticeBinaryExpression(const std::pair > &arg): std::pair >(arg) {}; -}; - -template -class LatticeTrinaryExpression :public std::pair >, public LatticeExpressionBase { - public: - LatticeTrinaryExpression(const std::pair > &arg): std::pair >(arg) {}; -}; - -void inline conformable(GridBase *lhs,GridBase *rhs) -{ - assert((lhs == rhs) && " conformable check pointers mismatch "); + assert(lhs == rhs); } -template -class Lattice : public LatticeBase +//////////////////////////////////////////////////////////////////////////// +// Minimal base class containing only data valid to access from accelerator +// 
_odata will be a managed pointer in CUDA +//////////////////////////////////////////////////////////////////////////// +// Force access to lattice through a view object. +// prevents writing of code that will not offload to GPU, but perhaps annoyingly +// strict since host could could in principle direct access through the lattice object +// Need to decide programming model. +#define LATTICE_VIEW_STRICT +template class LatticeAccelerator : public LatticeBase +{ +protected: + GridBase *_grid; + int checkerboard; + vobj *_odata; // A managed pointer + uint64_t _odata_size; +public: + accelerator_inline LatticeAccelerator() : checkerboard(0), _odata(nullptr), _odata_size(0), _grid(nullptr) { }; + accelerator_inline uint64_t oSites(void) const { return _odata_size; }; + accelerator_inline int Checkerboard(void) const { return checkerboard; }; + accelerator_inline int &Checkerboard(void) { return this->checkerboard; }; // can assign checkerboard on a container, not a view + accelerator_inline void Conformable(GridBase * &grid) const + { + if (grid) conformable(grid, _grid); + else grid = _grid; + }; +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// A View class which provides accessor to the data. 
+// This will be safe to call from accelerator_for and is trivially copy constructible +// The copy constructor for this will need to be used by device lambda functions +///////////////////////////////////////////////////////////////////////////////////////// +template +class LatticeView : public LatticeAccelerator { public: - int checkerboard; - Vector _odata; - - // to pthread need a computable loop where loop induction is not required - int begin(void) { return 0;}; - int end(void) { return _odata.size(); } - vobj & operator[](int i) { return _odata[i]; }; - const vobj & operator[](int i) const { return _odata[i]; }; + + // Rvalue +#ifdef __CUDA_ARCH__ + accelerator_inline const typename vobj::scalar_object operator()(size_t i) const { return coalescedRead(this->_odata[i]); } +#else + accelerator_inline const vobj & operator()(size_t i) const { return this->_odata[i]; } +#endif + + accelerator_inline const vobj & operator[](size_t i) const { return this->_odata[i]; }; + accelerator_inline vobj & operator[](size_t i) { return this->_odata[i]; }; + + accelerator_inline uint64_t begin(void) const { return 0;}; + accelerator_inline uint64_t end(void) const { return this->_odata_size; }; + accelerator_inline uint64_t size(void) const { return this->_odata_size; }; + + LatticeView(const LatticeAccelerator &refer_to_me) : LatticeAccelerator (refer_to_me) + { + } +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// Lattice expression types used by ET to assemble the AST +// +// Need to be able to detect code paths according to the whether a lattice object or not +// so introduce some trait type things +///////////////////////////////////////////////////////////////////////////////////////// + +class LatticeExpressionBase {}; + +template using is_lattice = std::is_base_of; +template using is_lattice_expr = std::is_base_of; + +template struct ViewMapBase { typedef T Type; }; +template struct ViewMapBase { typedef LatticeView 
Type; }; +template using ViewMap = ViewMapBase::value >; + +template +class LatticeUnaryExpression : public LatticeExpressionBase +{ public: - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - typedef vobj vector_object; - + typedef typename ViewMap<_T1>::Type T1; + Op op; + T1 arg1; + LatticeUnaryExpression(Op _op,const _T1 &_arg1) : op(_op), arg1(_arg1) {}; +}; + +template +class LatticeBinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + Op op; + T1 arg1; + T2 arg2; + LatticeBinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2) : op(_op), arg1(_arg1), arg2(_arg2) {}; +}; + +template +class LatticeTrinaryExpression : public LatticeExpressionBase +{ +public: + typedef typename ViewMap<_T1>::Type T1; + typedef typename ViewMap<_T2>::Type T2; + typedef typename ViewMap<_T3>::Type T3; + Op op; + T1 arg1; + T2 arg2; + T3 arg3; + LatticeTrinaryExpression(Op _op,const _T1 &_arg1,const _T2 &_arg2,const _T3 &_arg3) : op(_op), arg1(_arg1), arg2(_arg2), arg3(_arg3) {}; +}; + +///////////////////////////////////////////////////////////////////////////////////////// +// The real lattice class, with normal copy and assignment semantics. 
+// This contains extra (host resident) grid pointer data that may be accessed by host code +///////////////////////////////////////////////////////////////////////////////////////// +template +class Lattice : public LatticeAccelerator +{ +public: + GridBase *Grid(void) const { return this->_grid; } + /////////////////////////////////////////////////// + // Member types + /////////////////////////////////////////////////// + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + typedef vobj vector_object; + +private: + void dealloc(void) + { + alignedAllocator alloc; + if( this->_odata_size ) { + alloc.deallocate(this->_odata,this->_odata_size); + this->_odata=nullptr; + this->_odata_size=0; + } + } + void resize(uint64_t size) + { + alignedAllocator alloc; + if ( this->_odata_size != size ) { + dealloc(); + } + this->_odata_size = size; + if ( size ) + this->_odata = alloc.allocate(this->_odata_size); + else + this->_odata = nullptr; + } +public: + ///////////////////////////////////////////////////////////////////////////////// + // Return a view object that may be dereferenced in site loops. 
+ // The view is trivially copy constructible and may be copied to an accelerator device + // in device lambdas + ///////////////////////////////////////////////////////////////////////////////// + LatticeView View (void) const + { + LatticeView accessor(*( (LatticeAccelerator *) this)); + return accessor; + } + + ~Lattice() { + if ( this->_odata_size ) { + dealloc(); + } + } //////////////////////////////////////////////////////////////////////////////// // Expression Template closure support //////////////////////////////////////////////////////////////////////////////// - template strong_inline Lattice & operator=(const LatticeUnaryExpression &expr) + template inline Lattice & operator=(const LatticeUnaryExpression &expr) { GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); - conformable(_grid,egrid); + conformable(this->_grid,egrid); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; + this->checkerboard=cb; - parallel_for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,tmp); -#else - _odata[ss]=eval(ss,expr); -#endif - } + auto me = View(); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + vstream(me[ss],tmp); + }); return *this; } - template strong_inline Lattice & operator=(const LatticeBinaryExpression &expr) + template inline Lattice & operator=(const LatticeBinaryExpression &expr) { GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); - conformable(_grid,egrid); + conformable(this->_grid,egrid); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; + this->checkerboard=cb; - parallel_for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,tmp); -#else - _odata[ss]=eval(ss,expr); -#endif - } + auto me = View(); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + 
vstream(me[ss],tmp); + }); return *this; } - template strong_inline Lattice & operator=(const LatticeTrinaryExpression &expr) + template inline Lattice & operator=(const LatticeTrinaryExpression &expr) { GridBase *egrid(nullptr); GridFromExpression(egrid,expr); assert(egrid!=nullptr); - conformable(_grid,egrid); + conformable(this->_grid,egrid); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; - - parallel_for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - //vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,eval(ss,expr)); -#else - _odata[ss] = eval(ss,expr); -#endif - } + this->checkerboard=cb; + auto me = View(); + accelerator_for(ss,me.size(),1,{ + auto tmp = eval(ss,expr); + vstream(me[ss],tmp); + }); return *this; } //GridFromExpression is tricky to do template - Lattice(const LatticeUnaryExpression & expr) { - _grid = nullptr; - GridFromExpression(_grid,expr); - assert(_grid!=nullptr); + Lattice(const LatticeUnaryExpression & expr) { + this->_grid = nullptr; + GridFromExpression(this->_grid,expr); + assert(this->_grid!=nullptr); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; + this->checkerboard=cb; - _odata.resize(_grid->oSites()); - parallel_for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - vstream(_odata[ss] ,tmp); -#else - _odata[ss]=eval(ss,expr); -#endif - } - }; + resize(this->_grid->oSites()); + + *this = expr; + } template Lattice(const LatticeBinaryExpression & expr) { - _grid = nullptr; - GridFromExpression(_grid,expr); - assert(_grid!=nullptr); + this->_grid = nullptr; + GridFromExpression(this->_grid,expr); + assert(this->_grid!=nullptr); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; + this->checkerboard=cb; - _odata.resize(_grid->oSites()); - parallel_for(int ss=0;ss<_grid->oSites();ss++){ -#ifdef STREAMING_STORES - vobj tmp = eval(ss,expr); - 
vstream(_odata[ss] ,tmp); -#else - _odata[ss]=eval(ss,expr); -#endif - } - }; + resize(this->_grid->oSites()); + + *this = expr; + } template Lattice(const LatticeTrinaryExpression & expr) { - _grid = nullptr; - GridFromExpression(_grid,expr); - assert(_grid!=nullptr); + this->_grid = nullptr; + GridFromExpression(this->_grid,expr); + assert(this->_grid!=nullptr); int cb=-1; CBFromExpression(cb,expr); assert( (cb==Odd) || (cb==Even)); - checkerboard=cb; + this->checkerboard=cb; - _odata.resize(_grid->oSites()); - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - vstream(_odata[ss] ,eval(ss,expr)); - } - }; + resize(this->_grid->oSites()); + + *this = expr; + } + + template inline Lattice & operator = (const sobj & r){ + auto me = View(); + thread_for(ss,me.size(),{ + me[ss] = r; + }); + return *this; + } ////////////////////////////////////////////////////////////////// - // Constructor requires "grid" passed. - // what about a default grid? - ////////////////////////////////////////////////////////////////// - Lattice(GridBase *grid) : _odata(grid->oSites()) { - _grid = grid; - // _odata.reserve(_grid->oSites()); - // _odata.resize(_grid->oSites()); - // std::cout << "Constructing lattice object with Grid pointer "<<_grid<_grid = grid; + resize(this->_grid->oSites()); + assert((((uint64_t)&this->_odata[0])&0xF) ==0); + this->checkerboard=0; } - Lattice(const Lattice& r){ // copy constructor - _grid = r._grid; - checkerboard = r.checkerboard; - _odata.resize(_grid->oSites());// essential - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - _odata[ss]=r._odata[ss]; - } - } - - Lattice(Lattice&& r){ // move constructor - _grid = r._grid; - checkerboard = r.checkerboard; - _odata=std::move(r._odata); - } - - inline Lattice & operator = (Lattice && r) - { - _grid = r._grid; - checkerboard = r.checkerboard; - _odata =std::move(r._odata); - return *this; - } - - inline Lattice & operator = (const Lattice & r){ - _grid = r._grid; - checkerboard = r.checkerboard; - 
_odata.resize(_grid->oSites());// essential - - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - _odata[ss]=r._odata[ss]; - } - return *this; - } - - template strong_inline Lattice & operator = (const Lattice & r){ - this->checkerboard = r.checkerboard; - conformable(*this,r); - - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - this->_odata[ss]=r._odata[ss]; - } - return *this; - } - - virtual ~Lattice(void) = default; + // virtual ~Lattice(void) = default; void reset(GridBase* grid) { - if (_grid != grid) { - _grid = grid; - _odata.resize(grid->oSites()); - checkerboard = 0; + if (this->_grid != grid) { + this->_grid = grid; + this->_odata.resize(grid->oSites()); + this->checkerboard = 0; } } - - - template strong_inline Lattice & operator = (const sobj & r){ - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - this->_odata[ss]=r; - } + /////////////////////////////////////////// + // copy constructor + /////////////////////////////////////////// + Lattice(const Lattice& r){ + // std::cout << "Lattice constructor(const Lattice &) "<_grid = r.Grid(); + resize(this->_grid->oSites()); + *this = r; + } + /////////////////////////////////////////// + // move constructor + /////////////////////////////////////////// + Lattice(Lattice && r){ + this->_grid = r.Grid(); + this->_odata = r._odata; + this->_odata_size = r._odata_size; + this->checkerboard= r.Checkerboard(); + r._odata = nullptr; + r._odata_size = 0; + } + /////////////////////////////////////////// + // assignment template + /////////////////////////////////////////// + template inline Lattice & operator = (const Lattice & r){ + typename std::enable_if::value,int>::type i=0; + conformable(*this,r); + this->checkerboard = r.Checkerboard(); + auto me = View(); + auto him= r.View(); + accelerator_for(ss,me.size(),vobj::Nsimd(),{ + coalescedWrite(me[ss],him(ss)); + }); return *this; } - - + + /////////////////////////////////////////// + // Copy assignment + /////////////////////////////////////////// + inline 
Lattice & operator = (const Lattice & r){ + this->checkerboard = r.Checkerboard(); + conformable(*this,r); + auto me = View(); + auto him= r.View(); + accelerator_for(ss,me.size(),vobj::Nsimd(),{ + coalescedWrite(me[ss],him(ss)); + }); + return *this; + } + /////////////////////////////////////////// + // Move assignment possible if same type + /////////////////////////////////////////// + inline Lattice & operator = (Lattice && r){ + + resize(0); // deletes if appropriate + this->_grid = r.Grid(); + this->_odata = r._odata; + this->_odata_size = r._odata_size; + this->checkerboard= r.Checkerboard(); + + r._odata = nullptr; + r._odata_size = 0; + + return *this; + } + + ///////////////////////////////////////////////////////////////////////////// // *=,+=,-= operators inherit behvour from correspond */+/- operation - template strong_inline Lattice &operator *=(const T &r) { + ///////////////////////////////////////////////////////////////////////////// + template inline Lattice &operator *=(const T &r) { *this = (*this)*r; return *this; } - template strong_inline Lattice &operator -=(const T &r) { + template inline Lattice &operator -=(const T &r) { *this = (*this)-r; return *this; } - template strong_inline Lattice &operator +=(const T &r) { + template inline Lattice &operator +=(const T &r) { *this = (*this)+r; return *this; } -}; // class Lattice - - template std::ostream& operator<< (std::ostream& stream, const Lattice &o){ - std::vector gcoor; - typedef typename vobj::scalar_object sobj; - sobj ss; - for(int g=0;g_gsites;g++){ - o._grid->GlobalIndexToGlobalCoor(g,gcoor); - peekSite(ss,o,gcoor); - stream<<"["; - for(int d=0;d tmp; + LatticeAccelerator *lp = (LatticeAccelerator *)&l; + LatticeAccelerator *rp = (LatticeAccelerator *)&r; + tmp = *lp; *lp=*rp; *rp=tmp; } - + +}; // class Lattice + +template std::ostream& operator<< (std::ostream& stream, const Lattice &o){ + typedef typename vobj::scalar_object sobj; + for(int g=0;g_gsites;g++){ + + Coordinate 
gcoor; + o.Grid()->GlobalIndexToGlobalCoor(g,gcoor); + + sobj ss; + peekSite(ss,o,gcoor); + stream<<"["; + for(int d=0;d 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_COMPARISON_H #define GRID_LATTICE_COMPARISON_H -namespace Grid { +NAMESPACE_BEGIN(Grid); - ////////////////////////////////////////////////////////////////////////// - // relational operators - // - // Support <,>,<=,>=,==,!= - // - //Query supporting bitwise &, |, ^, ! - //Query supporting logical &&, ||, - ////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +// relational operators +// +// Support <,>,<=,>=,==,!= +// +//Query supporting bitwise &, |, ^, ! 
+//Query supporting logical &&, ||, +////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - // compare lattice to lattice - ////////////////////////////////////////////////////////////////////////// - template - inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=op(lhs._odata[ss],rhs._odata[ss]); - } - return ret; +typedef iScalar vPredicate ; + +/* +template accelerator_inline +vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, const robj &iffalse) +{ + typename std::remove_const::type ret; + + typedef typename vobj::scalar_object scalar_object; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + const int Nsimd = vobj::vector_type::Nsimd(); + + ExtractBuffer mask(Nsimd); + ExtractBuffer truevals(Nsimd); + ExtractBuffer falsevals(Nsimd); + + extract(iftrue, truevals); + extract(iffalse, falsevals); + extract(TensorRemove(predicate), mask); + + for (int s = 0; s < Nsimd; s++) { + if (mask[s]) falsevals[s] = truevals[s]; } - ////////////////////////////////////////////////////////////////////////// - // compare lattice to scalar - ////////////////////////////////////////////////////////////////////////// - template - inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) - { - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=op(lhs._odata[ss],rhs); - } - return ret; - } - ////////////////////////////////////////////////////////////////////////// - // compare scalar to lattice - ////////////////////////////////////////////////////////////////////////// - template - inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - 
ret._odata[ss]=op(lhs._odata[ss],rhs); - } - return ret; - } - - ////////////////////////////////////////////////////////////////////////// - // Map to functors - ////////////////////////////////////////////////////////////////////////// - // Less than - template - inline Lattice operator < (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vlt(),lhs,rhs); - } - template - inline Lattice operator < (const Lattice & lhs, const robj & rhs) { - return LSComparison(vlt(),lhs,rhs); - } - template - inline Lattice operator < (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vlt(),lhs,rhs); - } - - // Less than equal - template - inline Lattice operator <= (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vle(),lhs,rhs); - } - template - inline Lattice operator <= (const Lattice & lhs, const robj & rhs) { - return LSComparison(vle(),lhs,rhs); - } - template - inline Lattice operator <= (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vle(),lhs,rhs); - } - - // Greater than - template - inline Lattice operator > (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vgt(),lhs,rhs); - } - template - inline Lattice operator > (const Lattice & lhs, const robj & rhs) { - return LSComparison(vgt(),lhs,rhs); - } - template - inline Lattice operator > (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vgt(),lhs,rhs); - } - - - // Greater than equal - template - inline Lattice operator >= (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vge(),lhs,rhs); - } - template - inline Lattice operator >= (const Lattice & lhs, const robj & rhs) { - return LSComparison(vge(),lhs,rhs); - } - template - inline Lattice operator >= (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vge(),lhs,rhs); - } - - // equal - template - inline Lattice operator == (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(veq(),lhs,rhs); - } - template - inline Lattice operator == 
(const Lattice & lhs, const robj & rhs) { - return LSComparison(veq(),lhs,rhs); - } - template - inline Lattice operator == (const lobj & lhs, const Lattice & rhs) { - return SLComparison(veq(),lhs,rhs); - } - - - // not equal - template - inline Lattice operator != (const Lattice & lhs, const Lattice & rhs) { - return LLComparison(vne(),lhs,rhs); - } - template - inline Lattice operator != (const Lattice & lhs, const robj & rhs) { - return LSComparison(vne(),lhs,rhs); - } - template - inline Lattice operator != (const lobj & lhs, const Lattice & rhs) { - return SLComparison(vne(),lhs,rhs); - } + + merge(ret, falsevals); + return ret; } +*/ +////////////////////////////////////////////////////////////////////////// +// compare lattice to lattice +////////////////////////////////////////////////////////////////////////// + +template +inline Lattice LLComparison(vfunctor op,const Lattice &lhs,const Lattice &rhs) +{ + Lattice ret(rhs.Grid()); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + auto ret_v = ret.View(); + thread_for( ss, rhs_v.size(), { + ret_v[ss]=op(lhs_v[ss],rhs_v[ss]); + }); + return ret; +} +////////////////////////////////////////////////////////////////////////// +// compare lattice to scalar +////////////////////////////////////////////////////////////////////////// +template +inline Lattice LSComparison(vfunctor op,const Lattice &lhs,const robj &rhs) +{ + Lattice ret(lhs.Grid()); + auto lhs_v = lhs.View(); + auto ret_v = ret.View(); + thread_for( ss, lhs_v.size(), { + ret_v[ss]=op(lhs_v[ss],rhs); + }); + return ret; +} +////////////////////////////////////////////////////////////////////////// +// compare scalar to lattice +////////////////////////////////////////////////////////////////////////// +template +inline Lattice SLComparison(vfunctor op,const lobj &lhs,const Lattice &rhs) +{ + Lattice ret(rhs.Grid()); + auto rhs_v = rhs.View(); + auto ret_v = ret.View(); + thread_for( ss, rhs_v.size(), { + ret_v[ss]=op(lhs,rhs_v[ss]); + }); + 
return ret; +} + +////////////////////////////////////////////////////////////////////////// +// Map to functors +////////////////////////////////////////////////////////////////////////// +// Less than +template +inline Lattice operator < (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vlt(),lhs,rhs); +} +template +inline Lattice operator < (const Lattice & lhs, const robj & rhs) { + return LSComparison(vlt(),lhs,rhs); +} +template +inline Lattice operator < (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vlt(),lhs,rhs); +} + +// Less than equal +template +inline Lattice operator <= (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vle(),lhs,rhs); +} +template +inline Lattice operator <= (const Lattice & lhs, const robj & rhs) { + return LSComparison(vle(),lhs,rhs); +} +template +inline Lattice operator <= (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vle(),lhs,rhs); +} + +// Greater than +template +inline Lattice operator > (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vgt(),lhs,rhs); +} +template +inline Lattice operator > (const Lattice & lhs, const robj & rhs) { + return LSComparison(vgt(),lhs,rhs); +} +template +inline Lattice operator > (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vgt(),lhs,rhs); +} + + +// Greater than equal +template +inline Lattice operator >= (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vge(),lhs,rhs); +} +template +inline Lattice operator >= (const Lattice & lhs, const robj & rhs) { + return LSComparison(vge(),lhs,rhs); +} +template +inline Lattice operator >= (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vge(),lhs,rhs); +} + +// equal +template +inline Lattice operator == (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(veq(),lhs,rhs); +} +template +inline Lattice operator == (const Lattice & lhs, const robj & rhs) { + return LSComparison(veq(),lhs,rhs); +} +template 
+inline Lattice operator == (const lobj & lhs, const Lattice & rhs) { + return SLComparison(veq(),lhs,rhs); +} + + +// not equal +template +inline Lattice operator != (const Lattice & lhs, const Lattice & rhs) { + return LLComparison(vne(),lhs,rhs); +} +template +inline Lattice operator != (const Lattice & lhs, const robj & rhs) { + return LSComparison(vne(),lhs,rhs); +} +template +inline Lattice operator != (const lobj & lhs, const Lattice & rhs) { + return SLComparison(vne(),lhs,rhs); +} +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_comparison_utils.h b/Grid/lattice/Lattice_comparison_utils.h index cbac20ec..431aa9e1 100644 --- a/Grid/lattice/Lattice_comparison_utils.h +++ b/Grid/lattice/Lattice_comparison_utils.h @@ -26,10 +26,10 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_COMPARISON_H -#define GRID_COMPARISON_H -namespace Grid { +#pragma once + +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////// // This implementation is a bit poor. 
@@ -44,42 +44,42 @@ namespace Grid { // template class veq { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) == (rhs); } }; template class vne { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) != (rhs); } }; template class vlt { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) < (rhs); } }; template class vle { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) <= (rhs); } }; template class vgt { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) > (rhs); } }; template class vge { public: - vInteger operator()(const lobj &lhs, const robj &rhs) + accelerator vInteger operator()(const lobj &lhs, const robj &rhs) { return (lhs) >= (rhs); } @@ -88,42 +88,42 @@ namespace Grid { // Generic list of functors template class seq { public: - Integer operator()(const lobj &lhs, const robj &rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) == (rhs); } }; template class sne { public: - Integer operator()(const lobj &lhs, const robj &rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) != (rhs); } }; template class slt { public: - Integer operator()(const lobj &lhs, const robj &rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) < (rhs); } }; template class sle { public: - Integer operator()(const lobj &lhs, const robj &rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) <= (rhs); } }; template class sgt { public: - Integer operator()(const lobj &lhs, const robj 
&rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) > (rhs); } }; template class sge { public: - Integer operator()(const lobj &lhs, const robj &rhs) + accelerator Integer operator()(const lobj &lhs, const robj &rhs) { return (lhs) >= (rhs); } @@ -133,12 +133,12 @@ namespace Grid { // Integer and real get extra relational functions. ////////////////////////////////////////////////////////////////////////////////////////////////////// template = 0> - inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs) + accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const vsimd & rhs) { typedef typename vsimd::scalar_type scalar; - std::vector vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation - std::vector vrhs(vsimd::Nsimd()); - std::vector vpred(vsimd::Nsimd()); + ExtractBuffer vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + ExtractBuffer vrhs(vsimd::Nsimd()); + ExtractBuffer vpred(vsimd::Nsimd()); vInteger ret; extract(lhs,vlhs); extract(rhs,vrhs); @@ -150,11 +150,11 @@ namespace Grid { } template = 0> - inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs) + accelerator_inline vInteger Comparison(sfunctor sop,const vsimd & lhs, const typename vsimd::scalar_type & rhs) { typedef typename vsimd::scalar_type scalar; - std::vector vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation - std::vector vpred(vsimd::Nsimd()); + ExtractBuffer vlhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + ExtractBuffer vpred(vsimd::Nsimd()); vInteger ret; extract(lhs,vlhs); for(int s=0;s = 0> - inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs) + accelerator_inline vInteger Comparison(sfunctor sop,const typename vsimd::scalar_type & lhs, const vsimd & rhs) { typedef typename vsimd::scalar_type scalar; - 
std::vector vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation - std::vector vpred(vsimd::Nsimd()); + ExtractBuffer vrhs(vsimd::Nsimd()); // Use functors to reduce this to single implementation + ExtractBuffer vpred(vsimd::Nsimd()); vInteger ret; extract(rhs,vrhs); for(int s=0;s = 0>\ - inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\ + accelerator_inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\ {\ typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ template = 0>\ - inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \ + accelerator_inline vInteger operator op (const vsimd & lhs, const typename vsimd::scalar_type & rhs) \ {\ typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ template = 0>\ - inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \ + accelerator_inline vInteger operator op (const typename vsimd::scalar_type & lhs, const vsimd & rhs) \ {\ typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ template\ - inline vInteger operator op(const iScalar &lhs,const typename vsimd::scalar_type &rhs) \ + accelerator_inline vInteger operator op(const iScalar &lhs,const typename vsimd::scalar_type &rhs) \ { \ return lhs._internal op rhs; \ } \ template\ - inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar &rhs) \ + accelerator_inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar &rhs) \ { \ return lhs op rhs._internal; \ } \ @@ -212,7 +212,7 @@ namespace Grid { #define DECLARE_RELATIONAL(op,functor) \ DECLARE_RELATIONAL_EQ(op,functor) \ template\ - inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ + accelerator_inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ { \ return lhs._internal op rhs._internal; \ } @@ -226,7 +226,7 @@ 
DECLARE_RELATIONAL(!=,sne); #undef DECLARE_RELATIONAL -} +NAMESPACE_END(Grid); + -#endif diff --git a/Grid/lattice/Lattice_conformable.h b/Grid/lattice/Lattice_conformable.h index 398b3c90..ce22685e 100644 --- a/Grid/lattice/Lattice_conformable.h +++ b/Grid/lattice/Lattice_conformable.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,18 +23,18 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_CONFORMABLE_H #define GRID_LATTICE_CONFORMABLE_H -namespace Grid { - - template void conformable(const Lattice &lhs,const Lattice &rhs) - { - assert(lhs._grid == rhs._grid); - assert(lhs.checkerboard == rhs.checkerboard); - } +NAMESPACE_BEGIN(Grid); +template void conformable(const Lattice &lhs,const Lattice &rhs) +{ + assert(lhs.Grid() == rhs.Grid()); + assert(lhs.Checkerboard() == rhs.Checkerboard()); } + +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_coordinate.h b/Grid/lattice/Lattice_coordinate.h index 19eceba8..16f3641b 100644 --- a/Grid/lattice/Lattice_coordinate.h +++ b/Grid/lattice/Lattice_coordinate.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,34 +23,52 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_LATTICE_COORDINATE_H -#define GRID_LATTICE_COORDINATE_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); - template inline void LatticeCoordinate(Lattice &l,int mu) - { - typedef typename iobj::scalar_type scalar_type; - typedef typename iobj::vector_type vector_type; +template inline void LatticeCoordinate(Lattice &l,int mu) +{ + typedef typename iobj::scalar_type scalar_type; + typedef typename iobj::vector_type vector_type; - GridBase *grid = l._grid; - int Nsimd = grid->iSites(); + GridBase *grid = l.Grid(); + int Nsimd = grid->iSites(); - std::vector gcoor; - std::vector mergebuf(Nsimd); + Coordinate gcoor; + ExtractBuffer mergebuf(Nsimd); - vector_type vI; - for(int o=0;ooSites();o++){ - for(int i=0;iiSites();i++){ - grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); - mergebuf[i]=(Integer)gcoor[mu]; - } - merge(vI,mergebuf); - l._odata[o]=vI; + vector_type vI; + auto l_v = l.View(); + for(int o=0;ooSites();o++){ + for(int i=0;iiSites();i++){ + grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); + mergebuf[i]=(Integer)gcoor[mu]; + } + merge(vI,mergebuf); + l_v[o]=vI; + } +}; + +// LatticeCoordinate(); +// FIXME for debug; deprecate this; made obsolete by +template void lex_sites(Lattice &l){ + auto l_v = l.View(); + Real *v_ptr = (Real *)&l_v[0]; + size_t o_len = l.Grid()->oSites(); + size_t v_len = sizeof(vobj)/sizeof(vRealF); + size_t vec_len = vRealF::Nsimd(); + + for(int i=0;i 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_LOCALREDUCTION_H #define GRID_LATTICE_LOCALREDUCTION_H @@ -32,44 +32,56 @@ Author: Peter Boyle // localInner, localNorm, outerProduct /////////////////////////////////////////////// -namespace Grid { +NAMESPACE_BEGIN(Grid); - ///////////////////////////////////////////////////// - // Non site, reduced locally reduced routines - ///////////////////////////////////////////////////// +///////////////////////////////////////////////////// +// Non site, reduced locally reduced routines +///////////////////////////////////////////////////// - // localNorm2, - template - inline auto localNorm2 (const Lattice &rhs)-> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]); - } - return ret; - } - - // localInnerProduct - template - inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]); - } - return ret; - } - - // outerProduct Scalar x Scalar -> Scalar - // Vector x Vector -> Matrix - template - inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]); - } - return ret; - } +// localNorm2, +template +inline auto localNorm2 (const Lattice &rhs)-> Lattice +{ + Lattice ret(rhs.Grid()); + auto rhs_v = rhs.View(); + auto ret_v = ret.View(); + accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ + coalescedWrite(ret_v[ss],innerProduct(rhs_v(ss),rhs_v(ss))); + }); + return ret; } + 
+// localInnerProduct +template +inline auto localInnerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice +{ + Lattice ret(rhs.Grid()); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + auto ret_v = ret.View(); + accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{ + coalescedWrite(ret_v[ss],innerProduct(lhs_v(ss),rhs_v(ss))); + }); + return ret; +} + +// outerProduct Scalar x Scalar -> Scalar +// Vector x Vector -> Matrix +template +inline auto outerProduct (const Lattice &lhs,const Lattice &rhs) -> Lattice +{ + typedef decltype(coalescedRead(ll())) sll; + typedef decltype(coalescedRead(rr())) srr; + Lattice ret(rhs.Grid()); + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + auto ret_v = ret.View(); + accelerator_for(ss,rhs_v.size(),1,{ + // FIXME had issues with scalar version of outer + // Use vector [] operator and don't read coalesce this loop + ret_v[ss]=outerProduct(lhs_v[ss],rhs_v[ss]); + }); + return ret; +} +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_matrix_reduction.h b/Grid/lattice/Lattice_matrix_reduction.h new file mode 100644 index 00000000..0980ad8a --- /dev/null +++ b/Grid/lattice/Lattice_matrix_reduction.h @@ -0,0 +1,202 @@ +/************************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + Source file: ./lib/lattice/Lattice_reduction.h + Copyright (C) 2015 +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: paboyle + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once +#include + +#ifdef GRID_WARN_SUBOPTIMAL +#warning "Optimisation alert all these reduction loops are NOT threaded " +#endif + +NAMESPACE_BEGIN(Grid); + +template +static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nblock = X.Grid()->GlobalDimensions()[Orthog]; + + GridBase *FullGrid = X.Grid(); + // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + + // Lattice Xslice(SliceGrid); + // Lattice Rslice(SliceGrid); + + assert( FullGrid->_simd_layout[Orthog]==1); + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; + auto X_v = X.View(); + auto Y_v = Y.View(); + auto R_v = R.View(); + thread_region + { + std::vector s_x(Nblock); + + thread_loop_collapse2( (int n=0;n +static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nblock = X.Grid()->GlobalDimensions()[Orthog]; + + GridBase *FullGrid = X.Grid(); + assert( FullGrid->_simd_layout[Orthog]==1); + + //FIXME package in a convenient iterator + 
//Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; + + auto X_v = X.View(); + auto R_v = R.View(); + + thread_region + { + std::vector s_x(Nblock); + + thread_loop_collapse2( (int n=0;n +static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + GridBase *FullGrid = lhs.Grid(); + // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + + int Nblock = FullGrid->GlobalDimensions()[Orthog]; + + // Lattice Lslice(SliceGrid); + // Lattice Rslice(SliceGrid); + + mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); + + assert( FullGrid->_simd_layout[Orthog]==1); + // int nh = FullGrid->_ndimension; + // int nl = SliceGrid->_ndimension; + // int nl = nh-1; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; + + typedef typename vobj::vector_typeD vector_typeD; + auto lhs_v = lhs.View(); + auto rhs_v = rhs.View(); + thread_region { + std::vector Left(Nblock); + std::vector Right(Nblock); + Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); + + thread_loop_collapse2((int n=0;n(real(z),imag(z)); + }} + }}); + thread_critical { + mat += mat_thread; + } + } + + for(int i=0;iGlobalSum(sum); + mat(i,j)=sum; + }} + + return; +} + +NAMESPACE_END(Grid); + + + diff --git a/Grid/lattice/Lattice_overload.h b/Grid/lattice/Lattice_overload.h deleted file mode 100644 index 0906b610..00000000 --- a/Grid/lattice/Lattice_overload.h +++ /dev/null @@ -1,138 +0,0 
@@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/lattice/Lattice_overload.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_LATTICE_OVERLOAD_H -#define GRID_LATTICE_OVERLOAD_H - -namespace Grid { - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // unary negation - ////////////////////////////////////////////////////////////////////////////////////////////////////// - template - inline Lattice operator -(const Lattice &r) - { - Lattice ret(r._grid); - parallel_for(int ss=0;ssoSites();ss++){ - vstream(ret._odata[ss], -r._odata[ss]); - } - return ret; - } - ///////////////////////////////////////////////////////////////////////////////////// - // Lattice BinOp Lattice, - //NB mult performs conformable check. Do not reapply here for performance. 
- ///////////////////////////////////////////////////////////////////////////////////// - template - inline auto operator * (const Lattice &lhs,const Lattice &rhs)-> Lattice - { - Lattice ret(rhs._grid); - mult(ret,lhs,rhs); - return ret; - } - template - inline auto operator + (const Lattice &lhs,const Lattice &rhs)-> Lattice - { - Lattice ret(rhs._grid); - add(ret,lhs,rhs); - return ret; - } - template - inline auto operator - (const Lattice &lhs,const Lattice &rhs)-> Lattice - { - Lattice ret(rhs._grid); - sub(ret,lhs,rhs); - return ret; - } - - // Scalar BinOp Lattice ;generate return type - template - inline auto operator * (const left &lhs,const Lattice &rhs) -> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss]; - vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs*rhs._odata[ss]; - } - return ret; - } - template - inline auto operator + (const left &lhs,const Lattice &rhs) -> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss]; - vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs+rhs._odata[ss]; - } - return ret; - } - template - inline auto operator - (const left &lhs,const Lattice &rhs) -> Lattice - { - Lattice ret(rhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss]; - vstream(ret._odata[ss],tmp); - } - return ret; - } - template - inline auto operator * (const Lattice &lhs,const right &rhs) -> Lattice - { - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs; - vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs._odata[ss]*rhs; - } - return ret; - } - template - inline auto operator + (const Lattice &lhs,const right &rhs) -> Lattice - { - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs; - 
vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs._odata[ss]+rhs; - } - return ret; - } - template - inline auto operator - (const Lattice &lhs,const right &rhs) -> Lattice - { - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites(); ss++){ - decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs; - vstream(ret._odata[ss],tmp); - // ret._odata[ss]=lhs._odata[ss]-rhs; - } - return ret; - } -} -#endif diff --git a/Grid/lattice/Lattice_peekpoke.h b/Grid/lattice/Lattice_peekpoke.h index 3d6268d2..c3e7e931 100644 --- a/Grid/lattice/Lattice_peekpoke.h +++ b/Grid/lattice/Lattice_peekpoke.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,8 +25,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_PEEK_H #define GRID_LATTICE_PEEK_H @@ -34,172 +34,184 @@ Author: Peter Boyle // Peeking and poking around /////////////////////////////////////////////// -namespace Grid { - - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Peek internal indices of a Lattice object - //////////////////////////////////////////////////////////////////////////////////////////////////// - template - auto PeekIndex(const Lattice &lhs,int i) -> Lattice(lhs._odata[0],i))> - { - Lattice(lhs._odata[0],i))> ret(lhs._grid); - ret.checkerboard=lhs.checkerboard; - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = peekIndex(lhs._odata[ss],i); - } - return ret; - }; - template - auto PeekIndex(const 
Lattice &lhs,int i,int j) -> Lattice(lhs._odata[0],i,j))> - { - Lattice(lhs._odata[0],i,j))> ret(lhs._grid); - ret.checkerboard=lhs.checkerboard; - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = peekIndex(lhs._odata[ss],i,j); - } - return ret; - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Poke internal indices of a Lattice object - //////////////////////////////////////////////////////////////////////////////////////////////////// - template - void PokeIndex(Lattice &lhs,const Lattice(lhs._odata[0],0))> & rhs,int i) - { - parallel_for(int ss=0;ssoSites();ss++){ - pokeIndex(lhs._odata[ss],rhs._odata[ss],i); - } - } - template - void PokeIndex(Lattice &lhs,const Lattice(lhs._odata[0],0,0))> & rhs,int i,int j) - { - parallel_for(int ss=0;ssoSites();ss++){ - pokeIndex(lhs._odata[ss],rhs._odata[ss],i,j); - } - } - - ////////////////////////////////////////////////////// - // Poke a scalar object into the SIMD array - ////////////////////////////////////////////////////// - template - void pokeSite(const sobj &s,Lattice &l,const std::vector &site){ - - GridBase *grid=l._grid; - - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nsimd = grid->Nsimd(); - - assert( l.checkerboard== l._grid->CheckerBoard(site)); - assert( sizeof(sobj)*Nsimd == sizeof(vobj)); - - int rank,odx,idx; - // Optional to broadcast from node 0. 
- grid->GlobalCoorToRankIndex(rank,odx,idx,site); - grid->Broadcast(grid->BossRank(),s); - - std::vector buf(Nsimd); - - // extract-modify-merge cycle is easiest way and this is not perf critical - if ( rank == grid->ThisRank() ) { - extract(l._odata[odx],buf); - buf[idx] = s; - merge(l._odata[odx],buf); - } - - return; - }; +NAMESPACE_BEGIN(Grid); - ////////////////////////////////////////////////////////// - // Peek a scalar object from the SIMD array - ////////////////////////////////////////////////////////// - template - void peekSite(sobj &s,const Lattice &l,const std::vector &site){ - - GridBase *grid=l._grid; - - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nsimd = grid->Nsimd(); - - assert( l.checkerboard == l._grid->CheckerBoard(site)); - - int rank,odx,idx; - grid->GlobalCoorToRankIndex(rank,odx,idx,site); - - std::vector buf(Nsimd); - extract(l._odata[odx],buf); - - s = buf[idx]; - - grid->Broadcast(rank,s); - - return; - }; - - - ////////////////////////////////////////////////////////// - // Peek a scalar object from the SIMD array - ////////////////////////////////////////////////////////// - template - void peekLocalSite(sobj &s,const Lattice &l,std::vector &site){ - - GridBase *grid = l._grid; - - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nsimd = grid->Nsimd(); - - assert( l.checkerboard== l._grid->CheckerBoard(site)); - assert( sizeof(sobj)*Nsimd == sizeof(vobj)); - - static const int words=sizeof(vobj)/sizeof(vector_type); - int odx,idx; - idx= grid->iIndex(site); - odx= grid->oIndex(site); - - scalar_type * vp = (scalar_type *)&l._odata[odx]; - scalar_type * pt = (scalar_type *)&s; - - for(int w=0;w - void pokeLocalSite(const sobj &s,Lattice &l,std::vector &site){ - - GridBase *grid=l._grid; - - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nsimd = grid->Nsimd(); 
- - assert( l.checkerboard== l._grid->CheckerBoard(site)); - assert( sizeof(sobj)*Nsimd == sizeof(vobj)); - - static const int words=sizeof(vobj)/sizeof(vector_type); - int odx,idx; - idx= grid->iIndex(site); - odx= grid->oIndex(site); - - scalar_type * vp = (scalar_type *)&l._odata[odx]; - scalar_type * pt = (scalar_type *)&s; - - for(int w=0;w +auto PeekIndex(const Lattice &lhs,int i) -> Lattice(vobj(),i))> +{ + Lattice(vobj(),i))> ret(lhs.Grid()); + ret.Checkerboard()=lhs.Checkerboard(); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + thread_for( ss, lhs_v.size(), { + ret_v[ss] = peekIndex(lhs_v[ss],i); + }); + return ret; +}; +template +auto PeekIndex(const Lattice &lhs,int i,int j) -> Lattice(vobj(),i,j))> +{ + Lattice(vobj(),i,j))> ret(lhs.Grid()); + ret.Checkerboard()=lhs.Checkerboard(); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + thread_for( ss, lhs_v.size(), { + ret_v[ss] = peekIndex(lhs_v[ss],i,j); + }); + return ret; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Poke internal indices of a Lattice object +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +void PokeIndex(Lattice &lhs,const Lattice(vobj(),0))> & rhs,int i) +{ + auto rhs_v = rhs.View(); + auto lhs_v = lhs.View(); + thread_for( ss, lhs_v.size(), { + pokeIndex(lhs_v[ss],rhs_v[ss],i); + }); } +template +void PokeIndex(Lattice &lhs,const Lattice(vobj(),0,0))> & rhs,int i,int j) +{ + auto rhs_v = rhs.View(); + auto lhs_v = lhs.View(); + thread_for( ss, lhs_v.size(), { + pokeIndex(lhs_v[ss],rhs_v[ss],i,j); + }); +} + +////////////////////////////////////////////////////// +// Poke a scalar object into the SIMD array +////////////////////////////////////////////////////// +template +void pokeSite(const sobj &s,Lattice &l,const Coordinate &site){ + + GridBase *grid=l.Grid(); + + typedef typename vobj::scalar_type scalar_type; + typedef typename 
vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + int rank,odx,idx; + // Optional to broadcast from node 0. + grid->GlobalCoorToRankIndex(rank,odx,idx,site); + grid->Broadcast(grid->BossRank(),s); + + // extract-modify-merge cycle is easiest way and this is not perf critical + ExtractBuffer buf(Nsimd); + auto l_v = l.View(); + if ( rank == grid->ThisRank() ) { + extract(l_v[odx],buf); + buf[idx] = s; + merge(l_v[odx],buf); + } + + return; +}; + + +////////////////////////////////////////////////////////// +// Peek a scalar object from the SIMD array +////////////////////////////////////////////////////////// +template +void peekSite(sobj &s,const Lattice &l,const Coordinate &site){ + + GridBase *grid=l.Grid(); + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.Checkerboard() == l.Grid()->CheckerBoard(site)); + + int rank,odx,idx; + grid->GlobalCoorToRankIndex(rank,odx,idx,site); + + ExtractBuffer buf(Nsimd); + auto l_v = l.View(); + extract(l_v[odx],buf); + + s = buf[idx]; + + grid->Broadcast(rank,s); + + return; +}; + + +////////////////////////////////////////////////////////// +// Peek a scalar object from the SIMD array +////////////////////////////////////////////////////////// +template +void peekLocalSite(sobj &s,const Lattice &l,Coordinate &site){ + + GridBase *grid = l.Grid(); + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + static const int words=sizeof(vobj)/sizeof(vector_type); + int odx,idx; + idx= grid->iIndex(site); + odx= grid->oIndex(site); + + auto l_v = l.View(); + scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * pt = 
(scalar_type *)&s; + + for(int w=0;w +void pokeLocalSite(const sobj &s,Lattice &l,Coordinate &site){ + + GridBase *grid=l.Grid(); + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.Checkerboard()== l.Grid()->CheckerBoard(site)); + assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + static const int words=sizeof(vobj)/sizeof(vector_type); + int odx,idx; + idx= grid->iIndex(site); + odx= grid->oIndex(site); + + auto l_v = l.View(); + scalar_type * vp = (scalar_type *)&l_v[odx]; + scalar_type * pt = (scalar_type *)&s; + for(int w=0;w 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_REALITY_H #define GRID_LATTICE_REALITY_H @@ -36,22 +36,28 @@ Author: neo // The choice of burying complex in the SIMD // is making the use of "real" and "imag" very cumbersome -namespace Grid { +NAMESPACE_BEGIN(Grid); - template inline Lattice adj(const Lattice &lhs){ - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = adj(lhs._odata[ss]); - } - return ret; - }; +template inline Lattice adj(const Lattice &lhs){ + Lattice ret(lhs.Grid()); + auto lhs_v = lhs.View(); + auto ret_v = ret.View(); + accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { + coalescedWrite(ret_v[ss], adj(lhs_v(ss))); + }); + return ret; +}; + +template inline Lattice conjugate(const Lattice &lhs){ + Lattice ret(lhs.Grid()); + auto lhs_v = lhs.View(); + auto ret_v = ret.View(); + accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { + coalescedWrite( ret_v[ss] , conjugate(lhs_v(ss))); + }); + return ret; +}; + +NAMESPACE_END(Grid); - template inline Lattice conjugate(const 
Lattice &lhs){ - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = conjugate(lhs._odata[ss]); - } - return ret; - }; -} #endif diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 41351c98..3a076038 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -19,22 +19,76 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_LATTICE_REDUCTION_H -#define GRID_LATTICE_REDUCTION_H +#pragma once #include -namespace Grid { -#ifdef GRID_WARN_SUBOPTIMAL -#warning "Optimisation alert all these reduction loops are NOT threaded " -#endif - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Deterministic Reduction operations - //////////////////////////////////////////////////////////////////////////////////////////////////// +#ifdef GRID_NVCC +#include +#endif + +NAMESPACE_BEGIN(Grid); + +////////////////////////////////////////////////////// +// FIXME this should promote to double and accumulate +////////////////////////////////////////////////////// +template +inline typename vobj::scalar_object sum_cpu(const vobj *arg, Integer osites) +{ + typedef typename vobj::scalar_object sobj; + + const int Nsimd = vobj::Nsimd(); + const int nthread = GridThread::GetThreads(); + + Vector sumarray(nthread); + for(int i=0;i +inline typename vobj::scalar_object sum(const vobj *arg, Integer osites) +{ +#ifdef GRID_NVCC + return sum_gpu(arg,osites); +#else + return sum_cpu(arg,osites); +#endif +} +template +inline typename vobj::scalar_object sum(const Lattice &arg) +{ + auto arg_v = arg.View(); + Integer osites = arg.Grid()->oSites(); + auto ssum= sum(&arg_v[0],osites); + arg.Grid()->GlobalSum(ssum); + return ssum; +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Deterministic Reduction operations +//////////////////////////////////////////////////////////////////////////////////////////////////// template inline RealD norm2(const Lattice &arg){ - auto nrm = innerProduct(arg,arg); - return std::real(nrm); + ComplexD nrm = innerProduct(arg,arg); + return real(nrm); } // Double inner product @@ -43,32 +97,49 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ { typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_typeD vector_type; - GridBase *grid = left._grid; - const int pad = 8; - - ComplexD inner; - Vector sumarray(grid->SumArraySize()*pad); - - parallel_for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; - GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); - - decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation - for(int ss=myoff;ssSumArraySize();i++){ - inner = inner+sumarray[i*pad]; - } - right._grid->GlobalSum(inner); - return inner; + GridBase *grid = left.Grid(); + + // Might make all code paths go this way. + auto left_v = left.View(); + auto right_v=right.View(); + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); + +#ifdef GRID_NVCC + // GPU - SIMT lane compliance... 
+ typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v(ss); + auto y_l = right_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(x_l,y_l)); + }) + + // This is in single precision and fails some tests + // Need a sumD that sums in double + nrm = TensorRemove(sumD_gpu(inner_tmp_v,sites)); +#else + // CPU + typedef decltype(innerProductD(left_v[0],right_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + accelerator_for( ss, sites, nsimd,{ + auto x_l = left_v[ss]; + auto y_l = right_v[ss]; + inner_tmp_v[ss]=innerProductD(x_l,y_l); + }) + nrm = TensorRemove(sum(inner_tmp_v,sites)); +#endif + grid->GlobalSum(nrm); + + return nrm; } ///////////////////////// @@ -86,8 +157,7 @@ axpy_norm_fast(Lattice &z,sobj a,const Lattice &x,const Lattice strong_inline RealD axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Lattice &y) { - const int pad = 8; - z.checkerboard = x.checkerboard; + z.Checkerboard() = x.Checkerboard(); conformable(z,x); conformable(x,y); @@ -95,43 +165,57 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt typedef typename vobj::vector_typeD vector_type; RealD nrm; - GridBase *grid = x._grid; + GridBase *grid = x.Grid(); + + auto x_v=x.View(); + auto y_v=y.View(); + auto z_v=z.View(); + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites = grid->oSites(); - Vector sumarray(grid->SumArraySize()*pad); +#ifdef GRID_NVCC + // GPU + typedef decltype(innerProduct(x_v[0],y_v[0])) inner_t; + Vector inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; + + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + coalescedWrite(inner_tmp_v[ss],innerProduct(tmp,tmp)); + coalescedWrite(z_v[ss],tmp); + }); + + nrm = real(TensorRemove(sumD_gpu(inner_tmp_v,sites))); +#else + // CPU + typedef decltype(innerProductD(x_v[0],y_v[0])) inner_t; + Vector 
inner_tmp(sites); + auto inner_tmp_v = &inner_tmp[0]; - parallel_for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; - GridThread::GetWork(x._grid->oSites(),thr,mywork,myoff); - - // private to thread; sub summation - decltype(innerProductD(z._odata[0],z._odata[0])) vnrm=zero; - for(int ss=myoff;ssSumArraySize();i++){ - nrm = nrm+sumarray[i*pad]; - } - z._grid->GlobalSum(nrm); + accelerator_for( ss, sites, nsimd,{ + auto tmp = a*x_v(ss)+b*y_v(ss); + inner_tmp_v[ss]=innerProductD(tmp,tmp); + z_v[ss]=tmp; + }); + // Already promoted to double + nrm = real(TensorRemove(sum(inner_tmp_v,sites))); +#endif + grid->GlobalSum(nrm); return nrm; } template inline auto sum(const LatticeUnaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object + ->typename decltype(expr.op.func(eval(0,expr.arg1)))::scalar_object { return sum(closure(expr)); } template inline auto sum(const LatticeBinaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object + ->typename decltype(expr.op.func(eval(0,expr.arg1),eval(0,expr.arg2)))::scalar_object { return sum(closure(expr)); } @@ -139,54 +223,14 @@ inline auto sum(const LatticeBinaryExpression & expr) template inline auto sum(const LatticeTrinaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), - eval(0,std::get<1>(expr.second)), - eval(0,std::get<2>(expr.second)) + ->typename decltype(expr.op.func(eval(0,expr.arg1), + eval(0,expr.arg2), + eval(0,expr.arg3) ))::scalar_object { return sum(closure(expr)); } -template -inline typename vobj::scalar_object sum(const Lattice &arg) -{ - GridBase *grid=arg._grid; - int Nsimd = grid->Nsimd(); - - std::vector > sumarray(grid->SumArraySize()); - for(int i=0;iSumArraySize();i++){ - sumarray[i]=zero; - } - - parallel_for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; - 
GridThread::GetWork(grid->oSites(),thr,mywork,myoff); - - vobj vvsum=zero; - for(int ss=myoff;ssSumArraySize();i++){ - vsum = vsum+sumarray[i]; - } - - typedef typename vobj::scalar_object sobj; - sobj ssum=zero; - - std::vector buf(Nsimd); - extract(vsum,buf); - - for(int i=0;iGlobalSum(ssum); - - return ssum; -} - - ////////////////////////////////////////////////////////////////////////////////////////////////////////////// // sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc... ////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -199,7 +243,7 @@ template inline void sliceSum(const Lattice &Data,std::vector< // But easily avoided by using double precision fields /////////////////////////////////////////////////////// typedef typename vobj::scalar_object sobj; - GridBase *grid = Data._grid; + GridBase *grid = Data.Grid(); assert(grid!=NULL); const int Nd = grid->_ndimension; @@ -212,13 +256,13 @@ template inline void sliceSum(const Lattice &Data,std::vector< int ld=grid->_ldimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim]; - std::vector > lvSum(rd); // will locally sum vectors first - std::vector lsSum(ld,zero); // sum across these down to scalars - std::vector extracted(Nsimd); // splitting the SIMD + Vector lvSum(rd); // will locally sum vectors first + Vector lsSum(ld,Zero()); // sum across these down to scalars + ExtractBuffer extracted(Nsimd); // splitting the SIMD result.resize(fd); // And then global sum to return the same vector to every node for(int r=0;r_slice_nblock[orthogdim]; @@ -227,20 +271,19 @@ template inline void sliceSum(const Lattice &Data,std::vector< // sum over reduced dimension planes, breaking out orthog dir // Parallel over orthog direction - parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane - for(int n=0;n icoor(Nd); + Coordinate icoor(Nd); for(int rt=0;rt inline void sliceSum(const Lattice &Data,std::vector< if ( pt == 
grid->_processor_coor[orthogdim] ) { gsum=lsSum[lt]; } else { - gsum=zero; + gsum=Zero(); } grid->GlobalSum(gsum); @@ -292,9 +335,9 @@ static void localSliceInnerProductVector(std::vector &result, const La // std::cout << GridLogMessage << "Start prep" << std::endl; typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; - GridBase *grid = lhs._grid; + GridBase *grid = lhs.Grid(); assert(grid!=NULL); - conformable(grid,rhs._grid); + conformable(grid,rhs.Grid()); const int Nd = grid->_ndimension; const int Nsimd = grid->Nsimd(); @@ -307,14 +350,14 @@ static void localSliceInnerProductVector(std::vector &result, const La int rd=grid->_rdimensions[orthogdim]; // std::cout << GridLogMessage << "Start alloc" << std::endl; - std::vector > lvSum(rd); // will locally sum vectors first + Vector lvSum(rd); // will locally sum vectors first lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars - std::vector> extracted(Nsimd); // splitting the SIMD + ExtractBuffer > extracted(Nsimd); // splitting the SIMD // std::cout << GridLogMessage << "End alloc" << std::endl; result.resize(fd); // And then global sum to return the same vector to every node for IO to file for(int r=0;r_slice_nblock[orthogdim]; @@ -323,23 +366,24 @@ static void localSliceInnerProductVector(std::vector &result, const La // std::cout << GridLogMessage << "End prep" << std::endl; // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl; vector_type vv; - parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane for(int n=0;n icoor(Nd); + Coordinate icoor(Nd); for(int rt=0;rt temp; @@ -362,7 +406,7 @@ template static void globalSliceInnerProductVector(std::vector &result, const Lattice &lhs, std::vector &lsSum, int orthogdim) { typedef typename vobj::scalar_type scalar_type; - GridBase *grid = lhs._grid; + GridBase *grid = lhs.Grid(); int fd = result.size(); int ld = lsSum.size(); // sum over nodes. 
@@ -388,9 +432,9 @@ static void sliceInnerProductVector( std::vector & result, const Latti { typedef typename vobj::vector_type vector_type; typedef typename vobj::scalar_type scalar_type; - GridBase *grid = lhs._grid; + GridBase *grid = lhs.Grid(); assert(grid!=NULL); - conformable(grid,rhs._grid); + conformable(grid,rhs.Grid()); const int Nd = grid->_ndimension; const int Nsimd = grid->Nsimd(); @@ -402,34 +446,36 @@ static void sliceInnerProductVector( std::vector & result, const Latti int ld=grid->_ldimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim]; - std::vector > lvSum(rd); // will locally sum vectors first - std::vector lsSum(ld,scalar_type(0.0)); // sum across these down to scalars - std::vector > extracted(Nsimd); // splitting the SIMD + Vector lvSum(rd); // will locally sum vectors first + Vector lsSum(ld,scalar_type(0.0)); // sum across these down to scalars + ExtractBuffer > extracted(Nsimd); // splitting the SIMD result.resize(fd); // And then global sum to return the same vector to every node for IO to file for(int r=0;r_slice_nblock[orthogdim]; int e2= grid->_slice_block [orthogdim]; int stride=grid->_slice_stride[orthogdim]; - parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane for(int n=0;n icoor(Nd); + Coordinate icoor(Nd); for(int rt=0;rt temp; @@ -470,7 +516,7 @@ static void sliceNorm (std::vector &sn,const Lattice &rhs,int Ortho typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - int Nblock = rhs._grid->GlobalDimensions()[Orthog]; + int Nblock = rhs.Grid()->GlobalDimensions()[Orthog]; std::vector ip(Nblock); sn.resize(Nblock); @@ -492,7 +538,7 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice scalar_type zscale(scale); - GridBase *grid = X._grid; + GridBase *grid = X.Grid(); int Nsimd =grid->Nsimd(); int Nblock =grid->GlobalDimensions()[orthogdim]; @@ -505,8 +551,7 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice int e2 
=grid->_slice_block [orthogdim]; int stride =grid->_slice_stride[orthogdim]; - std::vector icoor; - + Coordinate icoor; for(int r=0;r_ostride[orthogdim]; // base offset for start of plane @@ -522,12 +567,15 @@ static void sliceMaddVector(Lattice &R,std::vector &a,const Lattice tensor_reduced at; at=av; - parallel_for_nest2(int n=0;n &R,Eigen::MatrixXcd &aa,const Lattice typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - int Nblock = X._grid->GlobalDimensions()[Orthog]; + int Nblock = X.Grid()->GlobalDimensions()[Orthog]; - GridBase *FullGrid = X._grid; + GridBase *FullGrid = X.Grid(); // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); // Lattice Xslice(SliceGrid); // Lattice Rslice(SliceGrid); assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; + // int nh = FullGrid->_ndimension; // int nl = SliceGrid->_ndimension; - int nl = nh-1; + // int nl = nh-1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -578,28 +626,31 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; -#pragma omp parallel - { - std::vector s_x(Nblock); -#pragma omp for collapse(2) - for(int n=0;n s_x(Nblock); + + thread_for_collapse_in_region(2, n,nblock, { + for(int b=0;b &R,Eigen::MatrixXcd &aa,const Lattice< typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - int Nblock = X._grid->GlobalDimensions()[Orthog]; + int Nblock = X.Grid()->GlobalDimensions()[Orthog]; - GridBase *FullGrid = X._grid; + GridBase *FullGrid = X.Grid(); // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); // Lattice Xslice(SliceGrid); // Lattice Rslice(SliceGrid); assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; + // int nh = FullGrid->_ndimension; // int nl 
= SliceGrid->_ndimension; - int nl=1; + // int nl=1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -628,17 +679,19 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice< int block =FullGrid->_slice_block [Orthog]; int nblock=FullGrid->_slice_nblock[Orthog]; int ostride=FullGrid->_ostride[Orthog]; -#pragma omp parallel + auto R_v = R.View(); + auto X_v = X.View(); + thread_region { std::vector s_x(Nblock); -#pragma omp for collapse(2) - for(int n=0;n &R,Eigen::MatrixXcd &aa,const Lattice< for(int j=1;j typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; - GridBase *FullGrid = lhs._grid; + GridBase *FullGrid = lhs.Grid(); // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); int Nblock = FullGrid->GlobalDimensions()[Orthog]; @@ -673,9 +725,9 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); assert( FullGrid->_simd_layout[Orthog]==1); - int nh = FullGrid->_ndimension; + // int nh = FullGrid->_ndimension; // int nl = SliceGrid->_ndimension; - int nl = nh-1; + // int nl = nh-1; //FIXME package in a convenient iterator //Should loop over a plane orthogonal to direction "Orthog" @@ -686,31 +738,33 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice typedef typename vobj::vector_typeD vector_typeD; -#pragma omp parallel + auto lhs_v=lhs.View(); + auto rhs_v=rhs.View(); + thread_region { std::vector Left(Nblock); std::vector Right(Nblock); Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); -#pragma omp for collapse(2) - for(int n=0;n(real(red),imag(red)); }} - }} -#pragma omp critical + }}); + thread_critical { mat += mat_thread; } @@ -726,8 +780,8 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice return; } -} /*END NAMESPACE GRID*/ -#endif +NAMESPACE_END(Grid); + diff --git 
a/Grid/lattice/Lattice_reduction_gpu.h b/Grid/lattice/Lattice_reduction_gpu.h new file mode 100644 index 00000000..c5d75356 --- /dev/null +++ b/Grid/lattice/Lattice_reduction_gpu.h @@ -0,0 +1,226 @@ +NAMESPACE_BEGIN(Grid); + +#define WARP_SIZE 32 +extern cudaDeviceProp *gpu_props; +__device__ unsigned int retirementCount = 0; + +template +unsigned int nextPow2(Iterator x) { + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return ++x; +} + +template +void getNumBlocksAndThreads(const Iterator n, const size_t sizeofsobj, Iterator &threads, Iterator &blocks) { + + int device; + cudaGetDevice(&device); + + Iterator warpSize = gpu_props[device].warpSize; + Iterator sharedMemPerBlock = gpu_props[device].sharedMemPerBlock; + Iterator maxThreadsPerBlock = gpu_props[device].maxThreadsPerBlock; + Iterator multiProcessorCount = gpu_props[device].multiProcessorCount; + + std::cout << GridLogDebug << "GPU has:" << std::endl; + std::cout << GridLogDebug << "\twarpSize = " << warpSize << std::endl; + std::cout << GridLogDebug << "\tsharedMemPerBlock = " << sharedMemPerBlock << std::endl; + std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << maxThreadsPerBlock << std::endl; + std::cout << GridLogDebug << "\tmaxThreadsPerBlock = " << warpSize << std::endl; + std::cout << GridLogDebug << "\tmultiProcessorCount = " << multiProcessorCount << std::endl; + + if (warpSize != WARP_SIZE) { + std::cout << GridLogError << "The warp size of the GPU in use does not match the warp size set when compiling Grid." 
<< std::endl; + exit(EXIT_FAILURE); + } + + // let the number of threads in a block be a multiple of 2, starting from warpSize + threads = warpSize; + while( 2*threads*sizeofsobj < sharedMemPerBlock && 2*threads <= maxThreadsPerBlock ) threads *= 2; + // keep all the streaming multiprocessors busy + blocks = nextPow2(multiProcessorCount); + +} + +template +__device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid) { + + Iterator blockSize = blockDim.x; + + // cannot use overloaded operators for sobj as they are not volatile-qualified + memcpy((void *)&sdata[tid], (void *)&mySum, sizeof(sobj)); + __syncwarp(); + + const Iterator VEC = WARP_SIZE; + const Iterator vid = tid & (VEC-1); + + sobj beta, temp; + memcpy((void *)&beta, (void *)&mySum, sizeof(sobj)); + + for (int i = VEC/2; i > 0; i>>=1) { + if (vid < i) { + memcpy((void *)&temp, (void *)&sdata[tid+i], sizeof(sobj)); + beta += temp; + memcpy((void *)&sdata[tid], (void *)&beta, sizeof(sobj)); + } + __syncwarp(); + } + __syncthreads(); + + if (threadIdx.x == 0) { + beta = Zero(); + for (Iterator i = 0; i < blockSize; i += VEC) { + memcpy((void *)&temp, (void *)&sdata[i], sizeof(sobj)); + beta += temp; + } + memcpy((void *)&sdata[0], (void *)&beta, sizeof(sobj)); + } + __syncthreads(); +} + + +template +__device__ void reduceBlocks(const vobj *g_idata, sobj *g_odata, Iterator n) +{ + constexpr Iterator nsimd = vobj::Nsimd(); + + Iterator blockSize = blockDim.x; + + // force shared memory alignment + extern __shared__ __align__(COALESCE_GRANULARITY) unsigned char shmem_pointer[]; + // it's not possible to have two extern __shared__ arrays with same name + // but different types in different scopes -- need to cast each time + sobj *sdata = (sobj *)shmem_pointer; + + // first level of reduction, + // each thread writes result in mySum + Iterator tid = threadIdx.x; + Iterator i = blockIdx.x*(blockSize*2) + threadIdx.x; + Iterator gridSize = blockSize*2*gridDim.x; + sobj mySum = Zero(); + + 
while (i < n) { + Iterator lane = i % nsimd; + Iterator ss = i / nsimd; + auto tmp = extractLane(lane,g_idata[ss]); + sobj tmpD; + tmpD=tmp; + mySum +=tmpD; + + if (i + blockSize < n) { + lane = (i+blockSize) % nsimd; + ss = (i+blockSize) / nsimd; + tmp = extractLane(lane,g_idata[ss]); + tmpD = tmp; + mySum += tmpD; + } + i += gridSize; + } + + // copy mySum to shared memory and perform + // reduction for all threads in this block + reduceBlock(sdata, mySum, tid); + if (tid == 0) g_odata[blockIdx.x] = sdata[0]; +} + +template +__global__ void reduceKernel(const vobj *lat, sobj *buffer, Iterator n) { + + Iterator blockSize = blockDim.x; + + // perform reduction for this block and + // write result to global memory buffer + reduceBlocks(lat, buffer, n); + + if (gridDim.x > 1) { + + const Iterator tid = threadIdx.x; + __shared__ bool amLast; + // force shared memory alignment + extern __shared__ __align__(COALESCE_GRANULARITY) unsigned char shmem_pointer[]; + // it's not possible to have two extern __shared__ arrays with same name + // but different types in different scopes -- need to cast each time + sobj *smem = (sobj *)shmem_pointer; + + // wait until all outstanding memory instructions in this thread are finished + __threadfence(); + + if (tid==0) { + unsigned int ticket = atomicInc(&retirementCount, gridDim.x); + // true if this block is the last block to be done + amLast = (ticket == gridDim.x-1); + } + + // each thread must read the correct value of amLast + __syncthreads(); + + if (amLast) { + // reduce buffer[0], ..., buffer[gridDim.x-1] + Iterator i = tid; + sobj mySum = Zero(); + + while (i < gridDim.x) { + mySum += buffer[i]; + i += blockSize; + } + + reduceBlock(smem, mySum, tid); + + if (tid==0) { + buffer[0] = smem[0]; + // reset count variable + retirementCount = 0; + } + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// Possibly promote to double and sum 
+///////////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline typename vobj::scalar_objectD sumD_gpu(const vobj *lat, Integer osites) +{ + typedef typename vobj::scalar_objectD sobj; + typedef decltype(lat) Iterator; + + Integer nsimd= vobj::Nsimd(); + Integer size = osites*nsimd; + + Integer numThreads, numBlocks; + getNumBlocksAndThreads(size, sizeof(sobj), numThreads, numBlocks); + Integer smemSize = numThreads * sizeof(sobj); + + Vector buffer(numBlocks); + sobj *buffer_v = &buffer[0]; + + reduceKernel<<< numBlocks, numThreads, smemSize >>>(lat, buffer_v, size); + cudaDeviceSynchronize(); + + cudaError err = cudaGetLastError(); + if ( cudaSuccess != err ) { + printf("Cuda error %s\n",cudaGetErrorString( err )); + exit(0); + } + auto result = buffer_v[0]; + return result; +} +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// Return as same precision as input performing reduction in double precision though +///////////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline typename vobj::scalar_object sum_gpu(const vobj *lat, Integer osites) +{ + typedef typename vobj::scalar_object sobj; + sobj result; + result = sumD_gpu(lat,osites); + return result; +} + + + +NAMESPACE_END(Grid); diff --git a/Grid/lattice/Lattice_rng.h b/Grid/lattice/Lattice_rng.h index 5348538c..1bb1f087 100644 --- a/Grid/lattice/Lattice_rng.h +++ b/Grid/lattice/Lattice_rng.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,8 +24,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_RNG_H #define GRID_LATTICE_RNG_H @@ -41,282 +41,289 @@ #undef RNG_FAST_DISCARD #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); - ////////////////////////////////////////////////////////////// - // Allow the RNG state to be less dense than the fine grid - ////////////////////////////////////////////////////////////// - inline int RNGfillable(GridBase *coarse,GridBase *fine) - { +////////////////////////////////////////////////////////////// +// Allow the RNG state to be less dense than the fine grid +////////////////////////////////////////////////////////////// +inline int RNGfillable(GridBase *coarse,GridBase *fine) +{ - int rngdims = coarse->_ndimension; + int rngdims = coarse->_ndimension; - // trivially extended in higher dims, with locality guaranteeing RNG state is local to node - int lowerdims = fine->_ndimension - coarse->_ndimension; - assert(lowerdims >= 0); - for(int d=0;d_simd_layout[d]==1); - assert(fine->_processors[d]==1); - } - - int multiplicity=1; - for(int d=0;d_rdimensions[d]; - } - // local and global volumes subdivide cleanly after SIMDization - for(int d=0;d_processors[d] == fine->_processors[fd]); - assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]); - assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); - - multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; - } - return multiplicity; + // trivially extended in higher dims, with locality guaranteeing RNG state is local to node + int lowerdims = fine->_ndimension - coarse->_ndimension; + assert(lowerdims >= 0); + for(int d=0;d_simd_layout[d]==1); + assert(fine->_processors[d]==1); } + int 
multiplicity=1; + for(int d=0;d_rdimensions[d]; + } + // local and global volumes subdivide cleanly after SIMDization + for(int d=0;d_processors[d] == fine->_processors[fd]); + assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]); + assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); + + multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; + } + return multiplicity; +} + // merge of April 11 2017 - // this function is necessary for the LS vectorised field - inline int RNGfillable_general(GridBase *coarse,GridBase *fine) - { - int rngdims = coarse->_ndimension; +// this function is necessary for the LS vectorised field +inline int RNGfillable_general(GridBase *coarse,GridBase *fine) +{ + int rngdims = coarse->_ndimension; - // trivially extended in higher dims, with locality guaranteeing RNG state is local to node - int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0); - // assumes that the higher dimensions are not using more processors - // all further divisions are local - for(int d=0;d_processors[d]==1); - for(int d=0;d_processors[d] == fine->_processors[d+lowerdims]); + // trivially extended in higher dims, with locality guaranteeing RNG state is local to node + int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0); + // assumes that the higher dimensions are not using more processors + // all further divisions are local + for(int d=0;d_processors[d]==1); + for(int d=0;d_processors[d] == fine->_processors[d+lowerdims]); - // then divide the number of local sites - // check that the total number of sims agree, meanse the iSites are the same - assert(fine->Nsimd() == coarse->Nsimd()); + // then divide the number of local sites + // check that the total number of sims agree, meanse the iSites are the same + assert(fine->Nsimd() == coarse->Nsimd()); - // check that the two grids divide cleanly - assert( (fine->lSites() / 
coarse->lSites() ) * coarse->lSites() == fine->lSites() ); + // check that the two grids divide cleanly + assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() ); - return fine->lSites() / coarse->lSites(); - } + return fine->lSites() / coarse->lSites(); +} - // real scalars are one component - template - void fillScalar(scalar &s,distribution &dist,generator & gen) - { - s=dist(gen); - } - template - void fillScalar(ComplexF &s,distribution &dist, generator &gen) - { - s=ComplexF(dist(gen),dist(gen)); - } - template - void fillScalar(ComplexD &s,distribution &dist,generator &gen) - { - s=ComplexD(dist(gen),dist(gen)); - } +// real scalars are one component +template +void fillScalar(scalar &s,distribution &dist,generator & gen) +{ + s=dist(gen); +} +template +void fillScalar(ComplexF &s,distribution &dist, generator &gen) +{ + // s=ComplexF(dist(gen),dist(gen)); + s.real(dist(gen)); + s.imag(dist(gen)); +} +template +void fillScalar(ComplexD &s,distribution &dist,generator &gen) +{ + // s=ComplexD(dist(gen),dist(gen)); + s.real(dist(gen)); + s.imag(dist(gen)); +} - class GridRNGbase { - public: - // One generator per site. - // Uniform and Gaussian distributions from these generators. +class GridRNGbase { +public: + // One generator per site. + // Uniform and Gaussian distributions from these generators. 
#ifdef RNG_RANLUX - typedef std::ranlux48 RngEngine; - typedef uint64_t RngStateType; - static const int RngStateCount = 15; + typedef std::ranlux48 RngEngine; + typedef uint64_t RngStateType; + static const int RngStateCount = 15; #endif #ifdef RNG_MT19937 - typedef std::mt19937 RngEngine; - typedef uint32_t RngStateType; - static const int RngStateCount = std::mt19937::state_size; + typedef std::mt19937 RngEngine; + typedef uint32_t RngStateType; + static const int RngStateCount = std::mt19937::state_size; #endif #ifdef RNG_SITMO - typedef sitmo::prng_engine RngEngine; - typedef uint64_t RngStateType; - static const int RngStateCount = 13; + typedef sitmo::prng_engine RngEngine; + typedef uint64_t RngStateType; + static const int RngStateCount = 13; #endif - std::vector _generators; - std::vector > _uniform; - std::vector > _gaussian; - std::vector > _bernoulli; - std::vector > _uid; + std::vector _generators; + std::vector > _uniform; + std::vector > _gaussian; + std::vector > _bernoulli; + std::vector > _uid; - /////////////////////// - // support for parallel init - /////////////////////// + /////////////////////// + // support for parallel init + /////////////////////// #ifdef RNG_FAST_DISCARD - static void Skip(RngEngine &eng,uint64_t site) - { - ///////////////////////////////////////////////////////////////////////////////////// - // Skip by 2^40 elements between successive lattice sites - // This goes by 10^12. - // Consider quenched updating; likely never exceeding rate of 1000 sweeps - // per second on any machine. This gives us of order 10^9 seconds, or 100 years - // skip ahead. - // For HMC unlikely to go at faster than a solve per second, and - // tens of seconds per trajectory so this is clean in all reasonable cases, - // and margin of safety is orders of magnitude. 
- // We could hack Sitmo to skip in the higher order words of state if necessary + static void Skip(RngEngine &eng,uint64_t site) + { + ///////////////////////////////////////////////////////////////////////////////////// + // Skip by 2^40 elements between successive lattice sites + // This goes by 10^12. + // Consider quenched updating; likely never exceeding rate of 1000 sweeps + // per second on any machine. This gives us of order 10^9 seconds, or 100 years + // skip ahead. + // For HMC unlikely to go at faster than a solve per second, and + // tens of seconds per trajectory so this is clean in all reasonable cases, + // and margin of safety is orders of magnitude. + // We could hack Sitmo to skip in the higher order words of state if necessary // // Replace with 2^30 ; avoid problem on large volumes // - ///////////////////////////////////////////////////////////////////////////////////// - // uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init - const int shift = 30; + ///////////////////////////////////////////////////////////////////////////////////// + // uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init + const int shift = 30; - uint64_t skip = site; + //////////////////////////////////////////////////////////////////// + // Weird compiler bug in Intel 2018.1 under O3 was generating 32bit and not 64 bit left shift. 
+ //////////////////////////////////////////////////////////////////// + volatile uint64_t skip = site; - skip = skip<> shift)==site); // check for overflow + assert((skip >> shift)==site); // check for overflow - eng.discard(skip); - // std::cout << " Engine " < newseed; - std::uniform_int_distribution uid; - return Reseed(eng,newseed,uid); - } - static RngEngine Reseed(RngEngine &eng,std::vector & newseed, - std::uniform_int_distribution &uid) - { - const int reseeds=4; + static RngEngine Reseed(RngEngine &eng) + { + std::vector newseed; + std::uniform_int_distribution uid; + return Reseed(eng,newseed,uid); + } + static RngEngine Reseed(RngEngine &eng,std::vector & newseed, + std::uniform_int_distribution &uid) + { + const int reseeds=4; - newseed.resize(reseeds); - for(int i=0;i & saved,RngEngine &eng) { - saved.resize(RngStateCount); - std::stringstream ss; - ss<>saved[i]; - } + void GetState(std::vector & saved,RngEngine &eng) { + saved.resize(RngStateCount); + std::stringstream ss; + ss<>saved[i]; } - void GetState(std::vector & saved,int gen) { - GetState(saved,_generators[gen]); + } + void GetState(std::vector & saved,int gen) { + GetState(saved,_generators[gen]); + } + void SetState(std::vector & saved,RngEngine &eng){ + assert(saved.size()==RngStateCount); + std::stringstream ss; + for(int i=0;i & saved,RngEngine &eng){ - assert(saved.size()==RngStateCount); - std::stringstream ss; - for(int i=0;i>eng; - } - void SetState(std::vector & saved,int gen){ - SetState(saved,_generators[gen]); - } - void SetEngine(RngEngine &Eng, int gen){ - _generators[gen]=Eng; - } - void GetEngine(RngEngine &Eng, int gen){ - Eng=_generators[gen]; - } - template void Seed(source &src, int gen) - { - _generators[gen] = RngEngine(src); - } - }; + ss.seekg(0,ss.beg); + ss>>eng; + } + void SetState(std::vector & saved,int gen){ + SetState(saved,_generators[gen]); + } + void SetEngine(RngEngine &Eng, int gen){ + _generators[gen]=Eng; + } + void GetEngine(RngEngine &Eng, int gen){ + 
Eng=_generators[gen]; + } + template void Seed(source &src, int gen) + { + _generators[gen] = RngEngine(src); + } +}; - class GridSerialRNG : public GridRNGbase { - public: +class GridSerialRNG : public GridRNGbase { +public: - GridSerialRNG() : GridRNGbase() { - _generators.resize(1); - _uniform.resize(1,std::uniform_real_distribution{0,1}); - _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); - _bernoulli.resize(1,std::discrete_distribution{1,1}); - _uid.resize(1,std::uniform_int_distribution() ); - } + GridSerialRNG() : GridRNGbase() { + _generators.resize(1); + _uniform.resize(1,std::uniform_real_distribution{0,1}); + _gaussian.resize(1,std::normal_distribution(0.0,1.0) ); + _bernoulli.resize(1,std::discrete_distribution{1,1}); + _uid.resize(1,std::uniform_int_distribution() ); + } - template inline void fill(sobj &l,std::vector &dist){ + template inline void fill(sobj &l,std::vector &dist){ - typedef typename sobj::scalar_type scalar_type; + typedef typename sobj::scalar_type scalar_type; - int words = sizeof(sobj)/sizeof(scalar_type); + int words = sizeof(sobj)/sizeof(scalar_type); - scalar_type *buf = (scalar_type *) & l; + scalar_type *buf = (scalar_type *) & l; - dist[0].reset(); - for(int idx=0;idx inline void fill(ComplexF &l,std::vector &dist){ - dist[0].reset(); - fillScalar(l,dist[0],_generators[0]); - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + template inline void fill(ComplexF &l,std::vector &dist){ + dist[0].reset(); + fillScalar(l,dist[0],_generators[0]); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + template inline void fill(ComplexD &l,std::vector &dist){ + dist[0].reset(); + fillScalar(l,dist[0],_generators[0]); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + template inline void fill(RealF &l,std::vector &dist){ + dist[0].reset(); + fillScalar(l,dist[0],_generators[0]); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + template inline void 
fill(RealD &l,std::vector &dist){ + dist[0].reset(); + fillScalar(l,dist[0],_generators[0]); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + // vector fill + template inline void fill(vComplexF &l,std::vector &dist){ + RealF *pointer=(RealF *)&l; + dist[0].reset(); + for(int i=0;i<2*vComplexF::Nsimd();i++){ + fillScalar(pointer[i],dist[0],_generators[0]); } - template inline void fill(ComplexD &l,std::vector &dist){ - dist[0].reset(); - fillScalar(l,dist[0],_generators[0]); - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + template inline void fill(vComplexD &l,std::vector &dist){ + RealD *pointer=(RealD *)&l; + dist[0].reset(); + for(int i=0;i<2*vComplexD::Nsimd();i++){ + fillScalar(pointer[i],dist[0],_generators[0]); } - template inline void fill(RealF &l,std::vector &dist){ - dist[0].reset(); - fillScalar(l,dist[0],_generators[0]); - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); + } + template inline void fill(vRealF &l,std::vector &dist){ + RealF *pointer=(RealF *)&l; + dist[0].reset(); + for(int i=0;i inline void fill(RealD &l,std::vector &dist){ - dist[0].reset(); - fillScalar(l,dist[0],_generators[0]); - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); - } - // vector fill - template inline void fill(vComplexF &l,std::vector &dist){ - RealF *pointer=(RealF *)&l; - dist[0].reset(); - for(int i=0;i<2*vComplexF::Nsimd();i++){ - fillScalar(pointer[i],dist[0],_generators[0]); - } - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); - } - template inline void fill(vComplexD &l,std::vector &dist){ - RealD *pointer=(RealD *)&l; - dist[0].reset(); - for(int i=0;i<2*vComplexD::Nsimd();i++){ - fillScalar(pointer[i],dist[0],_generators[0]); - } - CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); - } - template inline void fill(vRealF 
&l,std::vector &dist){ - RealF *pointer=(RealF *)&l; - dist[0].reset(); - for(int i=0;i inline void fill(vRealD &l,std::vector &dist){ - RealD *pointer=(RealD *)&l; - dist[0].reset(); - for(int i=0;i inline void fill(vRealD &l,std::vector &dist){ + RealD *pointer=(RealD *)&l; + dist[0].reset(); + for(int i=0;i &seeds){ - CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - std::seed_seq src(seeds.begin(),seeds.end()); - Seed(src,0); - } + void SeedFixedIntegers(const std::vector &seeds){ + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); + std::seed_seq src(seeds.begin(),seeds.end()); + Seed(src,0); + } void SeedUniqueString(const std::string &s){ std::vector seeds; @@ -330,65 +337,67 @@ namespace Grid { std::cout << GridLogMessage << "Seed SHA256: " << sha.str() << std::endl; SeedFixedIntegers(seeds); } - }; +}; - class GridParallelRNG : public GridRNGbase { +class GridParallelRNG : public GridRNGbase { +private: + double _time_counter; + GridBase *_grid; + unsigned int _vol; - double _time_counter; +public: + GridBase *Grid(void) const { return _grid; } + int generator_idx(int os,int is) { + return is*_grid->oSites()+os; + } - public: - GridBase *_grid; - unsigned int _vol; + GridParallelRNG(GridBase *grid) : GridRNGbase() { + _grid = grid; + _vol =_grid->iSites()*_grid->oSites(); - int generator_idx(int os,int is) { - return is*_grid->oSites()+os; - } + _generators.resize(_vol); + _uniform.resize(_vol,std::uniform_real_distribution{0,1}); + _gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); + _bernoulli.resize(_vol,std::discrete_distribution{1,1}); + _uid.resize(_vol,std::uniform_int_distribution() ); + } - GridParallelRNG(GridBase *grid) : GridRNGbase() { - _grid = grid; - _vol =_grid->iSites()*_grid->oSites(); + template inline void fill(Lattice &l,std::vector &dist){ - _generators.resize(_vol); - _uniform.resize(_vol,std::uniform_real_distribution{0,1}); - 
_gaussian.resize(_vol,std::normal_distribution(0.0,1.0) ); - _bernoulli.resize(_vol,std::discrete_distribution{1,1}); - _uid.resize(_vol,std::uniform_int_distribution() ); - } + typedef typename vobj::scalar_object scalar_object; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; - template inline void fill(Lattice &l,std::vector &dist){ + double inner_time_counter = usecond(); - typedef typename vobj::scalar_object scalar_object; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; + int multiplicity = RNGfillable_general(_grid, l.Grid()); // l has finer or same grid + int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l.Grid() too + int osites = _grid->oSites(); // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity + int words = sizeof(scalar_object) / sizeof(scalar_type); - double inner_time_counter = usecond(); + auto l_v = l.View(); + thread_for( ss, osites, { + ExtractBuffer buf(Nsimd); + for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times - int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid - int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too - int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity - int words = sizeof(scalar_object) / sizeof(scalar_type); + int sm = multiplicity * ss + m; // Maps the generator site to the fine site - parallel_for(int ss=0;ss buf(Nsimd); - for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times - - int sm = multiplicity * ss + m; // Maps the generator site to the fine site - - for (int si = 0; si < Nsimd; si++) { + for (int si = 0; si < Nsimd; si++) { - int gdx = generator_idx(ss, si); // index of generator state - scalar_type *pointer = (scalar_type *)&buf[si]; - dist[gdx].reset(); - for (int idx = 0; idx < words; idx++) - 
fillScalar(pointer[idx], dist[gdx], _generators[gdx]); - } - // merge into SIMD lanes, FIXME suboptimal implementation - merge(l._odata[sm], buf); - } + int gdx = generator_idx(ss, si); // index of generator state + scalar_type *pointer = (scalar_type *)&buf[si]; + dist[gdx].reset(); + for (int idx = 0; idx < words; idx++) + fillScalar(pointer[idx], dist[gdx], _generators[gdx]); + } + // merge into SIMD lanes, FIXME suboptimal implementation + merge(l_v[sm], buf); } + }); + // }); - _time_counter += usecond()- inner_time_counter; - }; + _time_counter += usecond()- inner_time_counter; + } void SeedUniqueString(const std::string &s){ std::vector seeds; @@ -398,119 +407,119 @@ namespace Grid { std::cout << GridLogMessage << "Seed SHA256: " << GridChecksum::sha256_string(seeds) << std::endl; SeedFixedIntegers(seeds); } - void SeedFixedIntegers(const std::vector &seeds){ + void SeedFixedIntegers(const std::vector &seeds){ - // Everyone generates the same seed_seq based on input seeds - CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); + // Everyone generates the same seed_seq based on input seeds + CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); - std::seed_seq source(seeds.begin(),seeds.end()); + std::seed_seq source(seeds.begin(),seeds.end()); - RngEngine master_engine(source); + RngEngine master_engine(source); #ifdef RNG_FAST_DISCARD - //////////////////////////////////////////////// - // Skip ahead through a single stream. - // Applicable to SITMO and other has based/crypto RNGs - // Should be applicable to Mersenne Twister, but the C++11 - // MT implementation does not implement fast discard even though - // in principle this is possible - //////////////////////////////////////////////// - - // Everybody loops over global volume. - parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){ + //////////////////////////////////////////////// + // Skip ahead through a single stream. 
+ // Applicable to SITMO and other has based/crypto RNGs + // Should be applicable to Mersenne Twister, but the C++11 + // MT implementation does not implement fast discard even though + // in principle this is possible + //////////////////////////////////////////////// + // Everybody loops over global volume. + thread_for( gidx, _grid->_gsites, { // Where is it? - int rank,o_idx,i_idx; - std::vector gcoor; + int rank; + int o_idx; + int i_idx; + Coordinate gcoor; _grid->GlobalIndexToGlobalCoor(gidx,gcoor); _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - + // If this is one of mine we take it if( rank == _grid->ThisRank() ){ int l_idx=generator_idx(o_idx,i_idx); _generators[l_idx] = master_engine; Skip(_generators[l_idx],gidx); // Skip to next RNG sequence } - - } + }); #else - //////////////////////////////////////////////////////////////// - // Machine and thread decomposition dependent seeding is efficient - // and maximally parallel; but NOT reproducible from machine to machine. - // Not ideal, but fastest way to reseed all nodes. 
- //////////////////////////////////////////////////////////////// - { - // Obtain one Reseed per processor - int Nproc = _grid->ProcessorCount(); - std::vector seeders(Nproc); - int me= _grid->ThisRank(); - for(int p=0;pProcessorCount(); + std::vector seeders(Nproc); + int me= _grid->ThisRank(); + for(int p=0;p seeders(Nthread); + for(int t=0;t seeders(Nthread); - for(int t=0;t newseeds; - std::uniform_int_distribution uid; - for(int l=0;l<_grid->lSites();l++) { - if ( (l%Nthread)==t ) { - _generators[l] = Reseed(seeders[t],newseeds,uid); - } + thread_for( t, Nthread, { + // set up one per local site in threaded fashion + std::vector newseeds; + std::uniform_int_distribution uid; + for(int l=0;l<_grid->lSites();l++) { + if ( (l%Nthread)==t ) { + _generators[l] = Reseed(seeders[t],newseeds,uid); } } - } + }); + } #endif + } + + void Report(){ + std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; + } + + + //////////////////////////////////////////////////////////////////////// + // Support for rigorous test of RNG's + // Return uniform random uint32_t from requested site generator + //////////////////////////////////////////////////////////////////////// + uint32_t GlobalU01(int gsite){ + + uint32_t the_number; + // who + int rank,o_idx,i_idx; + Coordinate gcoor; + _grid->GlobalIndexToGlobalCoor(gsite,gcoor); + _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); + + // draw + int l_idx=generator_idx(o_idx,i_idx); + if( rank == _grid->ThisRank() ){ + the_number = _uid[l_idx](_generators[l_idx]); } - - void Report(){ - std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl; - } - - - //////////////////////////////////////////////////////////////////////// - // Support for rigorous test of RNG's - // Return uniform random uint32_t from requested site generator - 
//////////////////////////////////////////////////////////////////////// - uint32_t GlobalU01(int gsite){ - - uint32_t the_number; - // who - std::vector gcoor; - int rank,o_idx,i_idx; - _grid->GlobalIndexToGlobalCoor(gsite,gcoor); - _grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - - // draw - int l_idx=generator_idx(o_idx,i_idx); - if( rank == _grid->ThisRank() ){ - the_number = _uid[l_idx](_generators[l_idx]); - } - // share & return - _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); - return the_number; - } + // share & return + _grid->Broadcast(rank,(void *)&the_number,sizeof(the_number)); + return the_number; + } - }; +}; - template inline void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } - template inline void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } - template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._bernoulli);} +template inline void random(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._uniform); } +template inline void gaussian(GridParallelRNG &rng,Lattice &l) { rng.fill(l,rng._gaussian); } +template inline void bernoulli(GridParallelRNG &rng,Lattice &l){ rng.fill(l,rng._bernoulli);} - template inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } - template inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } - template inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } +template inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); } +template inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); } +template inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_trace.h b/Grid/lattice/Lattice_trace.h index 449c55f8..93444e0c 100644 --- a/Grid/lattice/Lattice_trace.h +++ b/Grid/lattice/Lattice_trace.h @@ -1,4 +1,4 @@ - 
/************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,8 +23,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_TRACE_H #define GRID_LATTICE_TRACE_H @@ -32,36 +32,38 @@ Author: Peter Boyle // Tracing, transposing, peeking, poking /////////////////////////////////////////////// -namespace Grid { +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Trace - //////////////////////////////////////////////////////////////////////////////////////////////////// - template - inline auto trace(const Lattice &lhs) - -> Lattice - { - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = trace(lhs._odata[ss]); - } - return ret; - }; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Trace +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline auto trace(const Lattice &lhs) -> Lattice +{ + Lattice ret(lhs.Grid()); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { + coalescedWrite(ret_v[ss], trace(lhs_v(ss))); + }); + return ret; +}; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Trace Index level dependent operation - //////////////////////////////////////////////////////////////////////////////////////////////////// - 
template - inline auto TraceIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> - { - Lattice(lhs._odata[0]))> ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = traceIndex(lhs._odata[ss]); - } - return ret; - }; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Trace Index level dependent operation +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline auto TraceIndex(const Lattice &lhs) -> Lattice(vobj()))> +{ + Lattice(vobj()))> ret(lhs.Grid()); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for( ss, lhs_v.size(), vobj::Nsimd(), { + coalescedWrite(ret_v[ss], traceIndex(lhs_v(ss))); + }); + return ret; +}; - -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 69c132ed..865a4b14 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -23,12 +23,11 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_LATTICE_TRANSFER_H -#define GRID_LATTICE_TRANSFER_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); inline void subdivides(GridBase *coarse,GridBase *fine) { @@ -45,39 +44,44 @@ inline void subdivides(GridBase *coarse,GridBase *fine) } - //////////////////////////////////////////////////////////////////////////////////////////// - // remove and insert a half checkerboard - //////////////////////////////////////////////////////////////////////////////////////////// - template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ - half.checkerboard = cb; +//////////////////////////////////////////////////////////////////////////////////////////// +// remove and insert a half checkerboard +//////////////////////////////////////////////////////////////////////////////////////////// +template inline void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ + half.Checkerboard() = cb; - parallel_for(int ss=0;ssoSites();ss++){ - int cbos; - std::vector coor; - full._grid->oCoorFromOindex(coor,ss); - cbos=half._grid->CheckerBoard(coor); - - if (cbos==cb) { - int ssh=half._grid->oIndex(coor); - half._odata[ssh] = full._odata[ss]; - } - } - } - template inline void setCheckerboard(Lattice &full,const Lattice &half){ - int cb = half.checkerboard; - parallel_for(int ss=0;ssoSites();ss++){ - std::vector coor; - int cbos; + auto half_v = half.View(); + auto full_v = full.View(); + thread_for(ss, full.Grid()->oSites(),{ + int cbos; + Coordinate coor; + full.Grid()->oCoorFromOindex(coor,ss); + cbos=half.Grid()->CheckerBoard(coor); - full._grid->oCoorFromOindex(coor,ss); - cbos=half._grid->CheckerBoard(coor); - - if (cbos==cb) { - int 
ssh=half._grid->oIndex(coor); - full._odata[ss]=half._odata[ssh]; - } + if (cbos==cb) { + int ssh=half.Grid()->oIndex(coor); + half_v[ssh] = full_v[ss]; } - } + }); +} +template inline void setCheckerboard(Lattice &full,const Lattice &half){ + int cb = half.Checkerboard(); + auto half_v = half.View(); + auto full_v = full.View(); + thread_for(ss,full.Grid()->oSites(),{ + + Coordinate coor; + int cbos; + + full.Grid()->oCoorFromOindex(coor,ss); + cbos=half.Grid()->CheckerBoard(coor); + + if (cbos==cb) { + int ssh=half.Grid()->oIndex(coor); + full_v[ss]=half_v[ssh]; + } + }); +} template @@ -85,8 +89,8 @@ inline void blockProject(Lattice > &coarseData, const Lattice &fineData, const std::vector > &Basis) { - GridBase * fine = fineData._grid; - GridBase * coarse= coarseData._grid; + GridBase * fine = fineData.Grid(); + GridBase * coarse= coarseData.Grid(); int _ndimension = coarse->_ndimension; // checks @@ -96,33 +100,33 @@ inline void blockProject(Lattice > &coarseData, conformable(Basis[i],fineData); } - std::vector block_r (_ndimension); + Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } - coarseData=zero; + coarseData=Zero(); + auto fineData_ = fineData.View(); + auto coarseData_ = coarseData.View(); // Loop over coars parallel, and then loop over fine associated with coarse. 
- parallel_for(int sf=0;sfoSites();sf++){ - + thread_for( sf, fine->oSites(), { int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); -PARALLEL_CRITICAL - for(int i=0;i &fineZ, const Lattice &fineX, const Lattice &fineY) { - GridBase * fine = fineZ._grid; - GridBase * coarse= coarseA._grid; + GridBase * fine = fineZ.Grid(); + GridBase * coarse= coarseA.Grid(); - fineZ.checkerboard=fineX.checkerboard; - assert(fineX.checkerboard==fineY.checkerboard); + fineZ.Checkerboard()=fineX.Checkerboard(); + assert(fineX.Checkerboard()==fineY.Checkerboard()); subdivides(coarse,fine); // require they map conformable(fineX,fineY); conformable(fineX,fineZ); int _ndimension = coarse->_ndimension; - std::vector block_r (_ndimension); + Coordinate block_r (_ndimension); // FIXME merge with subdivide checking routine as this is redundant for(int d=0 ; d<_ndimension;d++){ @@ -151,48 +155,56 @@ inline void blockZAXPY(Lattice &fineZ, assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]); } - parallel_for(int sf=0;sfoSites();sf++){ + auto fineZ_ = fineZ.View(); + auto fineX_ = fineX.View(); + auto fineY_ = fineY.View(); + auto coarseA_= coarseA.View(); + + thread_for(sf, fine->oSites(), { int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); // z = A x + y - fineZ._odata[sf]=coarseA._odata[sc]*fineX._odata[sf]+fineY._odata[sf]; + fineZ_[sf]=coarseA_[sc]*fineX_[sf]+fineY_[sf]; - } + }); return; } template - inline void 
blockInnerProduct(Lattice &CoarseInner, - const Lattice &fineX, - const Lattice &fineY) +inline void blockInnerProduct(Lattice &CoarseInner, + const Lattice &fineX, + const Lattice &fineY) { - typedef decltype(innerProduct(fineX._odata[0],fineY._odata[0])) dotp; + typedef decltype(innerProduct(vobj(),vobj())) dotp; - GridBase *coarse(CoarseInner._grid); - GridBase *fine (fineX._grid); + GridBase *coarse(CoarseInner.Grid()); + GridBase *fine (fineX.Grid()); - Lattice fine_inner(fine); fine_inner.checkerboard = fineX.checkerboard; + Lattice fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard(); Lattice coarse_inner(coarse); // Precision promotion? + auto CoarseInner_ = CoarseInner.View(); + auto coarse_inner_ = coarse_inner.View(); + fine_inner = localInnerProduct(fineX,fineY); blockSum(coarse_inner,fine_inner); - parallel_for(int ss=0;ssoSites();ss++){ - CoarseInner._odata[ss] = coarse_inner._odata[ss]; - } + thread_for(ss, coarse->oSites(),{ + CoarseInner_[ss] = coarse_inner_[ss]; + }); } template inline void blockNormalise(Lattice &ip,Lattice &fineX) { - GridBase *coarse = ip._grid; - Lattice zz(fineX._grid); zz=zero; zz.checkerboard=fineX.checkerboard; + GridBase *coarse = ip.Grid(); + Lattice zz(fineX.Grid()); zz=Zero(); zz.Checkerboard()=fineX.Checkerboard(); blockInnerProduct(ip,fineX,fineX); ip = pow(ip,-0.5); blockZAXPY(fineX,ip,fineX,zz); @@ -202,14 +214,14 @@ inline void blockNormalise(Lattice &ip,Lattice &fineX) template inline void blockSum(Lattice &coarseData,const Lattice &fineData) { - GridBase * fine = fineData._grid; - GridBase * coarse= coarseData._grid; + GridBase * fine = fineData.Grid(); + GridBase * coarse= coarseData.Grid(); subdivides(coarse,fine); // require they map int _ndimension = coarse->_ndimension; - std::vector block_r (_ndimension); + Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; @@ -217,36 +229,36 @@ inline void blockSum(Lattice 
&coarseData,const Lattice &fineData) // Turn this around to loop threaded over sc and interior loop // over sf would thread better - coarseData=zero; - parallel_region { + coarseData=Zero(); + auto coarseData_ = coarseData.View(); + auto fineData_ = fineData.View(); + thread_for(sf,fine->oSites(),{ int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); - - parallel_for_internal(int sf=0;sfoSites();sf++){ + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - -PARALLEL_CRITICAL - coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf]; - + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); + + thread_critical { + coarseData_[sc]=coarseData_[sc]+fineData_[sf]; } - } + + }); return; } template -inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,std::vector coor) +inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice &picked,Coordinate coor) { - GridBase * fine = unpicked._grid; + GridBase * fine = unpicked.Grid(); - Lattice zz(fine); zz.checkerboard = unpicked.checkerboard; + Lattice zz(fine); zz.Checkerboard() = unpicked.Checkerboard(); Lattice > fcoor(fine); - zz = zero; + zz = Zero(); picked = unpicked; for(int d=0;d_ndimension;d++){ @@ -262,16 +274,15 @@ inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice inline void blockOrthogonalise(Lattice &ip,std::vector > &Basis) { - GridBase *coarse = ip._grid; - GridBase *fine = Basis[0]._grid; + GridBase *coarse = ip.Grid(); + GridBase *fine = Basis[0].Grid(); int nbasis = Basis.size() ; - int _ndimension = coarse->_ndimension; // checks subdivides(coarse,fine); for(int i=0;i > &coarseData, 
Lattice &fineData, const std::vector > &Basis) { - GridBase * fine = fineData._grid; - GridBase * coarse= coarseData._grid; + GridBase * fine = fineData.Grid(); + GridBase * coarse= coarseData.Grid(); int _ndimension = coarse->_ndimension; // checks assert( nbasis == Basis.size() ); subdivides(coarse,fine); for(int i=0;i block_r (_ndimension); + Coordinate block_r (_ndimension); for(int d=0 ; d<_ndimension;d++){ block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d]; } + auto fineData_ = fineData.View(); + auto coarseData_ = coarseData.View(); // Loop with a cache friendly loop ordering - parallel_region { + thread_for(sf,fine->oSites(),{ int sc; - std::vector coor_c(_ndimension); - std::vector coor_f(_ndimension); + Coordinate coor_c(_ndimension); + Coordinate coor_f(_ndimension); - parallel_for_internal(int sf=0;sfoSites();sf++){ + Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); + for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; + Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions); - for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d]; - Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions); - - for(int i=0;i &in,Lattice &out) typedef typename vobj::scalar_object sobj; typedef typename vvobj::scalar_object ssobj; - GridBase *ig = in._grid; - GridBase *og = out._grid; + GridBase *ig = in.Grid(); + GridBase *og = out.Grid(); int ni = ig->_ndimension; int no = og->_ndimension; @@ -351,16 +362,16 @@ void localConvert(const Lattice &in,Lattice &out) assert(ig->lSites() == og->lSites()); } - parallel_for(int idx=0;idxlSites();idx++){ + thread_for(idx, ig->lSites(),{ sobj s; ssobj ss; - std::vector lcoor(ni); + Coordinate lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); peekLocalSite(s,in,lcoor); ss=s; pokeLocalSite(ss,out,lcoor); - } + }); } @@ -369,8 +380,8 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice { typedef typename 
vobj::scalar_object sobj; - GridBase *lg = lowDim._grid; - GridBase *hg = higherDim._grid; + GridBase *lg = lowDim.Grid(); + GridBase *hg = higherDim.Grid(); int nl = lg->_ndimension; int nh = hg->_ndimension; @@ -389,10 +400,10 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice } // the above should guarantee that the operations are local - parallel_for(int idx=0;idxlSites();idx++){ + thread_for(idx,lg->lSites(),{ sobj s; - std::vector lcoor(nl); - std::vector hcoor(nh); + Coordinate lcoor(nl); + Coordinate hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); int ddl=0; hcoor[orthog] = slice; @@ -403,7 +414,7 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice } peekLocalSite(s,lowDim,lcoor); pokeLocalSite(s,higherDim,hcoor); - } + }); } template @@ -411,8 +422,8 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic { typedef typename vobj::scalar_object sobj; - GridBase *lg = lowDim._grid; - GridBase *hg = higherDim._grid; + GridBase *lg = lowDim.Grid(); + GridBase *hg = higherDim.Grid(); int nl = lg->_ndimension; int nh = hg->_ndimension; @@ -422,18 +433,18 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic assert(hg->_processors[orthog]==1); int dl; dl = 0; - for(int d=0;d_processors[dl] == hg->_processors[d]); - assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); - dl++; + for(int d=0;d_processors[dl] == hg->_processors[d]); + assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); + dl++; } } // the above should guarantee that the operations are local - parallel_for(int idx=0;idxlSites();idx++){ + thread_for(idx,lg->lSites(),{ sobj s; - std::vector lcoor(nl); - std::vector hcoor(nh); + Coordinate lcoor(nl); + Coordinate hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); int ddl=0; hcoor[orthog] = slice; @@ -444,7 +455,7 @@ void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slic } peekLocalSite(s,higherDim,hcoor); pokeLocalSite(s,lowDim,lcoor); - } + }); } @@ -454,8 +465,8 @@ 
void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int { typedef typename vobj::scalar_object sobj; - GridBase *lg = lowDim._grid; - GridBase *hg = higherDim._grid; + GridBase *lg = lowDim.Grid(); + GridBase *hg = higherDim.Grid(); int nl = lg->_ndimension; int nh = hg->_ndimension; @@ -465,16 +476,16 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int for(int d=0;d_processors[d] == hg->_processors[d]); - assert(lg->_ldimensions[d] == hg->_ldimensions[d]); - } + assert(lg->_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } } // the above should guarantee that the operations are local - parallel_for(int idx=0;idxlSites();idx++){ + thread_for(idx,lg->lSites(),{ sobj s; - std::vector lcoor(nl); - std::vector hcoor(nh); + Coordinate lcoor(nl); + Coordinate hcoor(nh); lg->LocalIndexToLocalCoor(idx,lcoor); if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; @@ -482,7 +493,7 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int peekLocalSite(s,lowDim,lcoor); pokeLocalSite(s,higherDim,hcoor); } - } + }); } @@ -491,8 +502,8 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int { typedef typename vobj::scalar_object sobj; - GridBase *lg = lowDim._grid; - GridBase *hg = higherDim._grid; + GridBase *lg = lowDim.Grid(); + GridBase *hg = higherDim.Grid(); int nl = lg->_ndimension; int nh = hg->_ndimension; @@ -502,16 +513,16 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int for(int d=0;d_processors[d] == hg->_processors[d]); - assert(lg->_ldimensions[d] == hg->_ldimensions[d]); - } + assert(lg->_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } } // the above should guarantee that the operations are local - parallel_for(int idx=0;idxlSites();idx++){ + thread_for(idx,lg->lSites(),{ sobj s; - std::vector lcoor(nl); - std::vector hcoor(nh); + Coordinate lcoor(nl); + Coordinate hcoor(nh); 
lg->LocalIndexToLocalCoor(idx,lcoor); if( lcoor[orthog] == slice_lo ) { hcoor=lcoor; @@ -519,7 +530,7 @@ void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int peekLocalSite(s,higherDim,hcoor); pokeLocalSite(s,lowDim,lcoor); } - } + }); } @@ -528,8 +539,8 @@ void Replicate(Lattice &coarse,Lattice & fine) { typedef typename vobj::scalar_object sobj; - GridBase *cg = coarse._grid; - GridBase *fg = fine._grid; + GridBase *cg = coarse.Grid(); + GridBase *fg = fine.Grid(); int nd = cg->_ndimension; @@ -537,14 +548,14 @@ void Replicate(Lattice &coarse,Lattice & fine) assert(cg->_ndimension==fg->_ndimension); - std::vector ratio(cg->_ndimension); + Coordinate ratio(cg->_ndimension); for(int d=0;d_ndimension;d++){ ratio[d] = fg->_fdimensions[d]/cg->_fdimensions[d]; } - std::vector fcoor(nd); - std::vector ccoor(nd); + Coordinate fcoor(nd); + Coordinate ccoor(nd); for(int g=0;ggSites();g++){ fg->GlobalIndexToGlobalCoor(g,fcoor); @@ -567,41 +578,46 @@ unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) typedef typename vobj::vector_type vtype; - GridBase* in_grid = in._grid; + GridBase* in_grid = in.Grid(); out.resize(in_grid->lSites()); int ndim = in_grid->Nd(); int in_nsimd = vtype::Nsimd(); - std::vector > in_icoor(in_nsimd); + std::vector in_icoor(in_nsimd); for(int lane=0; lane < in_nsimd; lane++){ in_icoor[lane].resize(ndim); in_grid->iCoorFromIindex(in_icoor[lane], lane); } - - parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index - //Assemble vector of pointers to output elements - std::vector out_ptrs(in_nsimd); - std::vector in_ocoor(ndim); + //loop over outer index + auto in_v = in.View(); + thread_for(in_oidx,in_grid->oSites(),{ + //Assemble vector of pointers to output elements + ExtractPointerArray out_ptrs(in_nsimd); + + Coordinate in_ocoor(ndim); in_grid->oCoorFromOindex(in_ocoor, in_oidx); - std::vector lcoor(in_grid->Nd()); + Coordinate lcoor(in_grid->Nd()); for(int lane=0; lane < in_nsimd; 
lane++){ - for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + } int lex; Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions); + assert(lex < out.size()); out_ptrs[lane] = &out[lex]; } //Unpack into those ptrs - const vobj & in_vobj = in._odata[in_oidx]; - extract1(in_vobj, out_ptrs, 0); - } + const vobj & in_vobj = in_v[in_oidx]; + extract(in_vobj, out_ptrs, 0); + }); } template @@ -617,21 +633,21 @@ unvectorizeToRevLexOrdArray(std::vector &out, const Lattice &in) int ndim = in_grid->Nd(); int in_nsimd = vtype::Nsimd(); - std::vector > in_icoor(in_nsimd); + std::vector in_icoor(in_nsimd); for(int lane=0; lane < in_nsimd; lane++){ in_icoor[lane].resize(ndim); in_grid->iCoorFromIindex(in_icoor[lane], lane); } - parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + thread_for(in_oidx, in_grid->oSites(),{ //Assemble vector of pointers to output elements std::vector out_ptrs(in_nsimd); - std::vector in_ocoor(ndim); + Coordinate in_ocoor(ndim); in_grid->oCoorFromOindex(in_ocoor, in_oidx); - std::vector lcoor(in_grid->Nd()); + Coordinate lcoor(in_grid->Nd()); for(int lane=0; lane < in_nsimd; lane++){ for(int mu=0;mu &out, const Lattice &in) //Unpack into those ptrs const vobj & in_vobj = in._odata[in_oidx]; extract1(in_vobj, out_ptrs, 0); - } + }); } //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template typename std::enable_if::value - && !isSIMDvectorized::value, void>::type + && !isSIMDvectorized::value, void>::type vectorizeFromLexOrdArray( std::vector &in, Lattice &out) { typedef typename vobj::vector_type vtype; - GridBase* grid = out._grid; + GridBase* grid = out.Grid(); assert(in.size()==grid->lSites()); - int ndim = grid->Nd(); - int nsimd = vtype::Nsimd(); + const int ndim = grid->Nd(); + constexpr int nsimd = vtype::Nsimd(); - std::vector > icoor(nsimd); + std::vector icoor(nsimd); for(int lane=0; lane < nsimd; lane++){ icoor[lane].resize(ndim); 
grid->iCoorFromIindex(icoor[lane],lane); } - - parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + auto out_v = out.View(); + thread_for(oidx, grid->oSites(),{ //Assemble vector of pointers to output elements - std::vector ptrs(nsimd); + ExtractPointerArray ptrs(nsimd); - std::vector ocoor(ndim); + Coordinate ocoor(ndim); + Coordinate lcoor(ndim); grid->oCoorFromOindex(ocoor, oidx); - - std::vector lcoor(grid->Nd()); for(int lane=0; lane < nsimd; lane++){ @@ -692,9 +707,9 @@ vectorizeFromLexOrdArray( std::vector &in, Lattice &out) //pack from those ptrs vobj vecobj; - merge1(vecobj, ptrs, 0); - out._odata[oidx] = vecobj; - } + merge(vecobj, ptrs, 0); + out_v[oidx] = vecobj; + }); } template @@ -711,21 +726,21 @@ vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) int ndim = grid->Nd(); int nsimd = vtype::Nsimd(); - std::vector > icoor(nsimd); + std::vector icoor(nsimd); for(int lane=0; lane < nsimd; lane++){ icoor[lane].resize(ndim); grid->iCoorFromIindex(icoor[lane],lane); } - parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + thread_for(oidx, grid->oSites(), { //Assemble vector of pointers to output elements std::vector ptrs(nsimd); - std::vector ocoor(ndim); + Coordinate ocoor(ndim); grid->oCoorFromOindex(ocoor, oidx); - std::vector lcoor(grid->Nd()); + Coordinate lcoor(grid->Nd()); for(int lane=0; lane < nsimd; lane++){ @@ -742,25 +757,28 @@ vectorizeFromRevLexOrdArray( std::vector &in, Lattice &out) vobj vecobj; merge1(vecobj, ptrs, 0); out._odata[oidx] = vecobj; - } + }); } //Convert a Lattice from one precision to another template -void precisionChange(Lattice &out, const Lattice &in){ - assert(out._grid->Nd() == in._grid->Nd()); - assert(out._grid->FullDimensions() == in._grid->FullDimensions()); - out.checkerboard = in.checkerboard; - GridBase *in_grid=in._grid; - GridBase *out_grid = out._grid; +void precisionChange(Lattice &out, const Lattice &in) +{ + 
assert(out.Grid()->Nd() == in.Grid()->Nd()); + for(int d=0;dNd();d++){ + assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]); + } + out.Checkerboard() = in.Checkerboard(); + GridBase *in_grid=in.Grid(); + GridBase *out_grid = out.Grid(); typedef typename VobjOut::scalar_object SobjOut; typedef typename VobjIn::scalar_object SobjIn; - int ndim = out._grid->Nd(); + int ndim = out.Grid()->Nd(); int out_nsimd = out_grid->Nsimd(); - std::vector > out_icoor(out_nsimd); + std::vector out_icoor(out_nsimd); for(int lane=0; lane < out_nsimd; lane++){ out_icoor[lane].resize(ndim); @@ -770,13 +788,14 @@ void precisionChange(Lattice &out, const Lattice &in){ std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - parallel_for(uint64_t out_oidx=0;out_oidxoSites();out_oidx++){ - std::vector out_ocoor(ndim); + auto out_v = out.View(); + thread_for(out_oidx,out_grid->oSites(),{ + Coordinate out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); - std::vector ptrs(out_nsimd); + ExtractPointerArray ptrs(out_nsimd); - std::vector lcoor(out_grid->Nd()); + Coordinate lcoor(out_grid->Nd()); for(int lane=0; lane < out_nsimd; lane++){ for(int mu=0;mu &out, const Lattice &in){ int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); ptrs[lane] = &in_slex_conv[llex]; } - merge(out._odata[out_oidx], ptrs, 0); - } + merge(out_v[out_oidx], ptrs, 0); + }); } //////////////////////////////////////////////////////////////////////////////// @@ -845,8 +864,8 @@ void Grid_split(std::vector > & full,Lattice & split) assert(full_vecs>=1); - GridBase * full_grid = full[0]._grid; - GridBase *split_grid = split._grid; + GridBase * full_grid = full[0].Grid(); + GridBase *split_grid = split.Grid(); int ndim = full_grid->_ndimension; int full_nproc = full_grid->_Nprocessors; @@ -855,18 +874,18 @@ void Grid_split(std::vector > & full,Lattice & split) //////////////////////////////// // Checkerboard management 
//////////////////////////////// - int cb = full[0].checkerboard; - split.checkerboard = cb; + int cb = full[0].Checkerboard(); + split.Checkerboard() = cb; ////////////////////////////// // Checks ////////////////////////////// assert(full_grid->_ndimension==split_grid->_ndimension); for(int n=0;n_gdimensions[d]==split._grid->_gdimensions[d]); - assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); + assert(full[n].Grid()->_gdimensions[d]==split.Grid()->_gdimensions[d]); + assert(full[n].Grid()->_fdimensions[d]==split.Grid()->_fdimensions[d]); } } @@ -874,7 +893,7 @@ void Grid_split(std::vector > & full,Lattice & split) assert(nvector*split_nproc==full_nproc); assert(nvector == full_vecs); - std::vector ratio(ndim); + Coordinate ratio(ndim); for(int d=0;d_processors[d]/ split_grid->_processors[d]; } @@ -887,13 +906,13 @@ void Grid_split(std::vector > & full,Lattice & split) for(int v=0;v ldims = full_grid->_ldimensions; + Coordinate ldims = full_grid->_ldimensions; for(int d=ndim-1;d>=0;d--){ @@ -919,8 +938,8 @@ void Grid_split(std::vector > & full,Lattice & split) int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol); // Loop over reordered data post A2A - parallel_for(int c=0;c coor(ndim); + thread_for(c, chunk, { + Coordinate coor(ndim); for(int m=0;m > & full,Lattice & split) } } - } + }); ldims[d]*= ratio[d]; lsites *= ratio[d]; @@ -954,8 +973,8 @@ void Grid_split(std::vector > & full,Lattice & split) template void Grid_split(Lattice &full,Lattice & split) { - int nvector = full._grid->_Nprocessors / split._grid->_Nprocessors; - std::vector > full_v(nvector,full._grid); + int nvector = full.Grid()->_Nprocessors / split.Grid()->_Nprocessors; + std::vector > full_v(nvector,full.Grid()); for(int n=0;n > & full,Lattice & split) assert(full_vecs>=1); - GridBase * full_grid = full[0]._grid; - GridBase *split_grid = split._grid; + GridBase * full_grid = full[0].Grid(); + GridBase *split_grid = split.Grid(); int ndim = full_grid->_ndimension; int 
full_nproc = full_grid->_Nprocessors; @@ -981,18 +1000,18 @@ void Grid_unsplit(std::vector > & full,Lattice & split) //////////////////////////////// // Checkerboard management //////////////////////////////// - int cb = full[0].checkerboard; - split.checkerboard = cb; + int cb = full[0].Checkerboard(); + split.Checkerboard() = cb; ////////////////////////////// // Checks ////////////////////////////// assert(full_grid->_ndimension==split_grid->_ndimension); for(int n=0;n_gdimensions[d]==split._grid->_gdimensions[d]); - assert(full[n]._grid->_fdimensions[d]==split._grid->_fdimensions[d]); + assert(full[n].Grid()->_gdimensions[d]==split.Grid()->_gdimensions[d]); + assert(full[n].Grid()->_fdimensions[d]==split.Grid()->_fdimensions[d]); } } @@ -1000,7 +1019,7 @@ void Grid_unsplit(std::vector > & full,Lattice & split) assert(nvector*split_nproc==full_nproc); assert(nvector == full_vecs); - std::vector ratio(ndim); + Coordinate ratio(ndim); for(int d=0;d_processors[d]/ split_grid->_processors[d]; } @@ -1019,7 +1038,7 @@ void Grid_unsplit(std::vector > & full,Lattice & split) int nvec = 1; uint64_t rsites = split_grid->lSites(); - std::vector rdims = split_grid->_ldimensions; + Coordinate rdims = split_grid->_ldimensions; for(int d=0;d > & full,Lattice & split) { // Loop over reordered data post A2A - parallel_for(int c=0;c coor(ndim); + thread_for(c, chunk,{ + Coordinate coor(ndim); for(int m=0;m > & full,Lattice & split) tmpdata[lex_c] = alldata[lex_r]; } } - } + }); } if ( split_grid->_processors[d] > 1 ) { @@ -1076,14 +1095,12 @@ void Grid_unsplit(std::vector > & full,Lattice & split) lsites = full_grid->lSites(); for(int v=0;v 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_TRANSPOSE_H #define GRID_LATTICE_TRANSPOSE_H @@ -33,31 +33,36 @@ Author: Peter Boyle // Transpose /////////////////////////////////////////////// -namespace Grid { +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Transpose - //////////////////////////////////////////////////////////////////////////////////////////////////// - template - inline Lattice transpose(const Lattice &lhs){ - Lattice ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = transpose(lhs._odata[ss]); - } - return ret; - }; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Transpose +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline Lattice transpose(const Lattice &lhs){ + Lattice ret(lhs.Grid()); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ + coalescedWrite(ret_v[ss], transpose(lhs_v(ss))); + }); + return ret; +}; - //////////////////////////////////////////////////////////////////////////////////////////////////// - // Index level dependent transpose - //////////////////////////////////////////////////////////////////////////////////////////////////// - template - inline auto TransposeIndex(const Lattice &lhs) -> Lattice(lhs._odata[0]))> - { - Lattice(lhs._odata[0]))> ret(lhs._grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = transposeIndex(lhs._odata[ss]); - } - return ret; - }; -} 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Index level dependent transpose +//////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline auto TransposeIndex(const Lattice &lhs) -> Lattice(vobj()))> +{ + Lattice(vobj()))> ret(lhs.Grid()); + auto ret_v = ret.View(); + auto lhs_v = lhs.View(); + accelerator_for(ss,lhs_v.size(),vobj::Nsimd(),{ + coalescedWrite(ret_v[ss] , transposeIndex(lhs_v(ss))); + }); + return ret; +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/lattice/Lattice_unary.h b/Grid/lattice/Lattice_unary.h index 44b7b4f1..591afe72 100644 --- a/Grid/lattice/Lattice_unary.h +++ b/Grid/lattice/Lattice_unary.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,59 +26,55 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_UNARY_H #define GRID_LATTICE_UNARY_H -namespace Grid { - - template Lattice pow(const Lattice &rhs,RealD y){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=pow(rhs._odata[ss],y); - } - return ret; - } - template Lattice mod(const Lattice &rhs,Integer y){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=mod(rhs._odata[ss],y); - } - return ret; - } - - template Lattice div(const Lattice &rhs,Integer y){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=div(rhs._odata[ss],y); - } - return ret; - } - - template Lattice expMat(const Lattice &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ - Lattice ret(rhs._grid); - ret.checkerboard = rhs.checkerboard; - conformable(ret,rhs); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); - } - - return ret; - - - - - - } - - +NAMESPACE_BEGIN(Grid); +template Lattice pow(const Lattice &rhs_i,RealD y){ + Lattice ret_i(rhs_i.Grid()); + auto rhs = rhs_i.View(); + auto ret = ret_i.View(); + ret.Checkerboard() = rhs.Checkerboard(); + accelerator_for(ss,rhs.size(),1,{ + ret[ss]=pow(rhs[ss],y); + }); + return ret_i; } +template Lattice mod(const Lattice &rhs_i,Integer y){ + Lattice ret_i(rhs_i.Grid()); + auto rhs = rhs_i.View(); + auto ret = ret_i.View(); + ret.Checkerboard() = rhs.Checkerboard(); + accelerator_for(ss,rhs.size(),obj::Nsimd(),{ + coalescedWrite(ret[ss],mod(rhs(ss),y)); 
+ }); + return ret_i; +} + +template Lattice div(const Lattice &rhs_i,Integer y){ + Lattice ret_i(rhs_i.Grid()); + auto ret = ret_i.View(); + auto rhs = rhs_i.View(); + ret.Checkerboard() = rhs_i.Checkerboard(); + accelerator_for(ss,rhs.size(),obj::Nsimd(),{ + coalescedWrite(ret[ss],div(rhs(ss),y)); + }); + return ret_i; +} + +template Lattice expMat(const Lattice &rhs_i, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ + Lattice ret_i(rhs_i.Grid()); + auto rhs = rhs_i.View(); + auto ret = ret_i.View(); + ret.Checkerboard() = rhs.Checkerboard(); + accelerator_for(ss,rhs.size(),obj::Nsimd(),{ + coalescedWrite(ret[ss],Exponentiate(rhs(ss),alpha, Nexp)); + }); + return ret_i; +} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/log/Log.cc b/Grid/log/Log.cc index cb4a8521..9302b4cc 100644 --- a/Grid/log/Log.cc +++ b/Grid/log/Log.cc @@ -28,27 +28,27 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #include #include #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - std::string demangle(const char* name) { +std::string demangle(const char* name) { - int status = -4; // some arbitrary value to eliminate the compiler warning + int status = -4; // some arbitrary value to eliminate the compiler warning - // enable c++11 by passing the flag -std=c++11 to g++ - std::unique_ptr res { - abi::__cxa_demangle(name, NULL, NULL, &status), - std::free - }; + // enable c++11 by passing the flag -std=c++11 to g++ + std::unique_ptr res { + abi::__cxa_demangle(name, NULL, NULL, &status), + std::free + }; - return (status==0) ? res.get() : name ; - } + return (status==0) ? 
res.get() : name ; +} GridStopWatch Logger::GlobalStopWatch; int Logger::timestamp; @@ -109,8 +109,9 @@ void Grid_quiesce_nodes(void) { } void Grid_unquiesce_nodes(void) { -#ifdef GRID_COMMS_MPI +#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) std::cout.clear(); #endif } -} +NAMESPACE_END(Grid); + diff --git a/Grid/log/Log.h b/Grid/log/Log.h index 322ab32e..d459a4a9 100644 --- a/Grid/log/Log.h +++ b/Grid/log/Log.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,8 +25,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include @@ -37,13 +37,12 @@ #include #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); ////////////////////////////////////////////////////////////////////////////////////////////////// // Dress the output; use std::chrono for time stamping via the StopWatch class ////////////////////////////////////////////////////////////////////////////////////////////////// - class Colours{ protected: bool is_active; @@ -57,15 +56,15 @@ public: void Active(bool activate){ is_active=activate; if (is_active){ - colour["BLACK"] ="\033[30m"; - colour["RED"] ="\033[31m"; - colour["GREEN"] ="\033[32m"; - colour["YELLOW"] ="\033[33m"; - colour["BLUE"] ="\033[34m"; - colour["PURPLE"] ="\033[35m"; - colour["CYAN"] ="\033[36m"; - colour["WHITE"] ="\033[37m"; - colour["NORMAL"] ="\033[0;39m"; + colour["BLACK"] ="\033[30m"; + colour["RED"] ="\033[31m"; + colour["GREEN"] ="\033[32m"; + colour["YELLOW"] ="\033[33m"; + colour["BLUE"] 
="\033[34m"; + colour["PURPLE"] ="\033[35m"; + colour["CYAN"] ="\033[36m"; + colour["WHITE"] ="\033[37m"; + colour["NORMAL"] ="\033[0;39m"; } else { colour["BLACK"] =""; colour["RED"] =""; @@ -102,14 +101,14 @@ public: std::string colour() {return Painter.colour[COLOUR];} Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on), - name(nm), - topName(topNm), - Painter(col_class), - timing_mode(0), - COLOUR(col) - { - StopWatch = & GlobalStopWatch; - }; + name(nm), + topName(topNm), + Painter(col_class), + timing_mode(0), + COLOUR(col) + { + StopWatch = & GlobalStopWatch; + }; void Active(int on) {active = on;}; int isActive(void) {return active;}; @@ -164,7 +163,7 @@ public: class GridLogger: public Logger { public: GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"): - Logger("Grid", on, nm, col_class, col_key){}; + Logger("Grid", on, nm, col_class, col_key){}; }; void GridLogConfigure(std::vector &logstreams); @@ -181,39 +180,39 @@ extern GridLogger GridLogIterative ; extern GridLogger GridLogIntegrator ; extern Colours GridLogColours; - std::string demangle(const char* name) ; +std::string demangle(const char* name) ; #define _NBACKTRACE (256) extern void * Grid_backtrace_buffer[_NBACKTRACE]; -#define BACKTRACEFILE() {\ -char string[20]; \ -std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \ -std::FILE * fp = std::fopen(string,"w"); \ -BACKTRACEFP(fp)\ -std::fclose(fp); \ -} +#define BACKTRACEFILE() { \ + char string[20]; \ + std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \ + std::FILE * fp = std::fopen(string,"w"); \ + BACKTRACEFP(fp) \ + std::fclose(fp); \ + } #ifdef HAVE_EXECINFO_H -#define BACKTRACEFP(fp) { \ -int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\ -char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ -for (int i = 0; i < symbols; i++){\ - std::fprintf (fp,"BackTrace Strings: %d %s\n",i, 
demangle(strings[i]).c_str()); std::fflush(fp); \ -}\ -} +#define BACKTRACEFP(fp) { \ + int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE); \ + char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols); \ + for (int i = 0; i < symbols; i++){ \ + std::fprintf (fp,"BackTrace Strings: %d %s\n",i, demangle(strings[i]).c_str()); std::fflush(fp); \ + } \ + } #else -#define BACKTRACEFP(fp) { \ -std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ -std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ -std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ -std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ -} +#define BACKTRACEFP(fp) { \ + std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ + std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ + std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ + std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ + } #endif #define BACKTRACE() BACKTRACEFP(stdout) +NAMESPACE_END(Grid); -} #endif diff --git a/Grid/parallelIO/BinaryIO.h b/Grid/parallelIO/BinaryIO.h index 144ff29f..f90c34a9 100644 --- a/Grid/parallelIO/BinaryIO.h +++ b/Grid/parallelIO/BinaryIO.h @@ -26,8 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_BINARY_IO_H -#define GRID_BINARY_IO_H +#pragma once #if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPIT) #define USE_MPI_IO @@ -42,8 +41,7 @@ #include #include -namespace Grid { - +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////// // Byte reversal garbage @@ -91,7 +89,7 @@ class BinaryIO { { typedef typename vobj::scalar_object 
sobj; - GridBase *grid = lat._grid; + GridBase *grid = lat.Grid(); uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); @@ -111,21 +109,20 @@ class BinaryIO { lsites = 1; } -PARALLEL_REGION + thread_region { uint32_t nersc_csum_thr = 0; -PARALLEL_FOR_LOOP_INTERN - for (uint64_t local_site = 0; local_site < lsites; local_site++) + thread_for_in_region( local_site, lsites, { uint32_t *site_buf = (uint32_t *)&fbuf[local_site]; for (uint64_t j = 0; j < size32; j++) { nersc_csum_thr = nersc_csum_thr + site_buf[j]; } - } + }); -PARALLEL_CRITICAL + thread_critical { nersc_csum += nersc_csum_thr; } @@ -134,28 +131,25 @@ PARALLEL_CRITICAL template static inline void ScidacChecksum(GridBase *grid,std::vector &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb) { - const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); - - int nd = grid->_ndimension; uint64_t lsites =grid->lSites(); if (fbuf.size()==1) { lsites=1; } - std::vector local_vol =grid->LocalDimensions(); - std::vector local_start =grid->LocalStarts(); - std::vector global_vol =grid->FullDimensions(); + Coordinate local_vol =grid->LocalDimensions(); + Coordinate local_start =grid->LocalStarts(); + Coordinate global_vol =grid->FullDimensions(); -PARALLEL_REGION + thread_region { - std::vector coor(nd); + Coordinate coor(nd); uint32_t scidac_csuma_thr=0; uint32_t scidac_csumb_thr=0; uint32_t site_crc=0; -PARALLEL_FOR_LOOP_INTERN - for(uint64_t local_site=0;local_site>(32-gsite29); scidac_csumb_thr ^= site_crc<>(32-gsite31); - } + }); -PARALLEL_CRITICAL + thread_critical { scidac_csuma^= scidac_csuma_thr; scidac_csumb^= scidac_csumb_thr; @@ -202,9 +196,9 @@ PARALLEL_CRITICAL { uint32_t * f = (uint32_t *)file_object; uint64_t count = bytes/sizeof(uint32_t); - parallel_for(uint64_t i=0;i>8) | ((f&0xFF000000UL)>>24) ; fp[i] = ntohl(f); - } + }); } // BE is same as network @@ -226,9 +220,9 @@ PARALLEL_CRITICAL { uint64_t * f = (uint64_t *)file_object; uint64_t count = bytes/sizeof(uint64_t); - 
parallel_for(uint64_t i=0;i> 32; g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; fp[i] = Grid_ntohll(g); - } + }); } ///////////////////////////////////////////////////////////////////////////// // Real action: @@ -281,13 +275,13 @@ PARALLEL_CRITICAL int nrank = grid->ProcessorCount(); int myrank = grid->ThisRank(); - std::vector psizes = grid->ProcessorGrid(); - std::vector pcoor = grid->ThisProcessorCoor(); - std::vector gLattice= grid->GlobalDimensions(); - std::vector lLattice= grid->LocalDimensions(); + Coordinate psizes = grid->ProcessorGrid(); + Coordinate pcoor = grid->ThisProcessorCoor(); + Coordinate gLattice= grid->GlobalDimensions(); + Coordinate lLattice= grid->LocalDimensions(); - std::vector lStart(ndim); - std::vector gStart(ndim); + Coordinate lStart(ndim); + Coordinate gStart(ndim); // Flatten the file uint64_t lsites = grid->lSites(); @@ -546,7 +540,7 @@ PARALLEL_CRITICAL typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; - GridBase *grid = Umu._grid; + GridBase *grid = Umu.Grid(); uint64_t lsites = grid->lSites(); std::vector scalardata(lsites); @@ -558,7 +552,7 @@ PARALLEL_CRITICAL GridStopWatch timer; timer.Start(); - parallel_for(uint64_t x=0;xBarrier(); @@ -582,7 +576,7 @@ PARALLEL_CRITICAL { typedef typename vobj::scalar_object sobj; typedef typename vobj::Realified::scalar_type word; word w=0; - GridBase *grid = Umu._grid; + GridBase *grid = Umu.Grid(); uint64_t lsites = grid->lSites(), offsetCopy = offset; int attemptsLeft = std::max(0, BinaryIO::latticeWriteMaxRetry); bool checkWrite = (BinaryIO::latticeWriteMaxRetry >= 0); @@ -596,7 +590,7 @@ PARALLEL_CRITICAL GridStopWatch timer; timer.Start(); unvectorizeToLexOrdArray(scalardata,Umu); - parallel_for(uint64_t x=0;xBarrier(); timer.Stop(); @@ -619,7 +613,7 @@ PARALLEL_CRITICAL { std::cout << GridLogMessage << "writeLatticeObject: read test checksum failure, re-writing (" << attemptsLeft << " 
attempt(s) remaining)" << std::endl; offset = offsetCopy; - parallel_for(uint64_t x=0;xgSites(); uint64_t lsites = grid->lSites(); @@ -669,11 +663,11 @@ PARALLEL_CRITICAL nersc_csum,scidac_csuma,scidac_csumb); timer.Start(); - parallel_for(uint64_t lidx=0;lidx tmp(RngStateCount); std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); - parallel.SetState(tmp,lidx); - } + parallel_rng.SetState(tmp,lidx); + }); timer.Stop(); iodata.resize(1); @@ -683,7 +677,7 @@ PARALLEL_CRITICAL { std::vector tmp(RngStateCount); std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin()); - serial.SetState(tmp,0); + serial_rng.SetState(tmp,0); } nersc_csum = nersc_csum + nersc_csum_tmp; @@ -699,8 +693,8 @@ PARALLEL_CRITICAL ///////////////////////////////////////////////////////////////////////////// // Write a RNG; lexico map to an array of state and use IOobject ////////////////////////////////////////////////////////////////////////////////////// - static inline void writeRNG(GridSerialRNG &serial, - GridParallelRNG ¶llel, + static inline void writeRNG(GridSerialRNG &serial_rng, + GridParallelRNG ¶llel_rng, std::string file, uint64_t offset, uint32_t &nersc_csum, @@ -712,7 +706,7 @@ PARALLEL_CRITICAL const int RngStateCount = GridSerialRNG::RngStateCount; typedef std::array RNGstate; - GridBase *grid = parallel._grid; + GridBase *grid = parallel_rng.Grid(); uint64_t gsites = grid->gSites(); uint64_t lsites = grid->lSites(); @@ -727,11 +721,11 @@ PARALLEL_CRITICAL timer.Start(); std::vector iodata(lsites); - parallel_for(uint64_t lidx=0;lidx tmp(RngStateCount); - parallel.GetState(tmp,lidx); + parallel_rng.GetState(tmp,lidx); std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); - } + }); timer.Stop(); IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, @@ -739,7 +733,7 @@ PARALLEL_CRITICAL iodata.resize(1); { std::vector tmp(RngStateCount); - serial.GetState(tmp,0); + serial_rng.GetState(tmp,0); 
std::copy(tmp.begin(),tmp.end(),iodata[0].begin()); } IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND, @@ -756,5 +750,4 @@ PARALLEL_CRITICAL } }; -} -#endif +NAMESPACE_END(Grid); diff --git a/Grid/parallelIO/IldgIO.h b/Grid/parallelIO/IldgIO.h index db07cac0..b564371b 100644 --- a/Grid/parallelIO/IldgIO.h +++ b/Grid/parallelIO/IldgIO.h @@ -24,8 +24,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_ILDG_IO_H -#define GRID_ILDG_IO_H +#pragma once #ifdef HAVE_LIME #include @@ -43,8 +42,7 @@ extern "C" { #include "lime.h" } -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); #define GRID_FIELD_NORM "FieldNormMetaData" #define GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) \ @@ -140,7 +138,7 @@ assert(GRID_FIELD_NORM_CALC(FieldNormMetaData_, n2ck) < 1.0e-5); ///////////////////////////////////// // Scidac Private File structure ///////////////////////////////////// - _scidacFile = scidacFile(field._grid); + _scidacFile = scidacFile(field.Grid()); ///////////////////////////////////// // Scidac Private Record structure @@ -227,10 +225,10 @@ class GridLimeReader : public BinaryIO { // std::cout << GridLogMessage<< " readLimeLatticeBinaryObject matches ! " <_gsites; + uint64_t PayloadSize = sizeof(sobj) * field.Grid()->_gsites; // std::cout << "R sizeof(sobj)= " <_gsites<_gsites< void writeLimeLatticeBinaryObject(Lattice &field,std::string record_name) @@ -425,8 +423,8 @@ class GridLimeWriter : public BinaryIO // v) Continue writing scidac record. 
//////////////////////////////////////////////////////////////////// - GridBase *grid = field._grid; - assert(boss_node == field._grid->IsBoss() ); + GridBase *grid = field.Grid(); + assert(boss_node == field.Grid()->IsBoss() ); FieldNormMetaData FNMD; FNMD.norm2 = norm2(field); @@ -443,7 +441,7 @@ class GridLimeWriter : public BinaryIO } // std::cout << "W sizeof(sobj)" <_gsites<_gsites< &field,userRecord _userRecord, const unsigned int recordScientificPrec = 0) { - GridBase * grid = field._grid; + GridBase * grid = field.Grid(); //////////////////////////////////////// // fill the Grid header @@ -557,7 +555,7 @@ class ScidacReader : public GridLimeReader { void readScidacFieldRecord(Lattice &field,userRecord &_userRecord) { typedef typename vobj::scalar_object sobj; - GridBase * grid = field._grid; + GridBase * grid = field.Grid(); //////////////////////////////////////// // fill the Grid header @@ -624,7 +622,7 @@ class IldgWriter : public ScidacWriter { template void writeConfiguration(Lattice > &Umu,int sequence,std::string LFN,std::string description) { - GridBase * grid = Umu._grid; + GridBase * grid = Umu.Grid(); typedef Lattice > GaugeField; typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; @@ -717,9 +715,9 @@ class IldgReader : public GridLimeReader { typedef LorentzColourMatrixF fobj; typedef LorentzColourMatrixD dobj; - GridBase *grid = Umu._grid; + GridBase *grid = Umu.Grid(); - std::vector dims = Umu._grid->FullDimensions(); + Coordinate dims = Umu.Grid()->FullDimensions(); assert(dims.size()==4); @@ -853,6 +851,7 @@ class IldgReader : public GridLimeReader { // Minimally must find binary segment and checksum // Since this is an ILDG reader require ILDG format ////////////////////////////////////////////////////// + assert(found_ildgLFN); assert(found_ildgBinary); assert(found_ildgFormat); assert(found_scidacChecksum); @@ -930,9 +929,9 @@ class IldgReader : public GridLimeReader { } }; -}} +NAMESPACE_END(Grid); + 
//HAVE_LIME #endif -#endif diff --git a/Grid/parallelIO/IldgIOtypes.h b/Grid/parallelIO/IldgIOtypes.h index 281b20f2..ddc0969c 100644 --- a/Grid/parallelIO/IldgIOtypes.h +++ b/Grid/parallelIO/IldgIOtypes.h @@ -23,7 +23,7 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_ILDGTYPES_IO_H #define GRID_ILDGTYPES_IO_H @@ -32,7 +32,7 @@ extern "C" { // for linkage #include "lime.h" } -namespace Grid { +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////// // Data representation of records that enter ILDG and SciDac formats @@ -51,12 +51,12 @@ namespace Grid { // Unused SCIDAC records names; could move to support this functionality #define SCIDAC_SITELIST "scidac-sitelist" - //////////////////////////////////////////////////////////// - const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat - const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat - const int GRID_IO_FIELD = 0; // hardcode lift from QIO compat - const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat - //////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////// +const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat +const int GRID_IO_MULTIFILE = 1; // hardcode lift from QIO compat +const int GRID_IO_FIELD = 0; // hardcode lift from QIO compat +const int GRID_IO_GLOBAL = 1; // hardcode lift from QIO compat +//////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////// // QIO uses mandatory "private" records fixed format @@ -74,7 +74,7 @@ struct emptyUserRecord : Serializable { // 1.1416 16 16 32 0 //////////////////////// struct 
scidacFile : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, double, version, int, spacetime, @@ -91,7 +91,7 @@ struct scidacFile : Serializable { return dimensions; } - void setDimensions(std::vector dimensions) { + void setDimensions(Coordinate dimensions) { char delimiter = ' '; std::stringstream stream; for(int i=0;i #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - /////////////////////////////////////////////////////// - // Precision mapping - /////////////////////////////////////////////////////// - template static std::string getFormatString (void) - { - std::string format; - typedef typename getPrecision::real_scalar_type stype; - if ( sizeof(stype) == sizeof(float) ) { - format = std::string("IEEE32BIG"); - } - if ( sizeof(stype) == sizeof(double) ) { - format = std::string("IEEE64BIG"); - } - return format; +/////////////////////////////////////////////////////// +// Precision mapping +/////////////////////////////////////////////////////// +template static std::string getFormatString (void) +{ + std::string format; + typedef typename getPrecision::real_scalar_type stype; + if ( sizeof(stype) == sizeof(float) ) { + format = std::string("IEEE32BIG"); } + if ( sizeof(stype) == sizeof(double) ) { + format = std::string("IEEE64BIG"); + } + return format; +}; + //////////////////////////////////////////////////////////////////////////////// // header specification/interpretation //////////////////////////////////////////////////////////////////////////////// @@ -93,146 +94,145 @@ namespace Grid { link_trace(0.), plaquette(0.), checksum(0), scidac_checksuma(0), scidac_checksumb(0), sequence_number(0) {} - }; + }; - namespace QCD { +// PB disable using namespace - this is a header and forces namesapce visibility for all +// including files +//using namespace Grid; - using namespace Grid; +////////////////////////////////////////////////////////////////////// +// Bit and Physical Checksumming and QA of data 
+////////////////////////////////////////////////////////////////////// +inline void GridMetaData(GridBase *grid,FieldMetaData &header) +{ + int nd = grid->_ndimension; + header.nd = nd; + header.dimension.resize(nd); + header.boundary.resize(nd); + header.data_start = 0; + for(int d=0;d_fdimensions[d]; + } + for(int d=0;dpw_name); - ////////////////////////////////////////////////////////////////////// - // Bit and Physical Checksumming and QA of data - ////////////////////////////////////////////////////////////////////// - inline void GridMetaData(GridBase *grid,FieldMetaData &header) - { - int nd = grid->_ndimension; - header.nd = nd; - header.dimension.resize(nd); - header.boundary.resize(nd); - header.data_start = 0; - for(int d=0;d_fdimensions[d]; - } - for(int d=0;dpw_name); - - // When - std::time_t t = std::time(nullptr); - std::tm tm_ = *std::localtime(&t); - std::ostringstream oss; - // oss << std::put_time(&tm_, "%c %Z"); - header.creation_date = oss.str(); - header.archive_date = header.creation_date; - - // What - struct utsname name; uname(&name); - header.creator_hardware = std::string(name.nodename)+"-"; - header.creator_hardware+= std::string(name.machine)+"-"; - header.creator_hardware+= std::string(name.sysname)+"-"; - header.creator_hardware+= std::string(name.release); - } + // What + struct utsname name; uname(&name); + header.creator_hardware = std::string(name.nodename)+"-"; + header.creator_hardware+= std::string(name.machine)+"-"; + header.creator_hardware+= std::string(name.sysname)+"-"; + header.creator_hardware+= std::string(name.release); +} #define dump_meta_data(field, s) \ - s << "BEGIN_HEADER" << std::endl; \ - s << "HDR_VERSION = " << field.hdr_version << std::endl; \ - s << "DATATYPE = " << field.data_type << std::endl; \ - s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \ - for(int i=0;i<4;i++){ \ - s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ - } \ - s << "LINK_TRACE = " << 
std::setprecision(10) << field.link_trace << std::endl; \ - s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \ - for(int i=0;i<4;i++){ \ - s << "BOUNDARY_"< inline void PrepareMetaData(Lattice & field, FieldMetaData &header) { - GridBase *grid = field._grid; + GridBase *grid = field.Grid(); std::string format = getFormatString(); - header.floating_point = format; - header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac - GridMetaData(grid,header); - MachineCharacteristics(header); - } - inline void GaugeStatistics(Lattice & data,FieldMetaData &header) - { - // How to convert data precision etc... - header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); - } - inline void GaugeStatistics(Lattice & data,FieldMetaData &header) - { - // How to convert data precision etc... - header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); - } - template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) - { + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + MachineCharacteristics(header); +} +inline void GaugeStatistics(Lattice & data,FieldMetaData &header) +{ + // How to convert data precision etc... + header.link_trace=WilsonLoops::linkTrace(data); + header.plaquette =WilsonLoops::avgPlaquette(data); +} +inline void GaugeStatistics(Lattice & data,FieldMetaData &header) +{ + // How to convert data precision etc... 
+ header.link_trace=WilsonLoops::linkTrace(data); + header.plaquette =WilsonLoops::avgPlaquette(data); +} +template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) +{ - GridBase *grid = field._grid; - std::string format = getFormatString(); - header.floating_point = format; - header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac - GridMetaData(grid,header); - GaugeStatistics(field,header); - MachineCharacteristics(header); - } - template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) - { - GridBase *grid = field._grid; - std::string format = getFormatString(); - header.floating_point = format; - header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac - GridMetaData(grid,header); - GaugeStatistics(field,header); - MachineCharacteristics(header); - } + GridBase *grid = field.Grid(); + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); +} +template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) +{ + GridBase *grid = field.Grid(); + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); +} - ////////////////////////////////////////////////////////////////////// - // Utilities ; these are QCD aware - ////////////////////////////////////////////////////////////////////// - inline void reconstruct3(LorentzColourMatrix & cm) - { - const int x=0; - const int y=1; - const int z=2; - for(int mu=0;mu using iLorentzColour2x3 = iVector, 2>, Nd >; +//////////////////////////////////////////////////////////////////////////////// +// Some data types for intermediate storage 
+//////////////////////////////////////////////////////////////////////////////// +template using iLorentzColour2x3 = iVector, 2>, Nd >; - typedef iLorentzColour2x3 LorentzColour2x3; - typedef iLorentzColour2x3 LorentzColour2x3F; - typedef iLorentzColour2x3 LorentzColour2x3D; +typedef iLorentzColour2x3 LorentzColour2x3; +typedef iLorentzColour2x3 LorentzColour2x3F; +typedef iLorentzColour2x3 LorentzColour2x3D; ///////////////////////////////////////////////////////////////////////////////// // Simple classes for precision conversion @@ -276,56 +276,55 @@ struct BinarySimpleMunger { }; - template - struct GaugeSimpleMunger{ - void operator()(fobj &in, sobj &out) { - for (int mu = 0; mu < Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - }} - } - }; - }; +template +struct GaugeSimpleMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; - template - struct GaugeSimpleUnmunger { +template +struct GaugeSimpleUnmunger { - void operator()(sobj &in, fobj &out) { - for (int mu = 0; mu < Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - }} - } - }; - }; + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; +}; - template - struct Gauge3x2munger{ - void operator() (fobj &in,sobj &out){ - for(int mu=0;mu - struct Gauge3x2unmunger{ - void operator() (sobj &in,fobj &out){ - for(int mu=0;mu +struct Gauge3x2munger{ + void operator() (fobj &in,sobj &out){ + for(int mu=0;mu +struct Gauge3x2unmunger{ + void operator() (sobj &in,fobj &out){ + for(int mu=0;mu header; - std::string line; + // for the header-reader + static inline int readHeader(std::string file,GridBase 
*grid, FieldMetaData &field) + { + std::map header; + std::string line; - ////////////////////////////////////////////////// - // read the header - ////////////////////////////////////////////////// - std::ifstream fin(file); + ////////////////////////////////////////////////// + // read the header + ////////////////////////////////////////////////// + std::ifstream fin(file); - getline(fin,line); // read one line and insist is + getline(fin,line); // read one line and insist is - removeWhitespace(line); - std::cout << GridLogMessage << "* " << line << std::endl; + removeWhitespace(line); + std::cout << GridLogMessage << "* " << line << std::endl; - assert(line==std::string("BEGIN_HEADER")); + assert(line==std::string("BEGIN_HEADER")); - do { + do { getline(fin,line); // read one line std::cout << GridLogMessage << "* "<0) { - std::string key=line.substr(0,eq); - std::string val=line.substr(eq+1); - removeWhitespace(key); - removeWhitespace(val); + std::string key=line.substr(0,eq); + std::string val=line.substr(eq+1); + removeWhitespace(key); + removeWhitespace(val); - header[key] = val; - } + header[key] = val; + } } while( line.find("END_HEADER") == std::string::npos ); - field.data_start = fin.tellg(); + field.data_start = fin.tellg(); - ////////////////////////////////////////////////// - // chomp the values - ////////////////////////////////////////////////// - field.hdr_version = header["HDR_VERSION"]; - field.data_type = header["DATATYPE"]; - field.storage_format = header["STORAGE_FORMAT"]; + ////////////////////////////////////////////////// + // chomp the values + ////////////////////////////////////////////////// + field.hdr_version = header["HDR_VERSION"]; + field.data_type = header["DATATYPE"]; + field.storage_format = header["STORAGE_FORMAT"]; - field.dimension[0] = std::stol(header["DIMENSION_1"]); - field.dimension[1] = std::stol(header["DIMENSION_2"]); - field.dimension[2] = std::stol(header["DIMENSION_3"]); - field.dimension[3] = 
std::stol(header["DIMENSION_4"]); + field.dimension[0] = std::stol(header["DIMENSION_1"]); + field.dimension[1] = std::stol(header["DIMENSION_2"]); + field.dimension[2] = std::stol(header["DIMENSION_3"]); + field.dimension[3] = std::stol(header["DIMENSION_4"]); - assert(grid->_ndimension == 4); - for(int d=0;d<4;d++){ + assert(grid->_ndimension == 4); + for(int d=0;d<4;d++){ assert(grid->_fdimensions[d]==field.dimension[d]); } - field.link_trace = std::stod(header["LINK_TRACE"]); - field.plaquette = std::stod(header["PLAQUETTE"]); + field.link_trace = std::stod(header["LINK_TRACE"]); + field.plaquette = std::stod(header["PLAQUETTE"]); - field.boundary[0] = header["BOUNDARY_1"]; - field.boundary[1] = header["BOUNDARY_2"]; - field.boundary[2] = header["BOUNDARY_3"]; - field.boundary[3] = header["BOUNDARY_4"]; + field.boundary[0] = header["BOUNDARY_1"]; + field.boundary[1] = header["BOUNDARY_2"]; + field.boundary[2] = header["BOUNDARY_3"]; + field.boundary[3] = header["BOUNDARY_4"]; - field.checksum = std::stoul(header["CHECKSUM"],0,16); - field.ensemble_id = header["ENSEMBLE_ID"]; - field.ensemble_label = header["ENSEMBLE_LABEL"]; - field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]); - field.creator = header["CREATOR"]; - field.creator_hardware = header["CREATOR_HARDWARE"]; - field.creation_date = header["CREATION_DATE"]; - field.archive_date = header["ARCHIVE_DATE"]; - field.floating_point = header["FLOATING_POINT"]; + field.checksum = std::stoul(header["CHECKSUM"],0,16); + field.ensemble_id = header["ENSEMBLE_ID"]; + field.ensemble_label = header["ENSEMBLE_LABEL"]; + field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]); + field.creator = header["CREATOR"]; + field.creator_hardware = header["CREATOR_HARDWARE"]; + field.creation_date = header["CREATION_DATE"]; + field.archive_date = header["ARCHIVE_DATE"]; + field.floating_point = header["FLOATING_POINT"]; - return field.data_start; + return field.data_start; + } + + 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Now the meat: the object readers + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + template + static inline void readConfiguration(Lattice > &Umu, + FieldMetaData& header, + std::string file) + { + typedef Lattice > GaugeField; + + GridBase *grid = Umu.Grid(); + uint64_t offset = readHeader(file,Umu.Grid(),header); + + FieldMetaData clone(header); + + std::string format(header.floating_point); + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + // depending on datatype, set up munger; + // munger is a function of + if ( header.data_type == std::string("4D_SU3_GAUGE") ) { + if ( ieee32 || ieee32big ) { + BinaryIO::readLatticeObject, LorentzColour2x3F> + (Umu,file,Gauge3x2munger(), offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + if ( ieee64 || ieee64big ) { + BinaryIO::readLatticeObject, LorentzColour2x3D> + (Umu,file,Gauge3x2munger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + if ( ieee32 || ieee32big ) { + BinaryIO::readLatticeObject,LorentzColourMatrixF> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + if ( ieee64 || ieee64big ) { + BinaryIO::readLatticeObject,LorentzColourMatrixD> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + } else { + assert(0); } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Now the meat: the object readers - 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + GaugeStatistics(Umu,clone); - template - static inline void readConfiguration(Lattice > &Umu, - FieldMetaData& header, - std::string file) - { - typedef Lattice > GaugeField; + std::cout< - if ( header.data_type == std::string("4D_SU3_GAUGE") ) { - if ( ieee32 || ieee32big ) { - BinaryIO::readLatticeObject, LorentzColour2x3F> - (Umu,file,Gauge3x2munger(), offset,format, - nersc_csum,scidac_csuma,scidac_csumb); - } - if ( ieee64 || ieee64big ) { - BinaryIO::readLatticeObject, LorentzColour2x3D> - (Umu,file,Gauge3x2munger(),offset,format, - nersc_csum,scidac_csuma,scidac_csumb); - } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { - if ( ieee32 || ieee32big ) { - BinaryIO::readLatticeObject,LorentzColourMatrixF> - (Umu,file,GaugeSimpleMunger(),offset,format, - nersc_csum,scidac_csuma,scidac_csumb); - } - if ( ieee64 || ieee64big ) { - BinaryIO::readLatticeObject,LorentzColourMatrixD> - (Umu,file,GaugeSimpleMunger(),offset,format, - nersc_csum,scidac_csuma,scidac_csumb); - } - } else { - assert(0); - } - - GaugeStatistics(Umu,clone); - - std::cout<= 1.0e-5 ) { - std::cout << " Plaquette mismatch "<= 1.0e-5 ) { + std::cout << " Plaquette mismatch "< - static inline void writeConfiguration(Lattice > &Umu, - std::string file, - int two_row, - int bits32) - { - typedef Lattice > GaugeField; + template + static inline void writeConfiguration(Lattice > &Umu, + std::string file, + int two_row, + int bits32) + { + typedef Lattice > GaugeField; - typedef iLorentzColourMatrix vobj; - typedef typename vobj::scalar_object sobj; + typedef iLorentzColourMatrix vobj; + typedef typename vobj::scalar_object sobj; - FieldMetaData header; - /////////////////////////////////////////// - // Following should become arguments - /////////////////////////////////////////// - header.sequence_number = 1; - header.ensemble_id = "UKQCD"; - 
header.ensemble_label = "DWF"; + FieldMetaData header; + /////////////////////////////////////////// + // Following should become arguments + /////////////////////////////////////////// + header.sequence_number = 1; + header.ensemble_id = "UKQCD"; + header.ensemble_label = "DWF"; - typedef LorentzColourMatrixD fobj3D; - typedef LorentzColour2x3D fobj2D; + typedef LorentzColourMatrixD fobj3D; + typedef LorentzColour2x3D fobj2D; - GridBase *grid = Umu._grid; + GridBase *grid = Umu.Grid(); - GridMetaData(grid,header); - assert(header.nd==4); - GaugeStatistics(Umu,header); - MachineCharacteristics(header); + GridMetaData(grid,header); + assert(header.nd==4); + GaugeStatistics(Umu,header); + MachineCharacteristics(header); uint64_t offset; - // Sod it -- always write 3x3 double - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - GaugeSimpleUnmunger munge; + // Sod it -- always write 3x3 double + header.floating_point = std::string("IEEE64BIG"); + header.data_type = std::string("4D_SU3_GAUGE_3x3"); + GaugeSimpleUnmunger munge; if ( grid->IsBoss() ) { truncate(file); - offset = writeHeader(header,file); + offset = writeHeader(header,file); } grid->Broadcast(0,(void *)&offset,sizeof(offset)); - uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, - nersc_csum,scidac_csuma,scidac_csumb); - header.checksum = nersc_csum; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + header.checksum = nersc_csum; if ( grid->IsBoss() ) { - writeHeader(header,file); + writeHeader(header,file); } - std::cout<IsBoss() ) { - truncate(file); - offset = writeHeader(header,file); + truncate(file); + offset = writeHeader(header,file); } grid->Broadcast(0,(void *)&offset,sizeof(offset)); - uint32_t nersc_csum,scidac_csuma,scidac_csumb; - 
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); - header.checksum = nersc_csum; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); + header.checksum = nersc_csum; if ( grid->IsBoss() ) { - offset = writeHeader(header,file); + offset = writeHeader(header,file); } - std::cout< - uint32_t nersc_csum,scidac_csuma,scidac_csumb; - BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); + // depending on datatype, set up munger; + // munger is a function of + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); - if ( nersc_csum != header.checksum ) { - std::cerr << "checksum mismatch "< 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); #define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16)) #define RawConfig(A,B) (A<<8|B) @@ -39,16 +39,16 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES , "CACHE_MISSES......." , CACHE_REFERENCES}, { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES , "CPUCYCLES.........." , INSTRUCTIONS}, { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS , "INSTRUCTIONS......." 
, CPUCYCLES }, - // 4 + // 4 #ifdef KNL - { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES }, - { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS }, - { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS }, - { PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS }, - { PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS }, - { PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS }, - { PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS }, - // 11 + { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", CPUCYCLES }, + { PERF_TYPE_RAW, RawConfig(0x01,0x04), "L1_MISS_LOADS......", L1D_READ_ACCESS }, + { PERF_TYPE_RAW, RawConfig(0x40,0x04), "ALL_LOADS..........", L1D_READ_ACCESS }, + { PERF_TYPE_RAW, RawConfig(0x02,0x04), "L2_HIT_LOADS.......", L1D_READ_ACCESS }, + { PERF_TYPE_RAW, RawConfig(0x04,0x04), "L2_MISS_LOADS......", L1D_READ_ACCESS }, + { PERF_TYPE_RAW, RawConfig(0x10,0x04), "UTLB_MISS_LOADS....", L1D_READ_ACCESS }, + { PERF_TYPE_RAW, RawConfig(0x08,0x04), "DTLB_MISS_LOADS....", L1D_READ_ACCESS }, + // 11 #else { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,ACCESS) , "L1D_READ_ACCESS....",INSTRUCTIONS}, { PERF_TYPE_HW_CACHE, CacheControl(L1D,READ,MISS) , "L1D_READ_MISS......",L1D_READ_ACCESS}, @@ -57,19 +57,20 @@ const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::Performan { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,MISS) , "L1D_PREFETCH_MISS..",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(L1D,PREFETCH,ACCESS) , "L1D_PREFETCH_ACCESS",L1D_READ_ACCESS}, - // 11 + // 11 #endif { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,MISS) , "LL_READ_MISS.......",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(LL,READ,ACCESS) , "LL_READ_ACCESS.....",L1D_READ_ACCESS}, { 
PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,MISS) , "LL_WRITE_MISS......",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(LL,WRITE,ACCESS) , "LL_WRITE_ACCESS....",L1D_READ_ACCESS}, - //15 + //15 { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,MISS) , "LL_PREFETCH_MISS...",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(LL,PREFETCH,ACCESS) , "LL_PREFETCH_ACCESS.",L1D_READ_ACCESS}, { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,MISS) , "L1I_READ_MISS......",INSTRUCTIONS}, { PERF_TYPE_HW_CACHE, CacheControl(L1I,READ,ACCESS) , "L1I_READ_ACCESS....",INSTRUCTIONS} - //19 + //19 // { PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "STALL_CYCLES" }, #endif }; -} +NAMESPACE_END(Grid); + diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index 73d2c70f..13d59719 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,8 +25,8 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_PERFCOUNT_H #define GRID_PERFCOUNT_H @@ -47,7 +47,7 @@ Author: paboyle #include #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); #ifdef __linux__ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, @@ -84,11 +84,14 @@ inline uint64_t cyclecount(void){ #ifdef __bgq__ inline uint64_t cyclecount(void){ - uint64_t tmp; - asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) ); - return tmp; + uint64_t tmp; + asm volatile ("mfspr %0,0x10C" : "=&r" (tmp) ); + return tmp; } #elif defined __x86_64__ +#ifdef GRID_NVCC +accelerator_inline uint64_t __rdtsc(void) { return 0; } +#endif inline uint64_t cyclecount(void){ return __rdtsc(); // unsigned int dummy; @@ -97,7 +100,7 @@ inline uint64_t cyclecount(void){ #else inline uint64_t cyclecount(void){ - return 0; + return 0; } #endif @@ -212,7 +215,7 @@ public: ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); ign=::read(fd, &count, sizeof(long long)); ign+=::read(cyclefd, &cycles, sizeof(long long)); - assert(ign=2*sizeof(long long)); + assert(ign==2*sizeof(long long)); } elapsed = cyclecount() - begin; #else @@ -225,8 +228,8 @@ public: int N = PerformanceCounterConfigs[PCT].normalisation; const char * sn = PerformanceCounterConfigs[N].name ; const char * sc = PerformanceCounterConfigs[PCT].name; - std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles, - sc, count, sc,sn, (double)count/(double)cycles); + std::printf("tsc = %llu %s = %llu %s = %20llu\n (%s/%s) rate = %lf\n", elapsed,sn ,cycles, + sc, count, sc,sn, (double)count/(double)cycles); #else std::printf("%llu cycles \n", elapsed ); #endif @@ -241,5 +244,6 @@ public: }; -} +NAMESPACE_END(Grid); + #endif diff --git 
a/Grid/perfmon/Stat.cc b/Grid/perfmon/Stat.cc index 3f47fd83..4c3be254 100644 --- a/Grid/perfmon/Stat.cc +++ b/Grid/perfmon/Stat.cc @@ -2,7 +2,7 @@ #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); bool PmuStat::pmu_initialized=false; @@ -175,39 +175,39 @@ void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask) } - void PmuStat::KNLsetup(void){ +void PmuStat::KNLsetup(void){ - int ret; - char fname[1024]; + int ret; + char fname[1024]; - // MC RPQ inserts and WPQ inserts (reads & writes) - for (int mc = 0; mc < NMC; ++mc) - { - ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc); - // RPQ Inserts - KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1); - // WPQ Inserts - KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1); - } - // EDC RPQ inserts and WPQ inserts - for (int edc=0; edc < NEDC; ++edc) - { - ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc); - // RPQ inserts - KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1); - // WPQ inserts - KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1); - } - // EDC HitE, HitM, MissE, MissM - for (int edc=0; edc < NEDC; ++edc) - { - ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc); - KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1); - KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2); - KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4); - KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8); - } - } + // MC RPQ inserts and WPQ inserts (reads & writes) + for (int mc = 0; mc < NMC; ++mc) + { + ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc); + // RPQ Inserts + KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1); + // WPQ Inserts + KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1); + } + // EDC RPQ inserts and WPQ inserts + for (int edc=0; edc < NEDC; ++edc) + { + ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc); + // RPQ inserts + KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1); + // WPQ inserts + KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1); + } + // EDC 
HitE, HitM, MissE, MissM + for (int edc=0; edc < NEDC; ++edc) + { + ::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc); + KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1); + KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2); + KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4); + KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8); + } +} uint64_t PmuStat::KNLreadctr(int fd) { @@ -242,4 +242,5 @@ void PmuStat::KNLreadctrs(ctrs &c) } #endif -} +NAMESPACE_END(Grid); + diff --git a/Grid/perfmon/Stat.h b/Grid/perfmon/Stat.h index 96bd594a..30baec29 100644 --- a/Grid/perfmon/Stat.h +++ b/Grid/perfmon/Stat.h @@ -5,7 +5,7 @@ #define _KNIGHTS_LANDING_ROOTONLY #endif -namespace Grid { +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////////////////////// // Extra KNL counters from MCDRAM @@ -15,14 +15,14 @@ namespace Grid { #define NEDC 8 struct ctrs { - uint64_t mcrd[NMC]; - uint64_t mcwr[NMC]; - uint64_t edcrd[NEDC]; - uint64_t edcwr[NEDC]; - uint64_t edchite[NEDC]; - uint64_t edchitm[NEDC]; - uint64_t edcmisse[NEDC]; - uint64_t edcmissm[NEDC]; + uint64_t mcrd[NMC]; + uint64_t mcwr[NMC]; + uint64_t edcrd[NEDC]; + uint64_t edcwr[NEDC]; + uint64_t edchite[NEDC]; + uint64_t edchitm[NEDC]; + uint64_t edcmisse[NEDC]; + uint64_t edcmissm[NEDC]; }; // Peter/Azusa: // Our modification of a code provided by Larry Meadows from Intel @@ -44,61 +44,62 @@ struct knl_gbl_ class PmuStat { - uint64_t counters[8][256]; + uint64_t counters[8][256]; #ifdef _KNIGHTS_LANDING_ - static struct knl_gbl_ gbl; + static struct knl_gbl_ gbl; #endif - const char *name; + const char *name; - uint64_t reads; // memory reads - uint64_t writes; // memory writes - uint64_t mrstart; // memory read counter at start of parallel region - uint64_t mrend; // memory read counter at end of parallel region - uint64_t mwstart; // memory write counter at start of parallel region - uint64_t mwend; // memory write counter at end of parallel region + uint64_t reads; // memory 
reads + uint64_t writes; // memory writes + uint64_t mrstart; // memory read counter at start of parallel region + uint64_t mrend; // memory read counter at end of parallel region + uint64_t mwstart; // memory write counter at start of parallel region + uint64_t mwend; // memory write counter at end of parallel region - // cumulative counters - uint64_t count; // number of invocations - uint64_t tregion; // total time in parallel region (from thread 0) - uint64_t tcycles; // total cycles inside parallel region - uint64_t inst, ref, cyc; // fixed counters - uint64_t pmc0, pmc1;// pmu - // add memory counters here - // temp variables - uint64_t tstart; // tsc at start of parallel region - uint64_t tend; // tsc at end of parallel region - // map for ctrs values - // 0 pmc0 start - // 1 pmc0 end - // 2 pmc1 start - // 3 pmc1 end - // 4 tsc start - // 5 tsc end - static bool pmu_initialized; + // cumulative counters + uint64_t count; // number of invocations + uint64_t tregion; // total time in parallel region (from thread 0) + uint64_t tcycles; // total cycles inside parallel region + uint64_t inst, ref, cyc; // fixed counters + uint64_t pmc0, pmc1;// pmu + // add memory counters here + // temp variables + uint64_t tstart; // tsc at start of parallel region + uint64_t tend; // tsc at end of parallel region + // map for ctrs values + // 0 pmc0 start + // 1 pmc0 end + // 2 pmc1 start + // 3 pmc1 end + // 4 tsc start + // 5 tsc end + static bool pmu_initialized; public: - static bool is_init(void){ return pmu_initialized;} - static void pmu_init(void); - static void pmu_fini(void); - static void pmu_start(void); - static void pmu_stop(void); - void accum(int nthreads); - static void xmemctrs(uint64_t *mr, uint64_t *mw); - void start(void); - void enter(int t); - void exit(int t); - void print(void); - void init(const char *regname); - void clear(void); + static bool is_init(void){ return pmu_initialized;} + static void pmu_init(void); + static void pmu_fini(void); + 
static void pmu_start(void); + static void pmu_stop(void); + void accum(int nthreads); + static void xmemctrs(uint64_t *mr, uint64_t *mw); + void start(void); + void enter(int t); + void exit(int t); + void print(void); + void init(const char *regname); + void clear(void); #ifdef _KNIGHTS_LANDING_ - static void KNLsetup(void); - static uint64_t KNLreadctr(int fd); - static void KNLreadctrs(ctrs &c); - static void KNLevsetup(const char *ename, int &fd, int event, int umask); + static void KNLsetup(void); + static uint64_t KNLreadctr(int fd); + static void KNLreadctrs(ctrs &c); + static void KNLevsetup(const char *ename, int &fd, int event, int umask); #endif - }; +}; + +NAMESPACE_END(Grid); -} #endif diff --git a/Grid/perfmon/Timer.h b/Grid/perfmon/Timer.h index ce1b5d76..88b4e1cc 100644 --- a/Grid/perfmon/Timer.h +++ b/Grid/perfmon/Timer.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,8 +24,8 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_TIME_H #define GRID_TIME_H @@ -33,11 +33,9 @@ Author: Peter Boyle #include #include -namespace Grid { - - - // Dress the output; use std::chrono +NAMESPACE_BEGIN(Grid) +// Dress the output; use std::chrono // C++11 time facilities better? 
inline double usecond(void) { struct timeval tv; @@ -80,7 +78,7 @@ inline std::ostream& operator<< (std::ostream & stream, const GridUsecs & now) stream.fill(fill); return stream; } - + class GridStopWatch { private: @@ -125,5 +123,6 @@ public: } }; -} +NAMESPACE_END(Grid) + #endif diff --git a/Grid/pugixml/pugixml.cc b/Grid/pugixml/pugixml.cc index dd08092c..e7b395ad 100644 --- a/Grid/pugixml/pugixml.cc +++ b/Grid/pugixml/pugixml.cc @@ -14,7 +14,12 @@ #ifndef SOURCE_PUGIXML_CPP #define SOURCE_PUGIXML_CPP -#include +#ifdef __NVCC__ +#pragma push +#pragma diag_suppress declared_but_not_referenced // suppress "function was declared but never referenced warning" +#endif + +#include "pugixml.h" #include #include @@ -202,7 +207,7 @@ PUGI__NS_BEGIN // Without a template<> we'll get multiple definitions of the same static template allocation_function xml_memory_management_function_storage::allocate = default_allocate; template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate; - + template struct xml_memory_management_function_storage; typedef xml_memory_management_function_storage xml_memory; PUGI__NS_END @@ -12768,6 +12773,10 @@ namespace pugi #undef PUGI__THROW_ERROR #undef PUGI__CHECK_ERROR +#ifdef GRID_NVCC +#pragma pop +#endif + #endif /** diff --git a/Grid/qcd/QCD.h b/Grid/qcd/QCD.h index 7e9b2da0..2c8e60da 100644 --- a/Grid/qcd/QCD.h +++ b/Grid/qcd/QCD.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -27,114 +27,112 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_BASE_H -#define GRID_QCD_BASE_H -namespace Grid{ -namespace QCD { +*************************************************************************************/ +/* END LEGAL */ +#pragma once - static const int Xdir = 0; - static const int Ydir = 1; - static const int Zdir = 2; - static const int Tdir = 3; +NAMESPACE_BEGIN(Grid); - - static const int Xp = 0; - static const int Yp = 1; - static const int Zp = 2; - static const int Tp = 3; - static const int Xm = 4; - static const int Ym = 5; - static const int Zm = 6; - static const int Tm = 7; +static constexpr int Xdir = 0; +static constexpr int Ydir = 1; +static constexpr int Zdir = 2; +static constexpr int Tdir = 3; - static const int Nc=3; - static const int Ns=4; - static const int Nd=4; - static const int Nhs=2; // half spinor - static const int Nds=8; // double stored gauge field - static const int Ngp=2; // gparity index range +static constexpr int Xp = 0; +static constexpr int Yp = 1; +static constexpr int Zp = 2; +static constexpr int Tp = 3; +static constexpr int Xm = 4; +static constexpr int Ym = 5; +static constexpr int Zm = 6; +static constexpr int Tm = 7; - ////////////////////////////////////////////////////////////////////////////// - // QCD iMatrix types - // Index conventions: Lorentz x Spin x Colour - // note: static const int or constexpr will work for type deductions - // with the intel compiler (up to version 17) - ////////////////////////////////////////////////////////////////////////////// - #define ColourIndex 2 - #define SpinIndex 1 - #define LorentzIndex 0 +static constexpr int Nc=3; +static constexpr int Ns=4; +static constexpr int Nd=4; +static constexpr int Nhs=2; // half spinor +static constexpr int Nds=8; // double stored gauge field +static constexpr int Ngp=2; // gparity index range 
- // Also should make these a named enum type - static const int DaggerNo=0; - static const int DaggerYes=1; - static const int InverseNo=0; - static const int InverseYes=1; +////////////////////////////////////////////////////////////////////////////// +// QCD iMatrix types +// Index conventions: Lorentz x Spin x Colour +// note: static constexpr int or constexpr will work for type deductions +// with the intel compiler (up to version 17) +////////////////////////////////////////////////////////////////////////////// +#define ColourIndex (2) +#define SpinIndex (1) +#define LorentzIndex (0) - // Useful traits is this a spin index - //typename std::enable_if,SpinorIndex>::value,iVector >::type *SFINAE; +// Also should make these a named enum type +static constexpr int DaggerNo=0; +static constexpr int DaggerYes=1; +static constexpr int InverseNo=0; +static constexpr int InverseYes=1; - const int SpinorIndex = 2; - template struct isSpinor { - static const bool value = (SpinorIndex==T::TensorLevel); - }; - template using IfSpinor = Invoke::value,int> > ; - template using IfNotSpinor = Invoke::value,int> > ; +// Useful traits is this a spin index +//typename std::enable_if,SpinorIndex>::value,iVector >::type *SFINAE; - // ChrisK very keen to add extra space for Gparity doubling. - // - // Also add domain wall index, in a way where Wilson operator - // naturally distributes across the 5th dimensions. - // - // That probably makes for GridRedBlack4dCartesian grid. +const int SpinorIndex = 2; +template struct isSpinor { + static constexpr bool value = (SpinorIndex==T::TensorLevel); +}; +template using IfSpinor = Invoke::value,int> > ; +template using IfNotSpinor = Invoke::value,int> > ; - // s,sp,c,spc,lc +// ChrisK very keen to add extra space for Gparity doubling. +// +// Also add domain wall index, in a way where Wilson operator +// naturally distributes across the 5th dimensions. +// +// That probably makes for GridRedBlack4dCartesian grid. 
- template using iSinglet = iScalar > >; - template using iSpinMatrix = iScalar, Ns> >; - template using iColourMatrix = iScalar > > ; - template using iSpinColourMatrix = iScalar, Ns> >; - template using iLorentzColourMatrix = iVector >, Nd > ; - template using iDoubleStoredColourMatrix = iVector >, Nds > ; - template using iSpinVector = iScalar, Ns> >; - template using iColourVector = iScalar > >; - template using iSpinColourVector = iScalar, Ns> >; - template using iHalfSpinVector = iScalar, Nhs> >; - template using iHalfSpinColourVector = iScalar, Nhs> >; +// s,sp,c,spc,lc + +template using iSinglet = iScalar > >; +template using iSpinMatrix = iScalar, Ns> >; +template using iColourMatrix = iScalar > > ; +template using iSpinColourMatrix = iScalar, Ns> >; +template using iLorentzColourMatrix = iVector >, Nd > ; +template using iDoubleStoredColourMatrix = iVector >, Nds > ; +template using iSpinVector = iScalar, Ns> >; +template using iColourVector = iScalar > >; +template using iSpinColourVector = iScalar, Ns> >; +template using iHalfSpinVector = iScalar, Nhs> >; +template using iHalfSpinColourVector = iScalar, Nhs> >; template using iSpinColourSpinColourMatrix = iScalar, Ns>, Nc>, Ns> >; - template using iGparitySpinColourVector = iVector, Ns>, Ngp >; - template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; +template using iGparitySpinColourVector = iVector, Ns>, Ngp >; +template using iGparityHalfSpinColourVector = iVector, Nhs>, Ngp >; - // Spin matrix - typedef iSpinMatrix SpinMatrix; - typedef iSpinMatrix SpinMatrixF; - typedef iSpinMatrix SpinMatrixD; +// Spin matrix +typedef iSpinMatrix SpinMatrix; +typedef iSpinMatrix SpinMatrixF; +typedef iSpinMatrix SpinMatrixD; - typedef iSpinMatrix vSpinMatrix; - typedef iSpinMatrix vSpinMatrixF; - typedef iSpinMatrix vSpinMatrixD; +typedef iSpinMatrix vSpinMatrix; +typedef iSpinMatrix vSpinMatrixF; +typedef iSpinMatrix vSpinMatrixD; - // Colour Matrix - typedef iColourMatrix ColourMatrix; - typedef 
iColourMatrix ColourMatrixF; - typedef iColourMatrix ColourMatrixD; +// Colour Matrix +typedef iColourMatrix ColourMatrix; +typedef iColourMatrix ColourMatrixF; +typedef iColourMatrix ColourMatrixD; - typedef iColourMatrix vColourMatrix; - typedef iColourMatrix vColourMatrixF; - typedef iColourMatrix vColourMatrixD; +typedef iColourMatrix vColourMatrix; +typedef iColourMatrix vColourMatrixF; +typedef iColourMatrix vColourMatrixD; + +// SpinColour matrix +typedef iSpinColourMatrix SpinColourMatrix; +typedef iSpinColourMatrix SpinColourMatrixF; +typedef iSpinColourMatrix SpinColourMatrixD; + +typedef iSpinColourMatrix vSpinColourMatrix; +typedef iSpinColourMatrix vSpinColourMatrixF; +typedef iSpinColourMatrix vSpinColourMatrixD; - // SpinColour matrix - typedef iSpinColourMatrix SpinColourMatrix; - typedef iSpinColourMatrix SpinColourMatrixF; - typedef iSpinColourMatrix SpinColourMatrixD; - - typedef iSpinColourMatrix vSpinColourMatrix; - typedef iSpinColourMatrix vSpinColourMatrixF; - typedef iSpinColourMatrix vSpinColourMatrixD; - // SpinColourSpinColour matrix typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrix; typedef iSpinColourSpinColourMatrix SpinColourSpinColourMatrixF; @@ -153,383 +151,379 @@ namespace QCD { typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixF; typedef iSpinColourSpinColourMatrix vSpinColourSpinColourMatrixD; - // LorentzColour - typedef iLorentzColourMatrix LorentzColourMatrix; - typedef iLorentzColourMatrix LorentzColourMatrixF; - typedef iLorentzColourMatrix LorentzColourMatrixD; +// LorentzColour +typedef iLorentzColourMatrix LorentzColourMatrix; +typedef iLorentzColourMatrix LorentzColourMatrixF; +typedef iLorentzColourMatrix LorentzColourMatrixD; - typedef iLorentzColourMatrix vLorentzColourMatrix; - typedef iLorentzColourMatrix vLorentzColourMatrixF; - typedef iLorentzColourMatrix vLorentzColourMatrixD; +typedef iLorentzColourMatrix vLorentzColourMatrix; +typedef iLorentzColourMatrix vLorentzColourMatrixF; 
+typedef iLorentzColourMatrix vLorentzColourMatrixD; - // DoubleStored gauge field - typedef iDoubleStoredColourMatrix DoubleStoredColourMatrix; - typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixF; - typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixD; +// DoubleStored gauge field +typedef iDoubleStoredColourMatrix DoubleStoredColourMatrix; +typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixF; +typedef iDoubleStoredColourMatrix DoubleStoredColourMatrixD; - typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrix; - typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixF; - typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixD; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrix; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixF; +typedef iDoubleStoredColourMatrix vDoubleStoredColourMatrixD; - // Spin vector - typedef iSpinVector SpinVector; - typedef iSpinVector SpinVectorF; - typedef iSpinVector SpinVectorD; +// Spin vector +typedef iSpinVector SpinVector; +typedef iSpinVector SpinVectorF; +typedef iSpinVector SpinVectorD; - typedef iSpinVector vSpinVector; - typedef iSpinVector vSpinVectorF; - typedef iSpinVector vSpinVectorD; +typedef iSpinVector vSpinVector; +typedef iSpinVector vSpinVectorF; +typedef iSpinVector vSpinVectorD; - // Colour vector - typedef iColourVector ColourVector; - typedef iColourVector ColourVectorF; - typedef iColourVector ColourVectorD; +// Colour vector +typedef iColourVector ColourVector; +typedef iColourVector ColourVectorF; +typedef iColourVector ColourVectorD; - typedef iColourVector vColourVector; - typedef iColourVector vColourVectorF; - typedef iColourVector vColourVectorD; +typedef iColourVector vColourVector; +typedef iColourVector vColourVectorF; +typedef iColourVector vColourVectorD; - // SpinColourVector - typedef iSpinColourVector SpinColourVector; - typedef iSpinColourVector SpinColourVectorF; - typedef iSpinColourVector SpinColourVectorD; +// SpinColourVector 
+typedef iSpinColourVector SpinColourVector; +typedef iSpinColourVector SpinColourVectorF; +typedef iSpinColourVector SpinColourVectorD; - typedef iSpinColourVector vSpinColourVector; - typedef iSpinColourVector vSpinColourVectorF; - typedef iSpinColourVector vSpinColourVectorD; +typedef iSpinColourVector vSpinColourVector; +typedef iSpinColourVector vSpinColourVectorF; +typedef iSpinColourVector vSpinColourVectorD; - // HalfSpin vector - typedef iHalfSpinVector HalfSpinVector; - typedef iHalfSpinVector HalfSpinVectorF; - typedef iHalfSpinVector HalfSpinVectorD; +// HalfSpin vector +typedef iHalfSpinVector HalfSpinVector; +typedef iHalfSpinVector HalfSpinVectorF; +typedef iHalfSpinVector HalfSpinVectorD; - typedef iHalfSpinVector vHalfSpinVector; - typedef iHalfSpinVector vHalfSpinVectorF; - typedef iHalfSpinVector vHalfSpinVectorD; +typedef iHalfSpinVector vHalfSpinVector; +typedef iHalfSpinVector vHalfSpinVectorF; +typedef iHalfSpinVector vHalfSpinVectorD; - // HalfSpinColour vector - typedef iHalfSpinColourVector HalfSpinColourVector; - typedef iHalfSpinColourVector HalfSpinColourVectorF; - typedef iHalfSpinColourVector HalfSpinColourVectorD; +// HalfSpinColour vector +typedef iHalfSpinColourVector HalfSpinColourVector; +typedef iHalfSpinColourVector HalfSpinColourVectorF; +typedef iHalfSpinColourVector HalfSpinColourVectorD; - typedef iHalfSpinColourVector vHalfSpinColourVector; - typedef iHalfSpinColourVector vHalfSpinColourVectorF; - typedef iHalfSpinColourVector vHalfSpinColourVectorD; +typedef iHalfSpinColourVector vHalfSpinColourVector; +typedef iHalfSpinColourVector vHalfSpinColourVectorF; +typedef iHalfSpinColourVector vHalfSpinColourVectorD; - // singlets - typedef iSinglet TComplex; // FIXME This is painful. Tensor singlet complex type. - typedef iSinglet TComplexF; // FIXME This is painful. Tensor singlet complex type. - typedef iSinglet TComplexD; // FIXME This is painful. Tensor singlet complex type. 
+// singlets +typedef iSinglet TComplex; // FIXME This is painful. Tensor singlet complex type. +typedef iSinglet TComplexF; // FIXME This is painful. Tensor singlet complex type. +typedef iSinglet TComplexD; // FIXME This is painful. Tensor singlet complex type. - typedef iSinglet vTComplex ; // what if we don't know the tensor structure - typedef iSinglet vTComplexF; // what if we don't know the tensor structure - typedef iSinglet vTComplexD; // what if we don't know the tensor structure +typedef iSinglet vTComplex ; // what if we don't know the tensor structure +typedef iSinglet vTComplexF; // what if we don't know the tensor structure +typedef iSinglet vTComplexD; // what if we don't know the tensor structure - typedef iSinglet TReal; // Shouldn't need these; can I make it work without? - typedef iSinglet TRealF; // Shouldn't need these; can I make it work without? - typedef iSinglet TRealD; // Shouldn't need these; can I make it work without? +typedef iSinglet TReal; // Shouldn't need these; can I make it work without? +typedef iSinglet TRealF; // Shouldn't need these; can I make it work without? +typedef iSinglet TRealD; // Shouldn't need these; can I make it work without? 
- typedef iSinglet vTReal; - typedef iSinglet vTRealF; - typedef iSinglet vTRealD; +typedef iSinglet vTReal; +typedef iSinglet vTRealF; +typedef iSinglet vTRealD; - typedef iSinglet vTInteger; - typedef iSinglet TInteger; +typedef iSinglet vTInteger; +typedef iSinglet TInteger; - // Lattices of these - typedef Lattice LatticeColourMatrix; - typedef Lattice LatticeColourMatrixF; - typedef Lattice LatticeColourMatrixD; +// Lattices of these +typedef Lattice LatticeColourMatrix; +typedef Lattice LatticeColourMatrixF; +typedef Lattice LatticeColourMatrixD; - typedef Lattice LatticeSpinMatrix; - typedef Lattice LatticeSpinMatrixF; - typedef Lattice LatticeSpinMatrixD; +typedef Lattice LatticeSpinMatrix; +typedef Lattice LatticeSpinMatrixF; +typedef Lattice LatticeSpinMatrixD; - typedef Lattice LatticeSpinColourMatrix; - typedef Lattice LatticeSpinColourMatrixF; - typedef Lattice LatticeSpinColourMatrixD; +typedef Lattice LatticeSpinColourMatrix; +typedef Lattice LatticeSpinColourMatrixF; +typedef Lattice LatticeSpinColourMatrixD; - typedef Lattice LatticeSpinColourSpinColourMatrix; - typedef Lattice LatticeSpinColourSpinColourMatrixF; - typedef Lattice LatticeSpinColourSpinColourMatrixD; +typedef Lattice LatticeSpinColourSpinColourMatrix; +typedef Lattice LatticeSpinColourSpinColourMatrixF; +typedef Lattice LatticeSpinColourSpinColourMatrixD; - typedef Lattice LatticeLorentzColourMatrix; - typedef Lattice LatticeLorentzColourMatrixF; - typedef Lattice LatticeLorentzColourMatrixD; +typedef Lattice LatticeLorentzColourMatrix; +typedef Lattice LatticeLorentzColourMatrixF; +typedef Lattice LatticeLorentzColourMatrixD; - // DoubleStored gauge field - typedef Lattice LatticeDoubleStoredColourMatrix; - typedef Lattice LatticeDoubleStoredColourMatrixF; - typedef Lattice LatticeDoubleStoredColourMatrixD; +// DoubleStored gauge field +typedef Lattice LatticeDoubleStoredColourMatrix; +typedef Lattice LatticeDoubleStoredColourMatrixF; +typedef Lattice 
LatticeDoubleStoredColourMatrixD; - typedef Lattice LatticeSpinVector; - typedef Lattice LatticeSpinVectorF; - typedef Lattice LatticeSpinVectorD; +typedef Lattice LatticeSpinVector; +typedef Lattice LatticeSpinVectorF; +typedef Lattice LatticeSpinVectorD; - typedef Lattice LatticeColourVector; - typedef Lattice LatticeColourVectorF; - typedef Lattice LatticeColourVectorD; +typedef Lattice LatticeColourVector; +typedef Lattice LatticeColourVectorF; +typedef Lattice LatticeColourVectorD; - typedef Lattice LatticeSpinColourVector; - typedef Lattice LatticeSpinColourVectorF; - typedef Lattice LatticeSpinColourVectorD; +typedef Lattice LatticeSpinColourVector; +typedef Lattice LatticeSpinColourVectorF; +typedef Lattice LatticeSpinColourVectorD; - typedef Lattice LatticeHalfSpinVector; - typedef Lattice LatticeHalfSpinVectorF; - typedef Lattice LatticeHalfSpinVectorD; +typedef Lattice LatticeHalfSpinVector; +typedef Lattice LatticeHalfSpinVectorF; +typedef Lattice LatticeHalfSpinVectorD; - typedef Lattice LatticeHalfSpinColourVector; - typedef Lattice LatticeHalfSpinColourVectorF; - typedef Lattice LatticeHalfSpinColourVectorD; +typedef Lattice LatticeHalfSpinColourVector; +typedef Lattice LatticeHalfSpinColourVectorF; +typedef Lattice LatticeHalfSpinColourVectorD; - typedef Lattice LatticeReal; - typedef Lattice LatticeRealF; - typedef Lattice LatticeRealD; +typedef Lattice LatticeReal; +typedef Lattice LatticeRealF; +typedef Lattice LatticeRealD; - typedef Lattice LatticeComplex; - typedef Lattice LatticeComplexF; - typedef Lattice LatticeComplexD; +typedef Lattice LatticeComplex; +typedef Lattice LatticeComplexF; +typedef Lattice LatticeComplexD; - typedef Lattice LatticeInteger; // Predicates for "where" +typedef Lattice LatticeInteger; // Predicates for "where" - /////////////////////////////////////////// - // Physical names for things - /////////////////////////////////////////// - typedef LatticeHalfSpinColourVector LatticeHalfFermion; - typedef 
LatticeHalfSpinColourVectorF LatticeHalfFermionF; - typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD; +/////////////////////////////////////////// +// Physical names for things +/////////////////////////////////////////// +typedef LatticeHalfSpinColourVector LatticeHalfFermion; +typedef LatticeHalfSpinColourVectorF LatticeHalfFermionF; +typedef LatticeHalfSpinColourVectorF LatticeHalfFermionD; - typedef LatticeSpinColourVector LatticeFermion; - typedef LatticeSpinColourVectorF LatticeFermionF; - typedef LatticeSpinColourVectorD LatticeFermionD; +typedef LatticeSpinColourVector LatticeFermion; +typedef LatticeSpinColourVectorF LatticeFermionF; +typedef LatticeSpinColourVectorD LatticeFermionD; - typedef LatticeSpinColourMatrix LatticePropagator; - typedef LatticeSpinColourMatrixF LatticePropagatorF; - typedef LatticeSpinColourMatrixD LatticePropagatorD; +typedef LatticeSpinColourMatrix LatticePropagator; +typedef LatticeSpinColourMatrixF LatticePropagatorF; +typedef LatticeSpinColourMatrixD LatticePropagatorD; - typedef LatticeLorentzColourMatrix LatticeGaugeField; - typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF; - typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD; +typedef LatticeLorentzColourMatrix LatticeGaugeField; +typedef LatticeLorentzColourMatrixF LatticeGaugeFieldF; +typedef LatticeLorentzColourMatrixD LatticeGaugeFieldD; - typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField; - typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF; - typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD; +typedef LatticeDoubleStoredColourMatrix LatticeDoubledGaugeField; +typedef LatticeDoubleStoredColourMatrixF LatticeDoubledGaugeFieldF; +typedef LatticeDoubleStoredColourMatrixD LatticeDoubledGaugeFieldD; - template using LorentzScalar = Lattice >; +template using LorentzScalar = Lattice >; - // Uhgg... 
typing this hurt ;) - // (my keyboard got burning hot when I typed this, must be the anti-Fermion) - typedef Lattice LatticeStaggeredFermion; - typedef Lattice LatticeStaggeredFermionF; - typedef Lattice LatticeStaggeredFermionD; +// Uhgg... typing this hurt ;) +// (my keyboard got burning hot when I typed this, must be the anti-Fermion) +typedef Lattice LatticeStaggeredFermion; +typedef Lattice LatticeStaggeredFermionF; +typedef Lattice LatticeStaggeredFermionD; - typedef Lattice LatticeStaggeredPropagator; - typedef Lattice LatticeStaggeredPropagatorF; - typedef Lattice LatticeStaggeredPropagatorD; +typedef Lattice LatticeStaggeredPropagator; +typedef Lattice LatticeStaggeredPropagatorF; +typedef Lattice LatticeStaggeredPropagatorD; - ////////////////////////////////////////////////////////////////////////////// - // Peek and Poke named after physics attributes - ////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +// Peek and Poke named after physics attributes +////////////////////////////////////////////////////////////////////////////// - //spin - template auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return PeekIndex(rhs,i); - } - template auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) - { - return PeekIndex(rhs,i,j); - } - template auto peekSpin(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return PeekIndex(rhs,i); - } - template auto peekSpin(const Lattice &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) - { - return PeekIndex(rhs,i,j); - } - //colour - template auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return PeekIndex(rhs,i); - } - template auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) - { - return PeekIndex(rhs,i,j); - } - template auto peekColour(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return 
PeekIndex(rhs,i); - } - template auto peekColour(const Lattice &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) - { - return PeekIndex(rhs,i,j); - } - //lorentz - template auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return PeekIndex(rhs,i); - } - template auto peekLorentz(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) - { - return PeekIndex(rhs,i); - } +//spin +template auto peekSpin(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} +template auto peekSpin(const vobj &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) +{ + return PeekIndex(rhs,i,j); +} +template auto peekSpin(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} +template auto peekSpin(const Lattice &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) +{ + return PeekIndex(rhs,i,j); +} +//colour +template auto peekColour(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} +template auto peekColour(const vobj &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) +{ + return PeekIndex(rhs,i,j); +} +template auto peekColour(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} +template auto peekColour(const Lattice &rhs,int i,int j) -> decltype(PeekIndex(rhs,0,0)) +{ + return PeekIndex(rhs,i,j); +} +//lorentz +template auto peekLorentz(const vobj &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} +template auto peekLorentz(const Lattice &rhs,int i) -> decltype(PeekIndex(rhs,0)) +{ + return PeekIndex(rhs,i); +} - ////////////////////////////////////////////// - // Poke lattice - ////////////////////////////////////////////// - template - void pokeColour(Lattice &lhs, - const Lattice(lhs._odata[0],0))> & rhs, +////////////////////////////////////////////// +// Poke lattice +////////////////////////////////////////////// +template +void pokeColour(Lattice &lhs, + const Lattice(vobj(),0))> & rhs, + int i) +{ + 
PokeIndex(lhs,rhs,i); +} +template +void pokeColour(Lattice &lhs, + const Lattice(vobj(),0,0))> & rhs, + int i,int j) +{ + PokeIndex(lhs,rhs,i,j); +} +template +void pokeSpin(Lattice &lhs, + const Lattice(vobj(),0))> & rhs, int i) - { - PokeIndex(lhs,rhs,i); - } - template - void pokeColour(Lattice &lhs, - const Lattice(lhs._odata[0],0,0))> & rhs, +{ + PokeIndex(lhs,rhs,i); +} +template +void pokeSpin(Lattice &lhs, + const Lattice(vobj(),0,0))> & rhs, int i,int j) - { - PokeIndex(lhs,rhs,i,j); - } - template - void pokeSpin(Lattice &lhs, - const Lattice(lhs._odata[0],0))> & rhs, - int i) - { - PokeIndex(lhs,rhs,i); - } - template - void pokeSpin(Lattice &lhs, - const Lattice(lhs._odata[0],0,0))> & rhs, - int i,int j) - { - PokeIndex(lhs,rhs,i,j); - } - template - void pokeLorentz(Lattice &lhs, - const Lattice(lhs._odata[0],0))> & rhs, - int i) - { - PokeIndex(lhs,rhs,i); - } +{ + PokeIndex(lhs,rhs,i,j); +} +template +void pokeLorentz(Lattice &lhs, + const Lattice(vobj(),0))> & rhs, + int i) +{ + PokeIndex(lhs,rhs,i); +} - ////////////////////////////////////////////// - // Poke scalars - ////////////////////////////////////////////// - template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) - { - pokeIndex(lhs,rhs,i); - } - template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) - { - pokeIndex(lhs,rhs,i,j); - } +////////////////////////////////////////////// +// Poke scalars +////////////////////////////////////////////// +template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) +{ + pokeIndex(lhs,rhs,i); +} +template void pokeSpin(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) +{ + pokeIndex(lhs,rhs,i,j); +} - template void pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) - { - pokeIndex(lhs,rhs,i); - } - template void pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) - { - pokeIndex(lhs,rhs,i,j); - } +template void 
pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) +{ + pokeIndex(lhs,rhs,i); +} +template void pokeColour(vobj &lhs,const decltype(peekIndex(lhs,0,0)) & rhs,int i,int j) +{ + pokeIndex(lhs,rhs,i,j); +} - template void pokeLorentz(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) - { - pokeIndex(lhs,rhs,i); - } +template void pokeLorentz(vobj &lhs,const decltype(peekIndex(lhs,0)) & rhs,int i) +{ + pokeIndex(lhs,rhs,i); +} - ////////////////////////////////////////////// - // Fermion <-> propagator assignements - ////////////////////////////////////////////// +////////////////////////////////////////////// +// Fermion <-> propagator assignements +////////////////////////////////////////////// //template template void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c) +{ + for(int j = 0; j < Ns; ++j) { - for(int j = 0; j < Ns; ++j) - { - auto pjs = peekSpin(p, j, s); - auto fj = peekSpin(f, j); + auto pjs = peekSpin(p, j, s); + auto fj = peekSpin(f, j); for(int i = 0; i < Fimpl::Dimension; ++i) - { - pokeColour(pjs, peekColour(fj, i), i, c); - } - pokeSpin(p, pjs, j, s); - } + { + pokeColour(pjs, peekColour(fj, i), i, c); + } + pokeSpin(p, pjs, j, s); } +} //template template void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c) +{ + for(int j = 0; j < Ns; ++j) { - for(int j = 0; j < Ns; ++j) - { - auto pjs = peekSpin(p, j, s); - auto fj = peekSpin(f, j); + auto pjs = peekSpin(p, j, s); + auto fj = peekSpin(f, j); for(int i = 0; i < Fimpl::Dimension; ++i) - { - pokeColour(fj, peekColour(pjs, i, c), i); - } - pokeSpin(f, fj, j); - } + { + pokeColour(fj, peekColour(pjs, i, c), i); + } + pokeSpin(f, fj, j); } +} - ////////////////////////////////////////////// - // transpose array and scalar - ////////////////////////////////////////////// - template inline Lattice transposeSpin(const Lattice &lhs){ - return 
transposeIndex(lhs); - } - template inline Lattice transposeColour(const Lattice &lhs){ - return transposeIndex(lhs); - } - template inline vobj transposeSpin(const vobj &lhs){ - return transposeIndex(lhs); - } - template inline vobj transposeColour(const vobj &lhs){ - return transposeIndex(lhs); - } +////////////////////////////////////////////// +// transpose array and scalar +////////////////////////////////////////////// +template inline Lattice transposeSpin(const Lattice &lhs){ + return transposeIndex(lhs); +} +template inline Lattice transposeColour(const Lattice &lhs){ + return transposeIndex(lhs); +} +template inline vobj transposeSpin(const vobj &lhs){ + return transposeIndex(lhs); +} +template inline vobj transposeColour(const vobj &lhs){ + return transposeIndex(lhs); +} - ////////////////////////////////////////// - // Trace lattice and non-lattice - ////////////////////////////////////////// - template - inline auto traceSpin(const Lattice &lhs) -> Lattice(lhs._odata[0]))> - { - return traceIndex(lhs); - } - template - inline auto traceColour(const Lattice &lhs) -> Lattice(lhs._odata[0]))> - { - return traceIndex(lhs); - } - template - inline auto traceSpin(const vobj &lhs) -> Lattice(lhs))> - { - return traceIndex(lhs); - } - template - inline auto traceColour(const vobj &lhs) -> Lattice(lhs))> - { - return traceIndex(lhs); - } +////////////////////////////////////////// +// Trace lattice and non-lattice +////////////////////////////////////////// +template +inline auto traceSpin(const Lattice &lhs) -> Lattice(vobj()))> +{ + return traceIndex(lhs); +} +template +inline auto traceColour(const Lattice &lhs) -> Lattice(vobj()))> +{ + return traceIndex(lhs); +} +template +inline auto traceSpin(const vobj &lhs) -> Lattice(lhs))> +{ + return traceIndex(lhs); +} +template +inline auto traceColour(const vobj &lhs) -> Lattice(lhs))> +{ + return traceIndex(lhs); +} - ////////////////////////////////////////// - // Current types - 
////////////////////////////////////////// - GRID_SERIALIZABLE_ENUM(Current, undef, - Vector, 0, - Axial, 1, - Tadpole, 2); +////////////////////////////////////////// +// Current types +////////////////////////////////////////// +GRID_SERIALIZABLE_ENUM(Current, undef, + Vector, 0, + Axial, 1, + Tadpole, 2); -} //namespace QCD -} // Grid +NAMESPACE_END(Grid); - - -#endif diff --git a/Grid/qcd/action/Action.h b/Grid/qcd/action/Action.h index 7272c90d..737c1ff0 100644 --- a/Grid/qcd/action/Action.h +++ b/Grid/qcd/action/Action.h @@ -37,14 +37,18 @@ Author: paboyle // Abstract base interface //////////////////////////////////////////// #include +NAMESPACE_CHECK(ActionCore); //////////////////////////////////////////////////////////////////////// // Fermion actions; prevent coupling fermion.cc files to other headers //////////////////////////////////////////////////////////////////////// #include +NAMESPACE_CHECK(FermionCore); #include +NAMESPACE_CHECK(Fermion); //////////////////////////////////////// // Pseudo fermion combinations for HMC //////////////////////////////////////// #include +NAMESPACE_CHECK(PseudoFermion); #endif diff --git a/Grid/qcd/action/ActionBase.h b/Grid/qcd/action/ActionBase.h index 8d853d45..bff21d1d 100644 --- a/Grid/qcd/action/ActionBase.h +++ b/Grid/qcd/action/ActionBase.h @@ -27,19 +27,18 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef ACTION_BASE_H #define ACTION_BASE_H -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template class Action { - public: +public: bool is_smeared = false; // Heatbath? 
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions @@ -50,7 +49,6 @@ class Action virtual ~Action(){} }; -} -} +NAMESPACE_END(Grid); #endif // ACTION_BASE_H diff --git a/Grid/qcd/action/ActionCore.h b/Grid/qcd/action/ActionCore.h index 7a5caf15..6544318d 100644 --- a/Grid/qcd/action/ActionCore.h +++ b/Grid/qcd/action/ActionCore.h @@ -31,29 +31,37 @@ directory #define QCD_ACTION_CORE #include +NAMESPACE_CHECK(ActionBase); #include +NAMESPACE_CHECK(ActionSet); #include +NAMESPACE_CHECK(ActionParams); //////////////////////////////////////////// // Gauge Actions //////////////////////////////////////////// #include +NAMESPACE_CHECK(Gauge); //////////////////////////////////////////// // Fermion prereqs //////////////////////////////////////////// #include +NAMESPACE_CHECK(ActionFermionCore); //////////////////////////////////////////// // Scalar Actions //////////////////////////////////////////// #include +NAMESPACE_CHECK(Scalar); //////////////////////////////////////////// // Utility functions //////////////////////////////////////////// #include +NAMESPACE_CHECK(Metric); #include +NAMESPACE_CHECK(CovariantLaplacian); diff --git a/Grid/qcd/action/ActionParams.h b/Grid/qcd/action/ActionParams.h index 88de777d..0e6a11c6 100644 --- a/Grid/qcd/action/ActionParams.h +++ b/Grid/qcd/action/ActionParams.h @@ -27,37 +27,35 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_QCD_ACTION_PARAMS_H #define GRID_QCD_ACTION_PARAMS_H -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - // These can move into a params header and be given MacroMagic serialisation - struct GparityWilsonImplParams { - bool overlapCommsCompute; - std::vector twists; - GparityWilsonImplParams() : twists(Nd, 0), 
overlapCommsCompute(false){}; - }; +// These can move into a params header and be given MacroMagic serialisation +struct GparityWilsonImplParams { + Coordinate twists; + GparityWilsonImplParams() : twists(Nd, 0) {}; +}; - struct WilsonImplParams { - bool overlapCommsCompute; - std::vector twist_n_2pi_L; - std::vector boundary_phases; - WilsonImplParams() : overlapCommsCompute(false) { - boundary_phases.resize(Nd, 1.0); +struct WilsonImplParams { + bool overlapCommsCompute; + AcceleratorVector twist_n_2pi_L; + AcceleratorVector boundary_phases; + WilsonImplParams() { + boundary_phases.resize(Nd, 1.0); twist_n_2pi_L.resize(Nd, 0.0); - }; - WilsonImplParams(const std::vector phi) : boundary_phases(phi), overlapCommsCompute(false) { - twist_n_2pi_L.resize(Nd, 0.0); - } }; + WilsonImplParams(const AcceleratorVector phi) : boundary_phases(phi), overlapCommsCompute(false) { + twist_n_2pi_L.resize(Nd, 0.0); + } +}; - struct StaggeredImplParams { - StaggeredImplParams() {}; - }; +struct StaggeredImplParams { + StaggeredImplParams() {}; +}; struct OneFlavourRationalParams : Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams, @@ -69,10 +67,10 @@ namespace QCD { int, precision, int, BoundsCheckFreq); - // MaxIter and tolerance, vectors?? + // MaxIter and tolerance, vectors?? 
- // constructor - OneFlavourRationalParams( RealD _lo = 0.0, + // constructor + OneFlavourRationalParams( RealD _lo = 0.0, RealD _hi = 1.0, int _maxit = 1000, RealD tol = 1.0e-8, @@ -88,11 +86,6 @@ namespace QCD { BoundsCheckFreq(_BoundsCheckFreq){}; }; - -} -} - - - +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/ActionSet.h b/Grid/qcd/action/ActionSet.h index 4ed6a582..e6879fe5 100644 --- a/Grid/qcd/action/ActionSet.h +++ b/Grid/qcd/action/ActionSet.h @@ -26,14 +26,11 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef ACTION_SET_H #define ACTION_SET_H -namespace Grid { - -// Should drop this namespace here -namespace QCD { +NAMESPACE_BEGIN(Grid); ////////////////////////////////// // Indexing of tuple types @@ -62,7 +59,7 @@ struct Index> { template struct ActionLevel { - public: +public: unsigned int multiplier; // Fundamental repr actions separated because of the smearing @@ -77,7 +74,7 @@ struct ActionLevel { std::vector& actions; explicit ActionLevel(unsigned int mul = 1) : - actions(std::get<0>(actions_hirep)), multiplier(mul) { + actions(std::get<0>(actions_hirep)), multiplier(mul) { // initialize the hirep vectors to zero. 
// apply(this->resize, actions_hirep, 0); //need a working resize assert(mul >= 1); @@ -87,7 +84,7 @@ struct ActionLevel { void push_back(Action* ptr) { // insert only in the correct vector std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr); - }; + } template static void resize(ActPtr ap, unsigned int n) { @@ -110,7 +107,6 @@ struct ActionLevel { template using ActionSet = std::vector >; -} // QCD -} // Grid +NAMESPACE_END(Grid); #endif // ACTION_SET_H diff --git a/Grid/qcd/action/fermion/AbstractEOFAFermion.h b/Grid/qcd/action/fermion/AbstractEOFAFermion.h index 15faa401..18bcb394 100644 --- a/Grid/qcd/action/fermion/AbstractEOFAFermion.h +++ b/Grid/qcd/action/fermion/AbstractEOFAFermion.h @@ -26,75 +26,75 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_QCD_ABSTRACT_EOFA_FERMION_H #define GRID_QCD_ABSTRACT_EOFA_FERMION_H #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - // DJM: Abstract base class for EOFA fermion types. - // Defines layout of additional EOFA-specific parameters and operators. - // Use to construct EOFA pseudofermion actions that are agnostic to - // Shamir / Mobius / etc., and ensure that no one can construct EOFA - // pseudofermion action with non-EOFA fermion type. - template - class AbstractEOFAFermion : public CayleyFermion5D { - public: - INHERIT_IMPL_TYPES(Impl); +// DJM: Abstract base class for EOFA fermion types. +// Defines layout of additional EOFA-specific parameters and operators. +// Use to construct EOFA pseudofermion actions that are agnostic to +// Shamir / Mobius / etc., and ensure that no one can construct EOFA +// pseudofermion action with non-EOFA fermion type. 
+template +class AbstractEOFAFermion : public CayleyFermion5D { +public: + INHERIT_IMPL_TYPES(Impl); - public: - // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} - RealD mq1; - RealD mq2; - RealD mq3; - RealD shift; - int pm; +public: + // Fermion operator: D(mq1) + shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} + RealD mq1; + RealD mq2; + RealD mq3; + RealD shift; + int pm; - RealD alpha; // Mobius scale - RealD k; // EOFA normalization constant + RealD alpha; // Mobius scale + RealD k; // EOFA normalization constant - virtual void Instantiatable(void) = 0; + virtual void Instantiatable(void) = 0; - // EOFA-specific operations - // Force user to implement in derived classes - virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0; - virtual void Dtilde (const FermionField& in, FermionField& out) = 0; - virtual void DtildeInv(const FermionField& in, FermionField& out) = 0; + // EOFA-specific operations + // Force user to implement in derived classes + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag) = 0; + virtual void Dtilde (const FermionField& in, FermionField& out) = 0; + virtual void DtildeInv(const FermionField& in, FermionField& out) = 0; - // Implement derivatives in base class: - // for EOFA both DWF and Mobius just need d(Dw)/dU - virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ - this->DhopDeriv(mat, U, V, dag); - }; - virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ - this->DhopDerivOE(mat, U, V, dag); - }; - virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ - this->DhopDerivEO(mat, U, V, dag); - }; - - // Recompute 5D coefficients for different value of shift constant - // (needed for heatbath loop over poles) - virtual void RefreshShiftCoefficients(RealD new_shift) = 0; - - // Constructors - 
AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, - GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm, - RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()) - : CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, - _mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm) - { - int Ls = this->Ls; - this->alpha = _b + _c; - this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) / - ( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) / - ( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) ); - }; + // Implement derivatives in base class: + // for EOFA both DWF and Mobius just need d(Dw)/dU + virtual void MDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDeriv(mat, U, V, dag); }; -}} + virtual void MoeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDerivOE(mat, U, V, dag); + }; + virtual void MeoDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag){ + this->DhopDerivEO(mat, U, V, dag); + }; + + // Recompute 5D coefficients for different value of shift constant + // (needed for heatbath loop over poles) + virtual void RefreshShiftCoefficients(RealD new_shift) = 0; + + // Constructors + AbstractEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int _pm, + RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()) + : CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, + _mq1, _M5, p), mq1(_mq1), mq2(_mq2), mq3(_mq3), shift(_shift), pm(_pm) + { + int Ls = this->Ls; + this->alpha = _b + _c; + this->k 
= this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) / + ( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) / + ( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) ); + }; +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 203a015e..333ba49b 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,203 +24,146 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_CAYLEY_FERMION_H -#define GRID_QCD_CAYLEY_FERMION_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class CayleyFermion5D : public WilsonFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template struct switcheroo { - static inline int iscomplex() { return 0; } + // override multiply + virtual RealD M (const FermionField &in, FermionField &out); + virtual RealD Mdag (const FermionField &in, FermionField &out); - template - static inline vec mult(vec a, vec b) { - return real_mult(a,b); - } - }; - template<> struct switcheroo { - static inline int iscomplex() { return 1; } + // half checkerboard operations + virtual void Meooe (const FermionField &in, FermionField &out); + virtual void MeooeDag (const FermionField &in, FermionField &out); + virtual void Mooee (const FermionField &in, FermionField 
&out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); + virtual void MooeeInvDag (const FermionField &in, FermionField &out); + virtual void Meo5D (const FermionField &psi, FermionField &chi); - template - static inline vec mult(vec a, vec b) { - return a*b; - } - }; - template<> struct switcheroo { - static inline int iscomplex() { return 1; } - template - static inline vec mult(vec a, vec b) { - return a*b; - } - }; + virtual void M5D (const FermionField &psi, FermionField &chi); + virtual void M5Ddag(const FermionField &psi, FermionField &chi); + /////////////////////////////////////////////////////////////// + // Physical surface field utilities + /////////////////////////////////////////////////////////////// + virtual void Dminus(const FermionField &psi, FermionField &chi); + virtual void DminusDag(const FermionField &psi, FermionField &chi); + virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); + virtual void ExportPhysicalFermionSource(const FermionField &solution5d, FermionField &exported4d); + virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); + virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d); - template - class CayleyFermion5D : public WilsonFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: + /////////////////////////////////////////////////////////////// + // Support for MADWF tricks + /////////////////////////////////////////////////////////////// + RealD Mass(void) { return mass; }; + void SetMass(RealD _mass) { + mass=_mass; + SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs + } ; + void P(const FermionField &psi, FermionField &chi); + void Pdag(const FermionField &psi, FermionField &chi); + + ///////////////////////////////////////////////////// + // Instantiate different versions depending on 
Impl + ///////////////////////////////////////////////////// + void M5D(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + Vector &lower, + Vector &diag, + Vector &upper); - // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + void M5Ddag(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + Vector &lower, + Vector &diag, + Vector &upper); - // half checkerboard operations - virtual void Meooe (const FermionField &in, FermionField &out); - virtual void MeooeDag (const FermionField &in, FermionField &out); - virtual void Mooee (const FermionField &in, FermionField &out); - virtual void MooeeDag (const FermionField &in, FermionField &out); - virtual void MooeeInv (const FermionField &in, FermionField &out); - virtual void MooeeInvDag (const FermionField &in, FermionField &out); - virtual void Meo5D (const FermionField &psi, FermionField &chi); + virtual void Instantiatable(void)=0; - virtual void M5D (const FermionField &psi, FermionField &chi); - virtual void M5Ddag(const FermionField &psi, FermionField &chi); + // force terms; five routines; default to Dhop on diagonal + virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - /////////////////////////////////////////////////////////////// - // Physical surface field utilities - /////////////////////////////////////////////////////////////// - virtual void Dminus(const FermionField &psi, FermionField &chi); - virtual void DminusDag(const FermionField &psi, FermionField &chi); - virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); - virtual void ExportPhysicalFermionSource(const FermionField &solution5d, 
FermionField &exported4d); - virtual void ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d); - virtual void ImportUnphysicalFermion(const FermionField &solution5d, FermionField &exported4d); + // Efficient support for multigrid coarsening + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); - /////////////////////////////////////////////////////////////// - // Support for MADWF tricks - /////////////////////////////////////////////////////////////// - RealD Mass(void) { return mass; }; - void SetMass(RealD _mass) { - mass=_mass; - SetCoefficientsInternal(_zolo_hi,_gamma,_b,_c); // Reset coeffs - } ; - void P(const FermionField &psi, FermionField &chi); - void Pdag(const FermionField &psi, FermionField &chi); + void Meooe5D (const FermionField &in, FermionField &out); + void MeooeDag5D (const FermionField &in, FermionField &out); - ///////////////////////////////////////////////////// - // Instantiate different versions depending on Impl - ///////////////////////////////////////////////////// - void M5D(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + // protected: + RealD mass; - void M5Ddag(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + // Save arguments to SetCoefficientsInternal + Vector _gamma; + RealD _zolo_hi; + RealD _b; + RealD _c; - void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); - void MooeeInternalCompute(int dag, int inv, Vector > & Matp, Vector > & Matm); + // Cayley form Moebius (tanh and zolotarev) + Vector omega; + Vector bs; // S dependent coeffs + Vector cs; + Vector as; + // For preconditioning Cayley form + Vector bee; + Vector cee; + Vector aee; + Vector beo; + Vector ceo; + Vector aeo; + // LDU factorisation of the eeoo matrix + Vector lee; + Vector leem; + Vector uee; + 
Vector ueem; + Vector dee; - void MooeeInternalAsm(const FermionField &in, FermionField &out, - int LLs, int site, - Vector > &Matp, - Vector > &Matm); - void MooeeInternalZAsm(const FermionField &in, FermionField &out, - int LLs, int site, - Vector > &Matp, - Vector > &Matm); + // Matrices of 5d ee inverse params + Vector > MatpInv; + Vector > MatmInv; + Vector > MatpInvDag; + Vector > MatmInvDag; + // Constructors + CayleyFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - virtual void Instantiatable(void)=0; + void CayleyReport(void); + void CayleyZeroCounters(void); - // force terms; five routines; default to Dhop on diagonal - virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + double M5Dflops; + double M5Dcalls; + double M5Dtime; - // Efficient support for multigrid coarsening - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + double MooeeInvFlops; + double MooeeInvCalls; + double MooeeInvTime; - void Meooe5D (const FermionField &in, FermionField &out); - void MeooeDag5D (const FermionField &in, FermionField &out); +protected: + virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); + virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); + virtual void SetCoefficientsInternal(RealD zolo_hi,Vector & gamma,RealD b,RealD c); +}; - // protected: - RealD mass; +NAMESPACE_END(Grid); - // Save arguments to SetCoefficientsInternal - std::vector _gamma; - RealD _zolo_hi; - RealD _b; - RealD _c; - - // Cayley form Moebius (tanh and zolotarev) - std::vector 
omega; - std::vector bs; // S dependent coeffs - std::vector cs; - std::vector as; - // For preconditioning Cayley form - std::vector bee; - std::vector cee; - std::vector aee; - std::vector beo; - std::vector ceo; - std::vector aeo; - // LDU factorisation of the eeoo matrix - std::vector lee; - std::vector leem; - std::vector uee; - std::vector ueem; - std::vector dee; - - // Matrices of 5d ee inverse params - Vector > MatpInv; - Vector > MatmInv; - Vector > MatpInvDag; - Vector > MatmInvDag; - - // Constructors - CayleyFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - - - - void CayleyReport(void); - void CayleyZeroCounters(void); - - double M5Dflops; - double M5Dcalls; - double M5Dtime; - - double MooeeInvFlops; - double MooeeInvCalls; - double MooeeInvTime; - - protected: - virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); - virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); - virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); - }; - - } -} -#define INSTANTIATE_DPERP(A)\ -template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ -template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ -template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ -template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); - -#undef CAYLEY_DPERP_DENSE -#define CAYLEY_DPERP_CACHE -#undef CAYLEY_DPERP_LINALG -#define CAYLEY_DPERP_VEC - -#endif diff --git 
a/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc b/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc deleted file mode 100644 index dd6ec7bf..00000000 --- a/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc +++ /dev/null @@ -1,249 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ - -#include -#include - - -namespace Grid { -namespace QCD { - - // FIXME -- make a version of these routines with site loop outermost for cache reuse. - - // Pminus fowards - // Pplus backwards.. 
-template -void CayleyFermion5D::M5D(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) -{ - int Ls =this->Ls; - GridBase *grid=psi._grid; - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard=psi.checkerboard; - // Flops = 6.0*(Nc*Ns) *Ls*vol - M5Dcalls++; - M5Dtime-=usecond(); - - parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls - for(int s=0;s -void CayleyFermion5D::M5Ddag(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) -{ - int Ls =this->Ls; - GridBase *grid=psi._grid; - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard=psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - M5Dcalls++; - M5Dtime-=usecond(); - - parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls - auto tmp = psi._odata[0]; - for(int s=0;s -void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) -{ - GridBase *grid=psi._grid; - int Ls=this->Ls; - - chi.checkerboard=psi.checkerboard; - - MooeeInvCalls++; - MooeeInvTime-=usecond(); - - parallel_for(int ss=0;ssoSites();ss+=Ls){ // adds Ls - auto tmp = psi._odata[0]; - - // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops - // Apply (L^{\prime})^{-1} - chi[ss]=psi[ss]; // chi[0]=psi[0] - for(int s=1;s=0;s--){ - spProj5m(tmp,chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - uee[s]*tmp; - } - } - - MooeeInvTime+=usecond(); - -} - -template -void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) -{ - GridBase *grid=psi._grid; - int Ls=this->Ls; - - assert(psi.checkerboard == psi.checkerboard); - chi.checkerboard=psi.checkerboard; - - std::vector ueec(Ls); - std::vector deec(Ls); - std::vector leec(Ls); - std::vector ueemc(Ls); - std::vector leemc(Ls); - for(int s=0;soSites();ss+=Ls){ // adds Ls - - auto tmp = psi._odata[0]; - - // Apply (U^{\prime})^{-dagger} - chi[ss]=psi[ss]; - for 
(int s=1;s=0;s--){ - spProj5p(tmp,chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - leec[s]*tmp; - } - } - - MooeeInvTime+=usecond(); - -} - -#ifdef CAYLEY_DPERP_CACHE - INSTANTIATE_DPERP(WilsonImplF); - INSTANTIATE_DPERP(WilsonImplD); - INSTANTIATE_DPERP(GparityWilsonImplF); - INSTANTIATE_DPERP(GparityWilsonImplD); - INSTANTIATE_DPERP(ZWilsonImplF); - INSTANTIATE_DPERP(ZWilsonImplD); - - INSTANTIATE_DPERP(WilsonImplFH); - INSTANTIATE_DPERP(WilsonImplDF); - INSTANTIATE_DPERP(GparityWilsonImplFH); - INSTANTIATE_DPERP(GparityWilsonImplDF); - INSTANTIATE_DPERP(ZWilsonImplFH); - INSTANTIATE_DPERP(ZWilsonImplDF); -#endif - -}} diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc b/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc deleted file mode 100644 index 2b2eace7..00000000 --- a/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc +++ /dev/null @@ -1,828 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ - - -#include -#include - - -namespace Grid { -namespace QCD { - /* - * Dense matrix versions of routines - */ -template -void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) -{ - this->MooeeInternal(psi,chi,DaggerYes,InverseYes); -} - -template -void CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) -{ - this->MooeeInternal(psi,chi,DaggerNo,InverseYes); -} -template -void CayleyFermion5D::M5D(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) -{ - GridBase *grid=psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - const int nsimd= Simd::Nsimd(); - - Vector > u(LLs); - Vector > l(LLs); - Vector > d(LLs); - - assert(Ls/LLs==nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard=psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type * u_p = (scalar_type *)&u[0]; - scalar_type * l_p = (scalar_type *)&l[0]; - scalar_type * d_p = (scalar_type *)&d[0]; - - for(int o=0;ooSites();ss+=LLs){ // adds LLs -#if 0 - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0;v=v ) rotate(hm,hm,nsimd-1); - - hp=0.5*hp; - hm=0.5*hm; - - spRecon5m(fp,hp); - spRecon5p(fm,hm); - - chi[ss+v] = d[v]*phi[ss+v]; - chi[ss+v] = chi[ss+v] +u[v]*fp; - chi[ss+v] = chi[ss+v] +l[v]*fm; - - } -#else - for(int v=0;v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = 
Optimization::Rotate::tRotate<2>(hp_12.v); - } - if ( vm>=v ) { - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - // Can force these to real arithmetic and save 2x. - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(),hm_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); - - vstream(chi[ss+v]()(0)(0),p_00); - vstream(chi[ss+v]()(0)(1),p_01); - vstream(chi[ss+v]()(0)(2),p_02); - vstream(chi[ss+v]()(1)(0),p_10); - vstream(chi[ss+v]()(1)(1),p_11); - vstream(chi[ss+v]()(1)(2),p_12); - vstream(chi[ss+v]()(2)(0),p_20); - 
vstream(chi[ss+v]()(2)(1),p_21); - vstream(chi[ss+v]()(2)(2),p_22); - vstream(chi[ss+v]()(3)(0),p_30); - vstream(chi[ss+v]()(3)(1),p_31); - vstream(chi[ss+v]()(3)(2),p_32); - - } -#endif - } - M5Dtime+=usecond(); -} - -template -void CayleyFermion5D::M5Ddag(const FermionField &psi, - const FermionField &phi, - FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) -{ - GridBase *grid=psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - int nsimd= Simd::Nsimd(); - - Vector > u(LLs); - Vector > l(LLs); - Vector > d(LLs); - - assert(Ls/LLs==nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard=psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type * u_p = (scalar_type *)&u[0]; - scalar_type * l_p = (scalar_type *)&l[0]; - scalar_type * d_p = (scalar_type *)&d[0]; - - for(int o=0;ooSites();ss+=LLs){ // adds LLs -#if 0 - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0;v=v ) rotate(hm,hm,nsimd-1); - - hp=hp*0.5; - hm=hm*0.5; - spRecon5p(fp,hp); - spRecon5m(fm,hm); - - chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; - chi[ss+v] = chi[ss+v] +l[v]*fm; - - } -#else - for(int v=0;v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - if ( vm>=v ) { - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = 
Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(),hp_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); - - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); - - vstream(chi[ss+v]()(0)(0),p_00); - vstream(chi[ss+v]()(0)(1),p_01); - vstream(chi[ss+v]()(0)(2),p_02); - vstream(chi[ss+v]()(1)(0),p_10); - vstream(chi[ss+v]()(1)(1),p_11); - vstream(chi[ss+v]()(1)(2),p_12); - vstream(chi[ss+v]()(2)(0),p_20); - vstream(chi[ss+v]()(2)(1),p_21); - vstream(chi[ss+v]()(2)(2),p_22); - vstream(chi[ss+v]()(3)(0),p_30); - vstream(chi[ss+v]()(3)(1),p_31); - vstream(chi[ss+v]()(3)(2),p_32); - } -#endif - } - M5Dtime+=usecond(); -} - - -#ifdef AVX512 -#include -#include -#include -#endif - -template -void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionField &chi, - int LLs, int site, - Vector > &Matp, - Vector > &Matm) -{ -#ifndef AVX512 - { - SiteHalfSpinor BcastP; - 
SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; - - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0;s1); - for(int s1=0;s1 -void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, - int LLs, int site, Vector > &Matp, Vector > &Matm) -{ -#ifndef AVX512 - { - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; - - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0;s1); - for(int s1=0;s1 -void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) -{ - int Ls=this->Ls; - int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; - - chi.checkerboard=psi.checkerboard; - - Vector > Matp; - Vector > Matm; - Vector > *_Matp; - Vector > *_Matm; - - // MooeeInternalCompute(dag,inv,Matp,Matm); - if ( inv && dag ) { - _Matp = &MatpInvDag; - _Matm = &MatmInvDag; - } - if ( inv && (!dag) ) { - _Matp = &MatpInv; - _Matm = &MatmInv; - } - if ( !inv ) { - MooeeInternalCompute(dag,inv,Matp,Matm); - _Matp = &Matp; - _Matm = &Matm; - } - assert(_Matp->size()==Ls*LLs); - - MooeeInvCalls++; - MooeeInvTime-=usecond(); - - if ( switcheroo::iscomplex() ) { - parallel_for(auto site=0;site::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); - -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); -template void 
CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); - - - -}} diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.cc b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.cc deleted file mode 100644 index f6857115..00000000 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.cc +++ /dev/null @@ -1,323 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include - -namespace Grid { - namespace QCD { - - template - void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale) - { - SetCoefficientsZolotarev(1.0/scale,zdata); - } - template - void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata) - { - // How to check Ls matches?? 
- // std::cout<n << " - n"<da << " -da "<db << " -db"<dn << " -dn"<dd << " -dd"<Ls; - assert(zdata->db==Ls);// Beta has Ls coeffs - - R=(1+this->mass)/(1-this->mass); - - Beta.resize(Ls); - cc.resize(Ls); - cc_d.resize(Ls); - sqrt_cc.resize(Ls); - for(int i=0; i < Ls ; i++){ - Beta[i] = zdata -> beta[i]; - cc[i] = 1.0/Beta[i]; - cc_d[i]=sqrt(cc[i]); - } - - cc_d[Ls-1]=1.0; - for(int i=0; i < Ls-1 ; i++){ - sqrt_cc[i]= sqrt(cc[i]*cc[i+1]); - } - sqrt_cc[Ls-2]=sqrt(cc[Ls-2]); - - - ZoloHiInv =1.0/zolo_hi; - dw_diag = (4.0-this->M5)*ZoloHiInv; - - See.resize(Ls); - Aee.resize(Ls); - int sign=1; - for(int s=0;s - RealD ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) - { - int Ls = this->Ls; - - FermionField D(psi._grid); - - this->DW(psi,D,DaggerNo); - - int sign=1; - for(int s=0;s - RealD ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) - { - // This matrix is already hermitian. (g5 Dw) = Dw dag g5 = (g5 Dw)dag - // The rest of matrix is symmetric. - // Can ignore "dag" - return M(psi,chi); - } - template - void ContinuedFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ - int Ls = this->Ls; - - this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian - - int sign=1; - for(int s=0;s - void ContinuedFractionFermion5D::Meooe (const FermionField &psi, FermionField &chi) - { - int Ls = this->Ls; - - // Apply 4d dslash - if ( psi.checkerboard == Odd ) { - this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian - } else { - this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. 
g5 Dslash is hermitian - } - - int sign=1; - for(int s=0;s - void ContinuedFractionFermion5D::MeooeDag (const FermionField &psi, FermionField &chi) - { - this->Meooe(psi,chi); - } - template - void ContinuedFractionFermion5D::Mooee (const FermionField &psi, FermionField &chi) - { - int Ls = this->Ls; - - int sign=1; - for(int s=0;s - void ContinuedFractionFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) - { - this->Mooee(psi,chi); - } - template - void ContinuedFractionFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) - { - int Ls = this->Ls; - - // Apply Linv - axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); - for(int s=1;s=0;s--){ - axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1); - } - } - template - void ContinuedFractionFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) - { - this->MooeeInv(psi,chi); - } - - // force terms; five routines; default to Dhop on diagonal - template - void ContinuedFractionFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int sign=1; - for(int s=0;sDhopDeriv(mat,D,V,DaggerNo); - }; - template - void ContinuedFractionFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int sign=1; - for(int s=0;sDhopDerivOE(mat,D,V,DaggerNo); - }; - template - void ContinuedFractionFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int sign=1; - for(int s=0;sDhopDerivEO(mat,D,V,DaggerNo); - }; - - // Constructors - template - ContinuedFractionFermion5D::ContinuedFractionFermion5D( - GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD M5,const ImplParams &p) : - 
WilsonFermion5D(_Umu, - FiveDimGrid, FiveDimRedBlackGrid, - FourDimGrid, FourDimRedBlackGrid,M5,p), - mass(_mass) - { - int Ls = this->Ls; - assert((Ls&0x1)==1); // Odd Ls required - } - - template - void ContinuedFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) - { - int Ls = this->Ls; - conformable(solution5d._grid,this->FermionGrid()); - conformable(exported4d._grid,this->GaugeGrid()); - ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); - } - template - void ContinuedFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) - { - int Ls = this->Ls; - conformable(imported5d._grid,this->FermionGrid()); - conformable(input4d._grid ,this->GaugeGrid()); - FermionField tmp(this->FermionGrid()); - tmp=zero; - InsertSlice(input4d, tmp, Ls-1, Ls-1); - tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; - this->Dminus(tmp,imported5d); - } - - FermOpTemplateInstantiate(ContinuedFractionFermion5D); - - } -} - diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h index b551fc28..379c5f8f 100644 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,46 +24,44 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_CONTINUED_FRACTION_H #define GRID_QCD_CONTINUED_FRACTION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class ContinuedFractionFermion5D : public WilsonFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class ContinuedFractionFermion5D : public WilsonFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: + // override multiply + virtual RealD M (const FermionField &in, FermionField &out); + virtual RealD Mdag (const FermionField &in, FermionField &out); - // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + // half checkerboard operaions + virtual void Meooe (const FermionField &in, FermionField &out); + virtual void MeooeDag (const FermionField &in, FermionField &out); + virtual void Mooee (const FermionField &in, FermionField &out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); + virtual void MooeeInvDag (const FermionField &in, FermionField &out); - // half checkerboard operaions - virtual void Meooe (const FermionField &in, FermionField &out); - virtual void MeooeDag (const FermionField &in, FermionField &out); - virtual void Mooee (const FermionField &in, FermionField &out); - virtual void MooeeDag (const FermionField &in, FermionField &out); - virtual void MooeeInv (const FermionField &in, FermionField &out); - virtual void MooeeInvDag (const FermionField &in, FermionField &out); + // force terms; five routines; default to Dhop on diagonal + virtual void MDeriv (GaugeField &mat,const FermionField 
&U,const FermionField &V,int dag); + virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - // force terms; five routines; default to Dhop on diagonal - virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + // virtual void Instantiatable(void)=0; + virtual void Instantiatable(void) =0; - // virtual void Instantiatable(void)=0; - virtual void Instantiatable(void) =0; - - // Efficient support for multigrid coarsening - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + // Efficient support for multigrid coarsening + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); /////////////////////////////////////////////////////////////// // Physical surface field utilities @@ -73,35 +71,34 @@ namespace Grid { virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); - // Constructors - ContinuedFractionFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD M5,const ImplParams &p= ImplParams()); + // Constructors + ContinuedFractionFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5,const ImplParams &p= ImplParams()); - protected: +protected: - void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale); - void 
SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);; + void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale); + void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata);; - // Cont frac - RealD dw_diag; - RealD mass; - RealD R; - RealD ZoloHiInv; - std::vector Beta; - std::vector cc;; - std::vector cc_d;; - std::vector sqrt_cc; - std::vector See; - std::vector Aee; + // Cont frac + RealD dw_diag; + RealD mass; + RealD R; + RealD ZoloHiInv; + Vector Beta; + Vector cc;; + Vector cc_d;; + Vector sqrt_cc; + Vector See; + Vector Aee; - }; +}; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc deleted file mode 100644 index 37ab5fa6..00000000 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc +++ /dev/null @@ -1,438 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include - -namespace Grid { -namespace QCD { - - template - DomainWallEOFAFermion::DomainWallEOFAFermion( - GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, - RealD _shift, int _pm, RealD _M5, const ImplParams &p) : - AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, - FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, - _shift, _pm, _M5, 1.0, 0.0, p) - { - RealD eps = 1.0; - Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls); - assert(zdata->n == this->Ls); - - std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl; - this->SetCoefficientsTanh(zdata, 1.0, 0.0); - - Approx::zolotarev_free(zdata); - } - - /*************************************************************** - * Additional EOFA operators only called outside the inverter. - * Since speed is not essential, simple axpby-style - * implementations should be fine. 
- ***************************************************************/ - template - void DomainWallEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) - { - int Ls = this->Ls; - - Din = zero; - if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); } - else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } - else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); } - else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } - } - - // This is just the identity for DWF - template - void DomainWallEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; } - - // This is just the identity for DWF - template - void DomainWallEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; } - - /*****************************************************************************************************/ - - template - RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - FermionField Din(psi._grid); - - this->Meooe5D(psi, Din); - this->DW(Din, chi, DaggerNo); - axpby(chi, 1.0, 1.0, chi, psi); - this->M5D(psi, chi); - return(norm2(chi)); - } - - template - RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - FermionField Din(psi._grid); - - this->DW(psi, Din, DaggerYes); - this->MeooeDag5D(Din, chi); - this->M5Ddag(psi, chi); - axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); - } - - /******************************************************************** - * Performance critical fermion operators called inside the inverter - ********************************************************************/ - - template - void DomainWallEOFAFermion::M5D(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - int pm = this->pm; - RealD shift = this->shift; - RealD mq1 = this->mq1; - RealD mq2 = this->mq2; - RealD mq3 = 
this->mq3; - - // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} ) - Coeff_t shiftp(0.0), shiftm(0.0); - if(shift != 0.0){ - if(pm == 1){ shiftp = shift*(mq3-mq2); } - else{ shiftm = -shift*(mq3-mq2); } - } - - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm; - std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftp; - - #if(0) - std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl; - for(int i=0; i::iscomplex()) { - sp[l] = PplusMat (l*istride+s1*ostride,s2); - sm[l] = PminusMat(l*istride+s1*ostride,s2); - } else { - // if real - scalar_type tmp; - tmp = PplusMat (l*istride+s1*ostride,s2); - sp[l] = scalar_type(tmp.real(),tmp.real()); - tmp = PminusMat(l*istride+s1*ostride,s2); - sm[l] = scalar_type(tmp.real(),tmp.real()); - } - } - Matp[LLs*s2+s1] = Vp; - Matm[LLs*s2+s1] = Vm; - }} - } - - FermOpTemplateInstantiate(DomainWallEOFAFermion); - GparityFermOpTemplateInstantiate(DomainWallEOFAFermion); - -}} diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h index 5362cda8..a2d0e733 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h @@ -26,90 +26,65 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H -#define GRID_QCD_DOMAIN_WALL_EOFA_FERMION_H + /* END LEGAL */ +#pragma once #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - template - class DomainWallEOFAFermion : public AbstractEOFAFermion - { - public: - INHERIT_IMPL_TYPES(Impl); +template +class DomainWallEOFAFermion : public AbstractEOFAFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); - public: - // Modified (0,Ls-1) and 
(Ls-1,0) elements of Mooee - // for red-black preconditioned Shamir EOFA - Coeff_t dm; - Coeff_t dp; +public: + // Modified (0,Ls-1) and (Ls-1,0) elements of Mooee + // for red-black preconditioned Shamir EOFA + Coeff_t dm; + Coeff_t dp; - virtual void Instantiatable(void) {}; + virtual void Instantiatable(void) {}; - // EOFA-specific operations - virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); - virtual void Dtilde (const FermionField& in, FermionField& out); - virtual void DtildeInv (const FermionField& in, FermionField& out); + // EOFA-specific operations + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); + virtual void Dtilde (const FermionField& in, FermionField& out); + virtual void DtildeInv (const FermionField& in, FermionField& out); - // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + // override multiply + virtual RealD M (const FermionField& in, FermionField& out); + virtual RealD Mdag (const FermionField& in, FermionField& out); - // half checkerboard operations - virtual void Mooee (const FermionField& in, FermionField& out); - virtual void MooeeDag (const FermionField& in, FermionField& out); - virtual void MooeeInv (const FermionField& in, FermionField& out); - virtual void MooeeInvDag(const FermionField& in, FermionField& out); + // half checkerboard operations + virtual void Mooee (const FermionField& in, FermionField& out); + virtual void MooeeDag (const FermionField& in, FermionField& out); + virtual void MooeeInv (const FermionField& in, FermionField& out); + virtual void MooeeInvDag(const FermionField& in, FermionField& out); - virtual void M5D (const FermionField& psi, FermionField& chi); - virtual void M5Ddag (const FermionField& psi, FermionField& chi); + virtual void M5D (const FermionField& psi, FermionField& chi); + virtual void M5Ddag (const FermionField& psi, 
FermionField& chi); - ///////////////////////////////////////////////////// - // Instantiate different versions depending on Impl - ///////////////////////////////////////////////////// - void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + ///////////////////////////////////////////////////// + // Instantiate different versions depending on Impl + ///////////////////////////////////////////////////// + void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper); - void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper); - void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); + virtual void RefreshShiftCoefficients(RealD new_shift); - void MooeeInternalCompute(int dag, int inv, Vector>& Matp, Vector>& Matm); + // Constructors + DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, + RealD _M5, const ImplParams& p=ImplParams()); - void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site, - Vector>& Matp, Vector>& Matm); +protected: + void SetCoefficientsInternal(RealD zolo_hi, Vector& gamma, RealD b, RealD c); +}; - void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site, - Vector>& Matp, Vector>& Matm); +NAMESPACE_END(Grid); - virtual void RefreshShiftCoefficients(RealD new_shift); - - // Constructors - DomainWallEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, - GridCartesian& 
FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, - RealD _M5, const ImplParams& p=ImplParams()); - - protected: - void SetCoefficientsInternal(RealD zolo_hi, std::vector& gamma, RealD b, RealD c); - }; -}} - -#define INSTANTIATE_DPERP_DWF_EOFA(A)\ -template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ -template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ -template void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ -template void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); - -#undef DOMAIN_WALL_EOFA_DPERP_DENSE -#define DOMAIN_WALL_EOFA_DPERP_CACHE -#undef DOMAIN_WALL_EOFA_DPERP_LINALG -#define DOMAIN_WALL_EOFA_DPERP_VEC - -#endif diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc deleted file mode 100644 index 0b214d31..00000000 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc +++ /dev/null @@ -1,248 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - // FIXME -- make a version of these routines with site loop outermost for cache reuse. - - // Pminus fowards - // Pplus backwards.. - template - void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - int Ls = this->Ls; - GridBase* grid = psi._grid; - - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - for(int s=0; sM5Dtime += usecond(); - } - - template - void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - int Ls = this->Ls; - GridBase* grid = psi._grid; - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard=psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - auto tmp = psi._odata[0]; - for(int s=0; sM5Dtime += usecond(); - } - - template - void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - GridBase* grid = 
psi._grid; - int Ls = this->Ls; - - chi.checkerboard = psi.checkerboard; - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; - - // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops - // Apply (L^{\prime})^{-1} - chi[ss] = psi[ss]; // chi[0]=psi[0] - for(int s=1; slee[s-1]*tmp1; - } - - // L_m^{-1} - for(int s=0; sleem[s]*tmp1; - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls])*tmp1; - } - spProj5m(tmp2, chi[ss+Ls-1]); - chi[ss+Ls-1] = (1.0/this->dee[Ls])*tmp1 + (1.0/this->dee[Ls-1])*tmp2; - - // Apply U^{-1} - for(int s=Ls-2; s>=0; s--){ - spProj5m(tmp1, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; - } - } - - this->MooeeInvTime += usecond(); - } - - template - void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - - assert(psi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - std::vector ueec(Ls); - std::vector deec(Ls+1); - std::vector leec(Ls); - std::vector ueemc(Ls); - std::vector leemc(Ls); - - for(int s=0; suee[s]); - deec[s] = conjugate(this->dee[s]); - leec[s] = conjugate(this->lee[s]); - ueemc[s] = conjugate(this->ueem[s]); - leemc[s] = conjugate(this->leem[s]); - } - deec[Ls] = conjugate(this->dee[Ls]); - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ // adds Ls - - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; - - // Apply (U^{\prime})^{-dagger} - chi[ss] = psi[ss]; - for(int s=1; s=0; s--){ - spProj5p(tmp1, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - leec[s]*tmp1; - } - } - - this->MooeeInvTime += usecond(); - } - - #ifdef DOMAIN_WALL_EOFA_DPERP_CACHE - - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); - 
INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); - - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermiondense.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermiondense.cc deleted file mode 100644 index c27074d9..00000000 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermiondense.cc +++ /dev/null @@ -1,159 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include - -namespace Grid { -namespace QCD { - - /* - * Dense matrix versions of routines - */ - template - void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) - { - int Ls = this->Ls; - int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; - - chi.checkerboard = psi.checkerboard; - - assert(Ls==LLs); - - Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); - Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); - - for(int s=0;sbee[s]; - Pminus(s,s) = this->bee[s]; - } - - for(int s=0; scee[s]; - } - - for(int s=0; scee[s+1]; - } - - Pplus (0,Ls-1) = this->dp; - Pminus(Ls-1,0) = this->dm; - - Eigen::MatrixXd PplusMat ; - Eigen::MatrixXd PminusMat; - - if(inv) { - PplusMat = Pplus.inverse(); - PminusMat = Pminus.inverse(); - } else { - PplusMat = Pplus; - PminusMat = Pminus; - } - - if(dag){ - PplusMat.adjointInPlace(); - PminusMat.adjointInPlace(); - } - - // For the non-vectorised s-direction this is simple - - for(auto site=0; site::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template 
void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); - - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc deleted file mode 100644 index 80a4bf09..00000000 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc +++ /dev/null @@ -1,168 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either 
version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - // FIXME -- make a version of these routines with site loop outermost for cache reuse. - // Pminus fowards - // Pplus backwards - template - void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; s - void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; s - void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - FermionField tmp(psi._grid); - - // Apply (L^{\prime})^{-1} - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] - } - - // L_m^{-1} - for(int s=0; sleem[s], chi, Ls-1, s); - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1); - } - axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1); - 
axpby_ssp_pplus(chi, one, tmp, one/this->dee[Ls], chi, Ls-1, Ls-1); - - // Apply U^{-1} - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] - } - } - - template - void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - FermionField tmp(psi._grid); - - // Apply (U^{\prime})^{-dagger} - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - for(int s=1; suee[s-1]), chi, s, s-1); - } - - // U_m^{-\dagger} - for(int s=0; sueem[s]), chi, Ls-1, s); - } - - // L_m^{-\dagger} D^{-dagger} - for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); - } - axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1); - axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1); - - // Apply L^{-dagger} - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] - } - } - - #ifdef DOMAIN_WALL_EOFA_DPERP_LINALG - - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); - - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc deleted file mode 100644 index c95172a5..00000000 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc +++ /dev/null @@ -1,605 +0,0 @@ 
-/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionvec.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - /* - * Dense matrix versions of routines - */ - template - void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - const int nsimd = Simd::Nsimd(); - - Vector > u(LLs); - Vector > l(LLs); - Vector > d(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - - for(int o=0;oM5Dcalls++; - this->M5Dtime -= usecond(); - - assert(Nc == 3); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - #if 0 - - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } - - hp = 0.5*hp; - hm = 0.5*hm; - - spRecon5m(fp, hp); - spRecon5p(fm, hm); - - chi[ss+v] = d[v]*phi[ss+v]; - chi[ss+v] = chi[ss+v] + u[v]*fp; - chi[ss+v] = chi[ss+v] + l[v]*fm; - - } - - #else - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = 
Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - // Can force these to real arithmetic and save 2x. - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - 
vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - } - - #endif - } - - this->M5Dtime += usecond(); - } - - template - void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - int nsimd = Simd::Nsimd(); - - Vector > u(LLs); - Vector > l(LLs); - Vector > d(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - - for(int o=0; oM5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - #if 0 - - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } - - hp = hp*0.5; - hm = hm*0.5; - spRecon5p(fp, hp); - spRecon5m(fm, hm); - - chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; - chi[ss+v] = chi[ss+v] +l[v]*fm; - } - - #else - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = 
Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - } 
- #endif - - } - - this->M5Dtime += usecond(); - } - - #ifdef AVX512 - #include - #include - #include - #endif - - template - void DomainWallEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, - int LLs, int site, Vector >& Matp, Vector >& Matm) - { - #ifndef AVX512 - { - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; - - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0; s1); - for(int s1=0; s1 - void DomainWallEOFAFermion::MooeeInternalZAsm(const FermionField& psi, FermionField& chi, - int LLs, int site, Vector >& Matp, Vector >& Matm) - { - std::cout << "Error: zMobius not implemented for EOFA" << std::endl; - exit(-1); - }; - - template - void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) - { - int Ls = this->Ls; - int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; - - chi.checkerboard = psi.checkerboard; - - Vector > Matp; - Vector > Matm; - Vector > *_Matp; - Vector > *_Matm; - - // MooeeInternalCompute(dag,inv,Matp,Matm); - if(inv && dag){ - _Matp = &this->MatpInvDag; - _Matm = &this->MatmInvDag; - } - - if(inv && (!dag)){ - _Matp = &this->MatpInv; - _Matm = &this->MatmInv; - } - - if(!inv){ - MooeeInternalCompute(dag, inv, Matp, Matm); - _Matp = &Matp; - _Matm = &Matm; - } - - assert(_Matp->size() == Ls*LLs); - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - if(switcheroo::iscomplex()){ - parallel_for(auto site=0; siteMooeeInvTime += usecond(); - } - - #ifdef DOMAIN_WALL_EOFA_DPERP_VEC - - INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplD); - INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplF); - INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplD); - INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplF); - - INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplDF); - INSTANTIATE_DPERP_DWF_EOFA(DomainWallVec5dImplFH); - INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplDF); - 
INSTANTIATE_DPERP_DWF_EOFA(ZDomainWallVec5dImplFH); - - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/DomainWallFermion.h b/Grid/qcd/action/fermion/DomainWallFermion.h index b6824dc4..e32e5917 100644 --- a/Grid/qcd/action/fermion/DomainWallFermion.h +++ b/Grid/qcd/action/fermion/DomainWallFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,34 +25,33 @@ Author: Vera Guelpers 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_DOMAIN_WALL_FERMION_H #define GRID_QCD_DOMAIN_WALL_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class DomainWallFermion : public CayleyFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class DomainWallFermion : public CayleyFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: + void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary, std::vector twist, bool fiveD) { + FermionField in_k(in.Grid()); + FermionField prop_k(in.Grid()); - void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary, std::vector twist, bool fiveD) { - FermionField in_k(in._grid); - FermionField prop_k(in._grid); - - FFT theFFT((GridCartesian *) in._grid); + FFT theFFT((GridCartesian *) in.Grid()); //phase for boundary condition - ComplexField coor(in._grid); - ComplexField ph(in._grid); ph = zero; - FermionField in_buf(in._grid); in_buf = zero; + ComplexField coor(in.Grid()); + ComplexField ph(in.Grid()); ph = Zero(); + FermionField in_buf(in.Grid()); in_buf = Zero(); + typedef typename Simd::scalar_type Scalar; Scalar ci(0.0,1.0); assert(twist.size() == Nd);//check that twist is Nd assert(boundary.size() == Nd);//check that boundary conditions is Nd @@ -63,13 +62,12 @@ namespace Grid { // Shift coordinate lattice index by 1 to account for 5th dimension. 
LatticeCoordinate(coor, nu + shift); double boundary_phase = ::acos(real(boundary[nu])); - ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu+shift]))); + ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu+shift]))); //momenta for propagator shifted by twist+boundary twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI)); } in_buf = exp(ci*ph*(-1.0))*in; - if(fiveD){//FFT only on temporal and spatial dimensions std::vector mask(Nd+1,1); mask[0] = 0; theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward); @@ -82,7 +80,7 @@ namespace Grid { theFFT.FFT_all_dim(out,prop_k,FFT::backward); } //phase for boundary condition - out = out * exp(ci*ph); + out = out * exp(Scalar(2.0*M_PI)*ci*ph); }; virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) { @@ -105,38 +103,37 @@ namespace Grid { FreePropagator(in,out,mass,boundary,twist,fiveD); }; - virtual void Instantiatable(void) {}; - // Constructors - DomainWallFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void) {}; + // Constructors + DomainWallFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) : - CayleyFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) - { - RealD eps = 1.0; + { + RealD eps = 1.0; - Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham - assert(zdata->n==this->Ls); + Approx::zolotarev_data *zdata = 
Approx::higham(eps,this->Ls);// eps is ignored for higham + assert(zdata->n==this->Ls); - std::cout< +class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > { +public: + + typedef PeriodicGaugeImpl > Gimpl; + INHERIT_GIMPL_TYPES(Gimpl); + + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + static const bool LsVectorised=true; + static const int Nhcs = Options::Nhcs; + + typedef typename Options::_Coeff_t Coeff_t; + typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; + + template using iImplSpinor = iScalar, Ns> >; + template using iImplPropagator = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplHalfCommSpinor = iScalar, Nhcs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; + + typedef iImplSpinor SiteSpinor; + typedef iImplPropagator SitePropagator; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplHalfCommSpinor SiteHalfCommSpinor; + typedef Lattice FermionField; + typedef Lattice PropagatorField; + + ///////////////////////////////////////////////// + // Make the doubled gauge field a *scalar* + ///////////////////////////////////////////////// + typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar + typedef iImplGaugeField SiteScalarGaugeField; // scalar + typedef iImplGaugeLink SiteScalarGaugeLink; // scalar + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; + typedef WilsonImplParams ImplParams; + typedef WilsonStencil StencilImpl; + typedef typename StencilImpl::View_type StencilView; + + ImplParams Params; + + DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + template + static accelerator_inline void loadLinkElement(Simd ®, ref &memory) + { + vsplat(reg, memory); + } + + template + 
static accelerator_inline void multLink(_Spinor &phi, const SiteDoubledGaugeField &U, + const _Spinor &chi, int mu, StencilEntry *SE, + StencilView &St) + { +#ifdef GPU_VEC + // Gauge link is scalarised + mult(&phi(), &U(mu), &chi()); +#else + SiteGaugeLink UU; + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mult(&phi(), &UU(), &chi()); +#endif + } + + inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) + { + SiteScalarGaugeField ScalarUmu; + SiteDoubledGaugeField ScalarUds; + + GaugeLinkField U(Umu.Grid()); + GaugeField Uadj(Umu.Grid()); + for (int mu = 0; mu < Nd; mu++) { + U = PeekIndex(Umu, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uadj, U, mu); + } + + for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + Coordinate lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + + peekLocalSite(ScalarUmu, Umu, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); + + peekLocalSite(ScalarUmu, Uadj, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); + + pokeLocalSite(ScalarUds, Uds, lcoor); + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) + { + assert(0); + } + + inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){ + assert(0); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { + assert(0); + } + + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + assert(0); + } + + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { + + assert(0); + // Following lines to be revised after Peter's addition of half prec + // missing put lane... 
+ /* + typedef decltype(traceIndex(outerProduct(Btilde[0], Atilde[0]))) result_type; + unsigned int LLs = Btilde.Grid()->_rdimensions[0]; + conformable(Atilde.Grid(),Btilde.Grid()); + GridBase* grid = mat.Grid(); + GridBase* Bgrid = Btilde.Grid(); + unsigned int dimU = grid->Nd(); + unsigned int dimF = Bgrid->Nd(); + GaugeLinkField tmp(grid); + tmp = Zero(); + + // FIXME + // Current implementation works, thread safe, probably suboptimal + // Passing through the local coordinate for grid transformation + // the force grid is in general very different from the Ls vectorized grid + + for (int so = 0; so < grid->oSites(); so++) { + std::vector vres(Bgrid->Nsimd()); + std::vector ocoor; grid->oCoorFromOindex(ocoor,so); + for (int si = 0; si < tmp.Grid()->iSites(); si++){ + typename result_type::scalar_object scalar_object; scalar_object = Zero(); + std::vector local_coor; + std::vector icoor; grid->iCoorFromIindex(icoor,si); + grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor); + for (int s = 0; s < LLs; s++) { + std::vector slocal_coor(dimF); + slocal_coor[0] = s; + for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1]; + int sF = Bgrid->oIndexReduced(slocal_coor); + assert(sF < Bgrid->oSites()); + + extract(traceIndex(outerProduct(Btilde[sF], Atilde[sF])), vres); + // sum across the 5d dimension + for (auto v : vres) scalar_object += v; + } + tmp[so].putlane(scalar_object, si); + } + } + PokeIndex(mat, tmp, mu); + */ + } +}; +typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec +typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float +typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double + +typedef DomainWallVec5dImpl DomainWallVec5dImplRL; // Real.. whichever prec +typedef DomainWallVec5dImpl DomainWallVec5dImplFH; // Float +typedef DomainWallVec5dImpl DomainWallVec5dImplDF; // Double + +typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. 
whichever prec +typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float +typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double + +typedef DomainWallVec5dImpl ZDomainWallVec5dImplRL; // Real.. whichever prec +typedef DomainWallVec5dImpl ZDomainWallVec5dImplFH; // Float +typedef DomainWallVec5dImpl ZDomainWallVec5dImplDF; // Double + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/Fermion.h b/Grid/qcd/action/fermion/Fermion.h index 77a4681f..fb6f18bb 100644 --- a/Grid/qcd/action/fermion/Fermion.h +++ b/Grid/qcd/action/fermion/Fermion.h @@ -23,10 +23,9 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_FERMION_H -#define GRID_QCD_FERMION_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once //////////////////////////////////////////////////////////////////////////////////////////////////// // Explicit explicit template instantiation is still required in the .cc files @@ -50,12 +49,17 @@ Author: Peter Boyle //////////////////////////////////////////// #include // 4d wilson like -#include // 4d wilson like +NAMESPACE_CHECK(Wilson); +#include // 4d wilson like +NAMESPACE_CHECK(WilsonTM); #include // 4d wilson clover fermions +NAMESPACE_CHECK(WilsonClover); #include // 5d base used by all 5d overlap types +NAMESPACE_CHECK(Wilson5D); #include #include +NAMESPACE_CHECK(Staggered); #include // Cayley types #include @@ -63,7 +67,8 @@ Author: Peter Boyle #include #include #include -#include +NAMESPACE_CHECK(DomainWall); + #include #include #include @@ -75,6 +80,7 @@ Author: Peter Boyle #include // Partial fraction #include #include +NAMESPACE_CHECK(Overlap); /////////////////////////////////////////////////////////////////////////////// // G5 herm -- 
this has to live in QCD since dirac matrix is not in the broader sector of code /////////////////////////////////////////////////////////////////////////////// @@ -84,14 +90,17 @@ Author: Peter Boyle // Fourier accelerated Pauli Villars inverse support /////////////////////////////////////////////////////////////////////////////// #include +NAMESPACE_CHECK(WilsonTM5); //////////////////////////////////////////////////////////////////////////////// // Move this group to a DWF specific tools/algorithms subdir? //////////////////////////////////////////////////////////////////////////////// +#include #include #include #include #include +NAMESPACE_CHECK(DWFutils); //////////////////////////////////////////////////////////////////////////////////////////////////// // More maintainable to maintain the following typedef list centrally, as more "impl" targets @@ -99,8 +108,7 @@ Author: Peter Boyle //////////////////////////////////////////////////////////////////////////////////////////////////// // Cayley 5d -namespace Grid { - namespace QCD { +NAMESPACE_BEGIN(Grid); typedef WilsonFermion WilsonFermionR; typedef WilsonFermion WilsonFermionF; @@ -186,46 +194,6 @@ typedef ZMobiusFermion ZMobiusFermionFH; typedef ZMobiusFermion ZMobiusFermionDF; // Ls vectorised -typedef DomainWallFermion DomainWallFermionVec5dR; -typedef DomainWallFermion DomainWallFermionVec5dF; -typedef DomainWallFermion DomainWallFermionVec5dD; - -typedef DomainWallFermion DomainWallFermionVec5dRL; -typedef DomainWallFermion DomainWallFermionVec5dFH; -typedef DomainWallFermion DomainWallFermionVec5dDF; - -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dR; -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dF; -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dD; - -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dRL; -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dFH; -typedef DomainWallEOFAFermion DomainWallEOFAFermionVec5dDF; - -typedef MobiusFermion 
MobiusFermionVec5dR; -typedef MobiusFermion MobiusFermionVec5dF; -typedef MobiusFermion MobiusFermionVec5dD; - -typedef MobiusFermion MobiusFermionVec5dRL; -typedef MobiusFermion MobiusFermionVec5dFH; -typedef MobiusFermion MobiusFermionVec5dDF; - -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dR; -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dF; -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dD; - -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dRL; -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dFH; -typedef MobiusEOFAFermion MobiusEOFAFermionVec5dDF; - -typedef ZMobiusFermion ZMobiusFermionVec5dR; -typedef ZMobiusFermion ZMobiusFermionVec5dF; -typedef ZMobiusFermion ZMobiusFermionVec5dD; - -typedef ZMobiusFermion ZMobiusFermionVec5dRL; -typedef ZMobiusFermion ZMobiusFermionVec5dFH; -typedef ZMobiusFermion ZMobiusFermionVec5dDF; - typedef ScaledShamirFermion ScaledShamirFermionR; typedef ScaledShamirFermion ScaledShamirFermionF; typedef ScaledShamirFermion ScaledShamirFermionD; @@ -318,12 +286,13 @@ typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion5DD; +#ifndef GRID_NVCC typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dR; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dF; typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermionVec5dD; +#endif - - }} +NAMESPACE_END(Grid); //////////////////// // Scalar QED actions @@ -332,4 +301,4 @@ typedef ImprovedStaggeredFermion5D ImprovedStaggeredFermion #include #include -#endif + diff --git a/Grid/qcd/action/fermion/FermionCore.h b/Grid/qcd/action/fermion/FermionCore.h index 60632c3a..6745032e 100644 --- a/Grid/qcd/action/fermion/FermionCore.h +++ b/Grid/qcd/action/fermion/FermionCore.h @@ -36,58 +36,13 @@ Author: Peter Boyle // Fermion prereqs //////////////////////////////////////////// #include //used by all wilson type fermions 
+NAMESPACE_CHECK(Compressor); #include +NAMESPACE_CHECK(FermionOperatorImpl); #include +NAMESPACE_CHECK(FermionOperator); #include //used by all wilson type fermions #include //used by all wilson type fermions - -#define FermOpStaggeredTemplateInstantiate(A) \ - template class A; \ - template class A; - -#define FermOpStaggeredVec5dTemplateInstantiate(A) \ - template class A; \ - template class A; - -#define FermOp4dVecTemplateInstantiate(A) \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; - - -#define AdjointFermOpTemplateInstantiate(A) \ - template class A; \ - template class A; - -#define TwoIndexFermOpTemplateInstantiate(A) \ - template class A; \ - template class A; \ - template class A; \ - template class A; - -#define FermOp5dVecTemplateInstantiate(A) \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; \ - template class A; - -#define FermOpTemplateInstantiate(A) \ - FermOp4dVecTemplateInstantiate(A) \ - FermOp5dVecTemplateInstantiate(A) - -#define GparityFermOpTemplateInstantiate(A) +NAMESPACE_CHECK(Kernels); #endif diff --git a/Grid/qcd/action/fermion/FermionOperator.h b/Grid/qcd/action/fermion/FermionOperator.h index 221f2bfd..c60a2e84 100644 --- a/Grid/qcd/action/fermion/FermionOperator.h +++ b/Grid/qcd/action/fermion/FermionOperator.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,86 +26,87 @@ Author: Vera Guelpers 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_FERMION_OPERATOR_H -#define GRID_QCD_FERMION_OPERATOR_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { - - //////////////////////////////////////////////////////////////// - // Allow to select between gauge representation rank bc's, flavours etc. - // and single/double precision. - //////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////// +// Allow to select between gauge representation rank bc's, flavours etc. +// and single/double precision. +//////////////////////////////////////////////////////////////// - template - class FermionOperator : public CheckerBoardedSparseMatrixBase, public Impl - { - public: +template +class FermionOperator : public CheckerBoardedSparseMatrixBase, public Impl +{ +public: - INHERIT_IMPL_TYPES(Impl); + INHERIT_IMPL_TYPES(Impl); - FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {}; - virtual ~FermionOperator(void) = default; + FermionOperator(const ImplParams &p= ImplParams()) : Impl(p) {}; + virtual ~FermionOperator(void) = default; - virtual FermionField &tmp(void) = 0; + virtual FermionField &tmp(void) = 0; - GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know - GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; + GridBase * Grid(void) { return FermionGrid(); }; // this is all the linalg routines need to know + GridBase * RedBlackGrid(void) { return FermionRedBlackGrid(); }; - virtual GridBase *FermionGrid(void) =0; - virtual GridBase *FermionRedBlackGrid(void) =0; - virtual GridBase *GaugeGrid(void) =0; - virtual GridBase *GaugeRedBlackGrid(void) =0; + 
virtual GridBase *FermionGrid(void) =0; + virtual GridBase *FermionRedBlackGrid(void) =0; + virtual GridBase *GaugeGrid(void) =0; + virtual GridBase *GaugeRedBlackGrid(void) =0; - // override multiply - virtual RealD M (const FermionField &in, FermionField &out)=0; - virtual RealD Mdag (const FermionField &in, FermionField &out)=0; + // override multiply + virtual RealD M (const FermionField &in, FermionField &out)=0; + virtual RealD Mdag (const FermionField &in, FermionField &out)=0; - // half checkerboard operaions - virtual void Meooe (const FermionField &in, FermionField &out)=0; - virtual void MeooeDag (const FermionField &in, FermionField &out)=0; - virtual void Mooee (const FermionField &in, FermionField &out)=0; - virtual void MooeeDag (const FermionField &in, FermionField &out)=0; - virtual void MooeeInv (const FermionField &in, FermionField &out)=0; - virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0; + // half checkerboard operaions + virtual void Meooe (const FermionField &in, FermionField &out)=0; + virtual void MeooeDag (const FermionField &in, FermionField &out)=0; + virtual void Mooee (const FermionField &in, FermionField &out)=0; + virtual void MooeeDag (const FermionField &in, FermionField &out)=0; + virtual void MooeeInv (const FermionField &in, FermionField &out)=0; + virtual void MooeeInvDag (const FermionField &in, FermionField &out)=0; - // non-hermitian hopping term; half cb or both - virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0; - virtual void DhopOE(const FermionField &in, FermionField &out,int dag)=0; - virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0; - virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D + // non-hermitian hopping term; half cb or both + virtual void Dhop (const FermionField &in, FermionField &out,int dag)=0; + virtual void DhopOE(const FermionField &in, FermionField 
&out,int dag)=0; + virtual void DhopEO(const FermionField &in, FermionField &out,int dag)=0; + virtual void DhopDir(const FermionField &in, FermionField &out,int dir,int disp)=0; // implemented by WilsonFermion and WilsonFermion5D - // force terms; five routines; default to Dhop on diagonal - virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);}; - virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);}; - virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);}; - virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these - virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; + // force terms; five routines; default to Dhop on diagonal + virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);}; + virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);}; + virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);}; + virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; // Clover can override these + virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; - virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; - virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; - virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; + virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; + virtual void DhopDerivEO(GaugeField &mat,const 
FermionField &U,const FermionField &V,int dag)=0; + virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0; - virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { assert(0);}; - virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) { - FFT theFFT((GridCartesian *) in._grid); + virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector boundary,std::vector twist) + { + FFT theFFT((GridCartesian *) in.Grid()); - FermionField in_k(in._grid); - FermionField prop_k(in._grid); + typedef typename Simd::scalar_type Scalar; + + FermionField in_k(in.Grid()); + FermionField prop_k(in.Grid()); //phase for boundary condition - ComplexField coor(in._grid); - ComplexField ph(in._grid); ph = zero; - FermionField in_buf(in._grid); in_buf = zero; + ComplexField coor(in.Grid()); + ComplexField ph(in.Grid()); ph = Zero(); + FermionField in_buf(in.Grid()); in_buf = Zero(); + Scalar ci(0.0,1.0); assert(twist.size() == Nd);//check that twist is Nd assert(boundary.size() == Nd);//check that boundary conditions is Nd @@ -113,7 +114,7 @@ namespace Grid { { LatticeCoordinate(coor, nu); double boundary_phase = ::acos(real(boundary[nu])); - ph = ph + boundary_phase*coor*((1./(in._grid->_fdimensions[nu]))); + ph = ph + boundary_phase*coor*((1./(in.Grid()->_fdimensions[nu]))); //momenta 
for propagator shifted by twist+boundary twist[nu] = twist[nu] + boundary_phase/((2.0*M_PI)); } @@ -124,43 +125,42 @@ namespace Grid { theFFT.FFT_all_dim(out,prop_k,FFT::backward); //phase for boundary condition - out = out * exp(ci*ph); + out = out * exp(Scalar(2.0*M_PI)*ci*ph); }; virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { - std::vector boundary; - for(int i=0;i twist(Nd,0.0); //default: periodic boundarys in all directions - FreePropagator(in,out,mass,boundary,twist); + std::vector boundary; + for(int i=0;i twist(Nd,0.0); //default: periodic boundarys in all directions + FreePropagator(in,out,mass,boundary,twist); }; - /////////////////////////////////////////////// - // Updates gauge field during HMC - /////////////////////////////////////////////// - virtual void ImportGauge(const GaugeField & _U)=0; + /////////////////////////////////////////////// + // Updates gauge field during HMC + /////////////////////////////////////////////// + virtual void ImportGauge(const GaugeField & _U)=0; - ////////////////////////////////////////////////////////////////////// - // Conserved currents, either contract at sink or insert sequentially. - ////////////////////////////////////////////////////////////////////// - - virtual void ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu)=0; - virtual void SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx)=0; + ////////////////////////////////////////////////////////////////////// + // Conserved currents, either contract at sink or insert sequentially. 
+ ////////////////////////////////////////////////////////////////////// + virtual void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu)=0; + virtual void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int tmin, + unsigned int tmax, + ComplexField &lattice_cmplx)=0; // Only reimplemented in Wilson5D // Default to just a zero correlation function - virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=zero; }; - virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=zero; }; + virtual void ContractJ5q(FermionField &q_in ,ComplexField &J5q) { J5q=Zero(); }; + virtual void ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { J5q=Zero(); }; /////////////////////////////////////////////// // Physical field import/export @@ -183,9 +183,7 @@ namespace Grid { { exported=solution; }; - }; +}; - } -} +NAMESPACE_END(Grid); -#endif diff --git a/Grid/qcd/action/fermion/FermionOperatorImpl.h b/Grid/qcd/action/fermion/FermionOperatorImpl.h index 721004e1..b444f6dc 100644 --- a/Grid/qcd/action/fermion/FermionOperatorImpl.h +++ b/Grid/qcd/action/fermion/FermionOperatorImpl.h @@ -28,1056 +28,162 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H -#define GRID_QCD_FERMION_OPERATOR_IMPL_H + /* END LEGAL */ -namespace Grid { -namespace QCD { +#pragma once - ////////////////////////////////////////////// - // Template parameter class constructs to package - // externally control Fermion implementations - // in orthogonal directions - // - // Ultimately need Impl to always define types where XXX is opaque - // - // typedef typename XXX Simd; 
- // typedef typename XXX GaugeLinkField; - // typedef typename XXX GaugeField; - // typedef typename XXX GaugeActField; - // typedef typename XXX FermionField; - // typedef typename XXX PropagatorField; - // typedef typename XXX DoubledGaugeField; - // typedef typename XXX SiteSpinor; - // typedef typename XXX SitePropagator; - // typedef typename XXX SiteHalfSpinor; - // typedef typename XXX Compressor; - // - // and Methods: - // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - // void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) - // void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) - // void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) - // - // - // To acquire the typedefs from "Base" (either a base class or template param) use: - // - // INHERIT_GIMPL_TYPES(Base) - // INHERIT_FIMPL_TYPES(Base) - // INHERIT_IMPL_TYPES(Base) - // - // The Fermion operators will do the following: - // - // struct MyOpParams { - // RealD mass; - // }; - // - // - // template - // class MyOp : public { - // public: - // - // INHERIT_ALL_IMPL_TYPES(Impl); - // - // MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam) - // { - // - // }; - // - // } - ////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); - template struct SamePrecisionMapper { - typedef T HigherPrecVector ; - typedef T LowerPrecVector ; - }; - template struct LowerPrecisionMapper { }; - template <> struct LowerPrecisionMapper { - typedef vRealF HigherPrecVector ; - typedef vRealH LowerPrecVector ; - }; - template <> struct LowerPrecisionMapper { - typedef vRealD HigherPrecVector ; - typedef vRealF LowerPrecVector ; - }; - template <> struct LowerPrecisionMapper { - typedef vComplexF HigherPrecVector ; 
- typedef vComplexH LowerPrecVector ; - }; - template <> struct LowerPrecisionMapper { - typedef vComplexD HigherPrecVector ; - typedef vComplexF LowerPrecVector ; - }; +////////////////////////////////////////////// +// Template parameter class constructs to package +// externally control Fermion implementations +// in orthogonal directions +// +// Ultimately need Impl to always define types where XXX is opaque +// +// typedef typename XXX Simd; +// typedef typename XXX GaugeLinkField; +// typedef typename XXX GaugeField; +// typedef typename XXX GaugeActField; +// typedef typename XXX FermionField; +// typedef typename XXX PropagatorField; +// typedef typename XXX DoubledGaugeField; +// typedef typename XXX SiteSpinor; +// typedef typename XXX SitePropagator; +// typedef typename XXX SiteHalfSpinor; +// typedef typename XXX Compressor; +// +// and Methods: +// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) +// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) +// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl::View_type &St) +// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) +// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) +// +// +// To acquire the typedefs from "Base" (either a base class or template param) use: +// +// INHERIT_GIMPL_TYPES(Base) +// INHERIT_FIMPL_TYPES(Base) +// INHERIT_IMPL_TYPES(Base) +// +// The Fermion operators will do the following: +// +// struct MyOpParams { +// RealD mass; +// }; +// +// +// template +// class MyOp : public { +// public: +// +// INHERIT_ALL_IMPL_TYPES(Impl); +// +// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam) +// { +// +// }; +// +// } +////////////////////////////////////////////// - struct CoeffReal { - public: - typedef RealD _Coeff_t; - static const int Nhcs 
= 2; - template using PrecisionMapper = SamePrecisionMapper; - }; - struct CoeffRealHalfComms { - public: - typedef RealD _Coeff_t; - static const int Nhcs = 1; - template using PrecisionMapper = LowerPrecisionMapper; - }; - struct CoeffComplex { - public: - typedef ComplexD _Coeff_t; - static const int Nhcs = 2; - template using PrecisionMapper = SamePrecisionMapper; - }; - struct CoeffComplexHalfComms { - public: - typedef ComplexD _Coeff_t; - static const int Nhcs = 1; - template using PrecisionMapper = LowerPrecisionMapper; - }; +template struct SamePrecisionMapper { + typedef T HigherPrecVector ; + typedef T LowerPrecVector ; +}; +template struct LowerPrecisionMapper { }; +template <> struct LowerPrecisionMapper { + typedef vRealF HigherPrecVector ; + typedef vRealH LowerPrecVector ; +}; +template <> struct LowerPrecisionMapper { + typedef vRealD HigherPrecVector ; + typedef vRealF LowerPrecVector ; +}; +template <> struct LowerPrecisionMapper { + typedef vComplexF HigherPrecVector ; + typedef vComplexH LowerPrecVector ; +}; +template <> struct LowerPrecisionMapper { + typedef vComplexD HigherPrecVector ; + typedef vComplexF LowerPrecVector ; +}; - //////////////////////////////////////////////////////////////////////// - // Implementation dependent fermion types - //////////////////////////////////////////////////////////////////////// +struct CoeffReal { +public: + typedef RealD _Coeff_t; + static const int Nhcs = 2; + template using PrecisionMapper = SamePrecisionMapper; +}; +struct CoeffRealHalfComms { +public: + typedef RealD _Coeff_t; + static const int Nhcs = 1; + template using PrecisionMapper = LowerPrecisionMapper; +}; +struct CoeffComplex { +public: + typedef ComplexD _Coeff_t; + static const int Nhcs = 2; + template using PrecisionMapper = SamePrecisionMapper; +}; +struct CoeffComplexHalfComms { +public: + typedef ComplexD _Coeff_t; + static const int Nhcs = 1; + template using PrecisionMapper = LowerPrecisionMapper; +}; + 
+//////////////////////////////////////////////////////////////////////// +// Implementation dependent fermion types +//////////////////////////////////////////////////////////////////////// #define INHERIT_FIMPL_TYPES(Impl)\ + typedef typename Impl::Coeff_t Coeff_t; \ typedef Impl Impl_t; \ typedef typename Impl::FermionField FermionField; \ typedef typename Impl::PropagatorField PropagatorField; \ typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ + typedef typename Impl::SiteDoubledGaugeField SiteDoubledGaugeField; \ typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SitePropagator SitePropagator; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::Compressor Compressor; \ typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::ImplParams ImplParams; \ - typedef typename Impl::Coeff_t Coeff_t; \ - -#define INHERIT_IMPL_TYPES(Base) \ - INHERIT_GIMPL_TYPES(Base) \ + typedef typename Impl::StencilImpl::View_type StencilView; \ + typedef typename ViewMap::Type FermionFieldView; \ + typedef typename ViewMap::Type DoubledGaugeFieldView; + +#define INHERIT_IMPL_TYPES(Base) \ + INHERIT_GIMPL_TYPES(Base) \ INHERIT_FIMPL_TYPES(Base) - - ///////////////////////////////////////////////////////////////////////////// - // Single flavour four spinors with colour index - ///////////////////////////////////////////////////////////////////////////// - template - class WilsonImpl : public PeriodicGaugeImpl > { - public: - static const int Dimension = Representation::Dimension; - static const bool isFundamental = Representation::isFundamental; - static const bool LsVectorised=false; - static const int Nhcs = Options::Nhcs; - - typedef PeriodicGaugeImpl > Gimpl; - INHERIT_GIMPL_TYPES(Gimpl); - - //Necessary? - constexpr bool is_fundamental() const{return Dimension == Nc ? 
1 : 0;} - - typedef typename Options::_Coeff_t Coeff_t; - typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; - - template using iImplSpinor = iScalar, Ns> >; - template using iImplPropagator = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplHalfCommSpinor = iScalar, Nhcs> >; - template using iImplDoubledGaugeField = iVector >, Nds>; - - typedef iImplSpinor SiteSpinor; - typedef iImplPropagator SitePropagator; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplHalfCommSpinor SiteHalfCommSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - - typedef Lattice FermionField; - typedef Lattice PropagatorField; - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - - ImplParams Params; - - WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){ - assert(Params.boundary_phases.size() == Nd); - }; - - bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - - inline void multLink(SiteHalfSpinor &phi, - const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, - int mu, - StencilEntry *SE, - StencilImpl &St) { - mult(&phi(), &U(mu), &chi()); - } - - inline void multLinkProp(SitePropagator &phi, - const SiteDoubledGaugeField &U, - const SitePropagator &chi, - int mu) { - mult(&phi(), &U(mu), &chi()); - } - - template - inline void loadLinkElement(Simd ®, ref &memory) { - reg = memory; - } - - inline void DoubleStore(GridBase *GaugeGrid, - DoubledGaugeField &Uds, - const GaugeField &Umu) - { - typedef typename Simd::scalar_type scalar_type; - - conformable(Uds._grid, GaugeGrid); - conformable(Umu._grid, GaugeGrid); - - GaugeLinkField U(GaugeGrid); - GaugeLinkField tmp(GaugeGrid); - - Lattice > coor(GaugeGrid); - //////////////////////////////////////////////////// - // apply any boundary phase or twists - //////////////////////////////////////////////////// - for (int mu = 
0; mu < Nd; mu++) { - - ////////// boundary phase ///////////// - auto pha = Params.boundary_phases[mu]; - scalar_type phase( real(pha),imag(pha) ); - - int L = GaugeGrid->GlobalDimensions()[mu]; - int Lmu = L - 1; - - LatticeCoordinate(coor, mu); - - U = PeekIndex(Umu, mu); - - // apply any twists - RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L; - if ( theta != 0.0) { - scalar_type twphase(::cos(theta),::sin(theta)); - U = twphase*U; - std::cout << GridLogMessage << " Twist ["<(Uds, tmp, mu); - - U = adj(Cshift(U, mu, -1)); - U = where(coor == 0, conjugate(phase) * U, U); - PokeIndex(Uds, U, mu + 4); - } - } - - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ - GaugeLinkField link(mat._grid); - link = TraceIndex(outerProduct(Btilde,A)); - PokeIndex(mat,link,mu); - } - - inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){ - mat = outerProduct(B,A); - } - - inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { - mat = TraceIndex(P); - } - - inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ - for (int mu = 0; mu < Nd; mu++) - mat[mu] = PeekIndex(Uds, mu); - } - - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - - int Ls=Btilde._grid->_fdimensions[0]; - GaugeLinkField tmp(mat._grid); - tmp = zero; - - parallel_for(int sss=0;sssoSites();sss++){ - int sU=sss; - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here - } - } - PokeIndex(mat,tmp,mu); - - } - }; - - //////////////////////////////////////////////////////////////////////////////////// - // Single flavour four spinors with colour index, 5d redblack - //////////////////////////////////////////////////////////////////////////////////// -template -class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Representation::Dimension> > { - public: - - typedef PeriodicGaugeImpl > Gimpl; - INHERIT_GIMPL_TYPES(Gimpl); - 
- static const int Dimension = Representation::Dimension; - static const bool isFundamental = Representation::isFundamental; - static const bool LsVectorised=true; - static const int Nhcs = Options::Nhcs; - - typedef typename Options::_Coeff_t Coeff_t; - typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; - - template using iImplSpinor = iScalar, Ns> >; - template using iImplPropagator = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplHalfCommSpinor = iScalar, Nhcs> >; - template using iImplDoubledGaugeField = iVector >, Nds>; - template using iImplGaugeField = iVector >, Nd>; - template using iImplGaugeLink = iScalar > >; - - typedef iImplSpinor SiteSpinor; - typedef iImplPropagator SitePropagator; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplHalfCommSpinor SiteHalfCommSpinor; - typedef Lattice FermionField; - typedef Lattice PropagatorField; - - ///////////////////////////////////////////////// - // Make the doubled gauge field a *scalar* - ///////////////////////////////////////////////// - typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar - typedef iImplGaugeField SiteScalarGaugeField; // scalar - typedef iImplGaugeLink SiteScalarGaugeLink; // scalar - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - - ImplParams Params; - - DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; - - bool overlapCommsCompute(void) { return false; }; - - template - inline void loadLinkElement(Simd ®, ref &memory) { - vsplat(reg, memory); - } - - inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, int mu, StencilEntry *SE, - StencilImpl &St) { - SiteGaugeLink UU; - for (int i = 0; i < Dimension; i++) { - for (int j = 0; j < Dimension; j++) { - vsplat(UU()()(i, j), U(mu)()(i, j)); - } - } - mult(&phi(), &UU(), 
&chi()); - } - - inline void multLinkProp(SitePropagator &phi, - const SiteDoubledGaugeField &U, - const SitePropagator &chi, - int mu) { - SiteGaugeLink UU; - for (int i = 0; i < Dimension; i++) { - for (int j = 0; j < Dimension; j++) { - vsplat(UU()()(i, j), U(mu)()(i, j)); - } - } - mult(&phi(), &UU(), &chi()); - } - - inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) - { - SiteScalarGaugeField ScalarUmu; - SiteDoubledGaugeField ScalarUds; - - GaugeLinkField U(Umu._grid); - GaugeField Uadj(Umu._grid); - for (int mu = 0; mu < Nd; mu++) { - U = PeekIndex(Umu, mu); - U = adj(Cshift(U, mu, -1)); - PokeIndex(Uadj, U, mu); - } - - for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { - std::vector lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - - peekLocalSite(ScalarUmu, Umu, lcoor); - for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); - - peekLocalSite(ScalarUmu, Uadj, lcoor); - for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); - - pokeLocalSite(ScalarUds, Uds, lcoor); - } - } - - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) - { - assert(0); - } - - inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){ - assert(0); - } - - inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { - assert(0); - } - - inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ - assert(0); - } - - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { - - assert(0); - // Following lines to be revised after Peter's addition of half prec - // missing put lane... 
- /* - typedef decltype(traceIndex(outerProduct(Btilde[0], Atilde[0]))) result_type; - unsigned int LLs = Btilde._grid->_rdimensions[0]; - conformable(Atilde._grid,Btilde._grid); - GridBase* grid = mat._grid; - GridBase* Bgrid = Btilde._grid; - unsigned int dimU = grid->Nd(); - unsigned int dimF = Bgrid->Nd(); - GaugeLinkField tmp(grid); - tmp = zero; - - // FIXME - // Current implementation works, thread safe, probably suboptimal - // Passing through the local coordinate for grid transformation - // the force grid is in general very different from the Ls vectorized grid - - PARALLEL_FOR_LOOP - for (int so = 0; so < grid->oSites(); so++) { - std::vector vres(Bgrid->Nsimd()); - std::vector ocoor; grid->oCoorFromOindex(ocoor,so); - for (int si = 0; si < tmp._grid->iSites(); si++){ - typename result_type::scalar_object scalar_object; scalar_object = zero; - std::vector local_coor; - std::vector icoor; grid->iCoorFromIindex(icoor,si); - grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor); - for (int s = 0; s < LLs; s++) { - std::vector slocal_coor(dimF); - slocal_coor[0] = s; - for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1]; - int sF = Bgrid->oIndexReduced(slocal_coor); - assert(sF < Bgrid->oSites()); - - extract(traceIndex(outerProduct(Btilde[sF], Atilde[sF])), vres); - // sum across the 5d dimension - for (auto v : vres) scalar_object += v; - } - tmp._odata[so].putlane(scalar_object, si); - } - } - PokeIndex(mat, tmp, mu); - */ - } -}; - - //////////////////////////////////////////////////////////////////////////////////////// - // Flavour doubled spinors; is Gparity the only? what about C*? 
- //////////////////////////////////////////////////////////////////////////////////////// -template -class GparityWilsonImpl : public ConjugateGaugeImpl > { - public: - - static const int Dimension = Representation::Dimension; - static const bool isFundamental = Representation::isFundamental; - static const int Nhcs = Options::Nhcs; - static const bool LsVectorised=false; - - typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; - INHERIT_GIMPL_TYPES(Gimpl); - - typedef typename Options::_Coeff_t Coeff_t; - typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; - - template using iImplSpinor = iVector, Ns>, Ngp>; - template using iImplPropagator = iVector, Ns>, Ngp>; - template using iImplHalfSpinor = iVector, Nhs>, Ngp>; - template using iImplHalfCommSpinor = iVector, Nhcs>, Ngp>; - template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; - - typedef iImplSpinor SiteSpinor; - typedef iImplPropagator SitePropagator; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplHalfCommSpinor SiteHalfCommSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - - typedef Lattice FermionField; - typedef Lattice PropagatorField; - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonStencil StencilImpl; - - typedef GparityWilsonImplParams ImplParams; - - ImplParams Params; - - GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; - - bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - - // provide the multiply by link that is differentiated between Gparity (with - // flavour index) and non-Gparity - inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, int mu, StencilEntry *SE, - StencilImpl &St) { - - typedef SiteHalfSpinor vobj; - typedef typename SiteHalfSpinor::scalar_object sobj; - - vobj vtmp; - sobj stmp; - - GridBase *grid = St._grid; - - const int Nsimd = grid->Nsimd(); - - int direction = St._directions[mu]; 
- int distance = St._distances[mu]; - int ptype = St._permute_type[mu]; - int sl = St._grid->_simd_layout[direction]; +NAMESPACE_END(Grid); +NAMESPACE_CHECK(ImplBase); +///////////////////////////////////////////////////////////////////////////// +// Single flavour four spinors with colour index +///////////////////////////////////////////////////////////////////////////// +#include +NAMESPACE_CHECK(ImplWilson); - // Fixme X.Y.Z.T hardcode in stencil - int mmu = mu % Nd; - - // assert our assumptions - assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code - assert((sl == 1) || (sl == 2)); - - std::vector icoor; - - if ( SE->_around_the_world && Params.twists[mmu] ) { - - if ( sl == 2 ) { - - std::vector vals(Nsimd); - - extract(chi,vals); - for(int s=0;siCoorFromIindex(icoor,s); - - assert((icoor[direction]==0)||(icoor[direction]==1)); - - int permute_lane; - if ( distance == 1) { - permute_lane = icoor[direction]?1:0; - } else { - permute_lane = icoor[direction]?0:1; - } - - if ( permute_lane ) { - stmp(0) = vals[s](1); - stmp(1) = vals[s](0); - vals[s] = stmp; - } - } - merge(vtmp,vals); - - } else { - vtmp(0) = chi(1); - vtmp(1) = chi(0); - } - mult(&phi(0),&U(0)(mu),&vtmp(0)); - mult(&phi(1),&U(1)(mu),&vtmp(1)); - - } else { - mult(&phi(0),&U(0)(mu),&chi(0)); - mult(&phi(1),&U(1)(mu),&chi(1)); - } - - } - // Fixme: Gparity prop * link - inline void multLinkProp(SitePropagator &phi, const SiteDoubledGaugeField &U, - const SitePropagator &chi, int mu) - { - assert(0); - } - - template - inline void loadLinkElement(Simd ®, ref &memory) { - reg = memory; - } - - inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - { - conformable(Uds._grid,GaugeGrid); - conformable(Umu._grid,GaugeGrid); - - GaugeLinkField Utmp (GaugeGrid); - GaugeLinkField U (GaugeGrid); - GaugeLinkField Uconj(GaugeGrid); - - Lattice > coor(GaugeGrid); - - for(int mu=0;mu(Umu,mu); - Uconj = conjugate(U); - - // This phase could 
come from a simple bc 1,1,-1,1 .. - int neglink = GaugeGrid->GlobalDimensions()[mu]-1; - if ( Params.twists[mu] ) { - Uconj = where(coor==neglink,-Uconj,Uconj); - } - - parallel_for(auto ss=U.begin();ss(outerProduct(Btilde, A)); - parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { - link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); - } - PokeIndex(mat, link, mu); - return; - } - - inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){ - //mat = outerProduct(Btilde, A); - assert(0); - } - - inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { - assert(0); - /* - auto tmp = TraceIndex(P); - parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { - mat[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); - } - */ - } - - inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ - assert(0); - } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { - - int Ls = Btilde._grid->_fdimensions[0]; - - GaugeLinkField tmp(mat._grid); - tmp = zero; - parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) { - for (int s = 0; s < Ls; s++) { - int sF = s + Ls * ss; - auto ttmp = traceIndex(outerProduct(Btilde[sF], Atilde[sF])); - tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); - } - } - PokeIndex(mat, tmp, mu); - return; - } - -}; +//////////////////////////////////////////////////////////////////////////////////////// +// Flavour doubled spinors; is Gparity the only? what about C*? 
+//////////////////////////////////////////////////////////////////////////////////////// +#include +NAMESPACE_CHECK(ImplGparityWilson); ///////////////////////////////////////////////////////////////////////////// // Single flavour one component spinors with colour index ///////////////////////////////////////////////////////////////////////////// -template -class StaggeredImpl : public PeriodicGaugeImpl > { +#include +NAMESPACE_CHECK(ImplStaggered); - public: - - typedef RealD _Coeff_t ; - static const int Dimension = Representation::Dimension; - static const bool isFundamental = Representation::isFundamental; - static const bool LsVectorised=false; - typedef PeriodicGaugeImpl > Gimpl; - - //Necessary? - constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} - - typedef _Coeff_t Coeff_t; - - INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iScalar > >; - template using iImplHalfSpinor = iScalar > >; - template using iImplDoubledGaugeField = iVector >, Nds>; - template using iImplPropagator = iScalar > >; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - typedef iImplPropagator SitePropagator; - - typedef Lattice FermionField; - typedef Lattice DoubledGaugeField; - typedef Lattice PropagatorField; - - typedef SimpleCompressor Compressor; - typedef StaggeredImplParams ImplParams; - typedef CartesianStencil StencilImpl; - - ImplParams Params; - - StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; - - inline void multLink(SiteSpinor &phi, - const SiteDoubledGaugeField &U, - const SiteSpinor &chi, - int mu){ - mult(&phi(), &U(mu), &chi()); - } - inline void multLinkAdd(SiteSpinor &phi, - const SiteDoubledGaugeField &U, - const SiteSpinor &chi, - int mu){ - mac(&phi(), &U(mu), &chi()); - } - - template - inline void loadLinkElement(Simd ®, ref &memory) { - reg = memory; - } - - inline void InsertGaugeField(DoubledGaugeField &U_ds, - const 
GaugeLinkField &U,int mu) - { - PokeIndex(U_ds, U, mu); - } - inline void DoubleStore(GridBase *GaugeGrid, - DoubledGaugeField &UUUds, // for Naik term - DoubledGaugeField &Uds, - const GaugeField &Uthin, - const GaugeField &Ufat) { - conformable(Uds._grid, GaugeGrid); - conformable(Uthin._grid, GaugeGrid); - conformable(Ufat._grid, GaugeGrid); - GaugeLinkField U(GaugeGrid); - GaugeLinkField UU(GaugeGrid); - GaugeLinkField UUU(GaugeGrid); - GaugeLinkField Udag(GaugeGrid); - GaugeLinkField UUUdag(GaugeGrid); - for (int mu = 0; mu < Nd; mu++) { - - // Staggered Phase. - Lattice > coor(GaugeGrid); - Lattice > x(GaugeGrid); LatticeCoordinate(x,0); - Lattice > y(GaugeGrid); LatticeCoordinate(y,1); - Lattice > z(GaugeGrid); LatticeCoordinate(z,2); - Lattice > t(GaugeGrid); LatticeCoordinate(t,3); - - Lattice > lin_z(GaugeGrid); lin_z=x+y; - Lattice > lin_t(GaugeGrid); lin_t=x+y+z; - - ComplexField phases(GaugeGrid); phases=1.0; - - if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); - if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); - if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); - - // 1 hop based on fat links - U = PeekIndex(Ufat, mu); - Udag = adj( Cshift(U, mu, -1)); - - U = U *phases; - Udag = Udag *phases; - - InsertGaugeField(Uds,U,mu); - InsertGaugeField(Uds,Udag,mu+4); - // PokeIndex(Uds, U, mu); - // PokeIndex(Uds, Udag, mu + 4); - - // 3 hop based on thin links. Crazy huh ? 
- U = PeekIndex(Uthin, mu); - UU = Gimpl::CovShiftForward(U,mu,U); - UUU= Gimpl::CovShiftForward(U,mu,UU); - - UUUdag = adj( Cshift(UUU, mu, -3)); - - UUU = UUU *phases; - UUUdag = UUUdag *phases; - - InsertGaugeField(UUUds,UUU,mu); - InsertGaugeField(UUUds,UUUdag,mu+4); - - } - } - - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ - GaugeLinkField link(mat._grid); - link = TraceIndex(outerProduct(Btilde,A)); - PokeIndex(mat,link,mu); - } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - assert (0); - // Must never hit - } - }; - - ///////////////////////////////////////////////////////////////////////////// - // Single flavour one component spinors with colour index. 5d vec - ///////////////////////////////////////////////////////////////////////////// - template - class StaggeredVec5dImpl : public PeriodicGaugeImpl > { - - public: - - static const int Dimension = Representation::Dimension; - static const bool isFundamental = Representation::isFundamental; - static const bool LsVectorised=true; - typedef RealD Coeff_t ; - typedef PeriodicGaugeImpl > Gimpl; - - //Necessary? - constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} +///////////////////////////////////////////////////////////////////////////// +// Single flavour one component spinors with colour index. 
5d vec +///////////////////////////////////////////////////////////////////////////// +#include +NAMESPACE_CHECK(ImplStaggered5dVec); - INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iScalar > >; - template using iImplHalfSpinor = iScalar > >; - template using iImplDoubledGaugeField = iVector >, Nds>; - template using iImplGaugeField = iVector >, Nd>; - template using iImplGaugeLink = iScalar > >; - template using iImplPropagator = iScalar > >; - - // Make the doubled gauge field a *scalar* - typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar - typedef iImplGaugeField SiteScalarGaugeField; // scalar - typedef iImplGaugeLink SiteScalarGaugeLink; // scalar - typedef iImplPropagator SitePropagator; - - typedef Lattice DoubledGaugeField; - typedef Lattice PropagatorField; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - - - typedef Lattice FermionField; - - typedef SimpleCompressor Compressor; - typedef StaggeredImplParams ImplParams; - typedef CartesianStencil StencilImpl; - - ImplParams Params; - - StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; - - template - inline void loadLinkElement(Simd ®, ref &memory) { - vsplat(reg, memory); - } - - inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, int mu) { - SiteGaugeLink UU; - for (int i = 0; i < Dimension; i++) { - for (int j = 0; j < Dimension; j++) { - vsplat(UU()()(i, j), U(mu)()(i, j)); - } - } - mult(&phi(), &UU(), &chi()); - } - inline void multLinkAdd(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, - const SiteHalfSpinor &chi, int mu) { - SiteGaugeLink UU; - for (int i = 0; i < Dimension; i++) { - for (int j = 0; j < Dimension; j++) { - vsplat(UU()()(i, j), U(mu)()(i, j)); - } - } - mac(&phi(), &UU(), &chi()); - } - - inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu) - { - GridBase *GaugeGrid = U_ds._grid; - parallel_for (int lidx = 0; 
lidx < GaugeGrid->lSites(); lidx++) { - - SiteScalarGaugeLink ScalarU; - SiteDoubledGaugeField ScalarUds; - - std::vector lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); - peekLocalSite(ScalarUds, U_ds, lcoor); - - peekLocalSite(ScalarU, U, lcoor); - ScalarUds(mu) = ScalarU(); - - } - } - inline void DoubleStore(GridBase *GaugeGrid, - DoubledGaugeField &UUUds, // for Naik term - DoubledGaugeField &Uds, - const GaugeField &Uthin, - const GaugeField &Ufat) - { - - GridBase * InputGrid = Uthin._grid; - conformable(InputGrid,Ufat._grid); - - GaugeLinkField U(InputGrid); - GaugeLinkField UU(InputGrid); - GaugeLinkField UUU(InputGrid); - GaugeLinkField Udag(InputGrid); - GaugeLinkField UUUdag(InputGrid); - - for (int mu = 0; mu < Nd; mu++) { - - // Staggered Phase. - Lattice > coor(InputGrid); - Lattice > x(InputGrid); LatticeCoordinate(x,0); - Lattice > y(InputGrid); LatticeCoordinate(y,1); - Lattice > z(InputGrid); LatticeCoordinate(z,2); - Lattice > t(InputGrid); LatticeCoordinate(t,3); - - Lattice > lin_z(InputGrid); lin_z=x+y; - Lattice > lin_t(InputGrid); lin_t=x+y+z; - - ComplexField phases(InputGrid); phases=1.0; - - if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); - if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); - if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); - - // 1 hop based on fat links - U = PeekIndex(Ufat, mu); - Udag = adj( Cshift(U, mu, -1)); - - U = U *phases; - Udag = Udag *phases; - - InsertGaugeField(Uds,U,mu); - InsertGaugeField(Uds,Udag,mu+4); - - // 3 hop based on thin links. Crazy huh ? 
- U = PeekIndex(Uthin, mu); - UU = Gimpl::CovShiftForward(U,mu,U); - UUU= Gimpl::CovShiftForward(U,mu,UU); - - UUUdag = adj( Cshift(UUU, mu, -3)); - - UUU = UUU *phases; - UUUdag = UUUdag *phases; - - InsertGaugeField(UUUds,UUU,mu); - InsertGaugeField(UUUds,UUUdag,mu+4); - - } - } - - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ - assert(0); - } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - assert (0); - } - }; - -typedef WilsonImpl WilsonImplR; // Real.. whichever prec -typedef WilsonImpl WilsonImplF; // Float -typedef WilsonImpl WilsonImplD; // Double - -typedef WilsonImpl WilsonImplRL; // Real.. whichever prec -typedef WilsonImpl WilsonImplFH; // Float -typedef WilsonImpl WilsonImplDF; // Double - -typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec -typedef WilsonImpl ZWilsonImplF; // Float -typedef WilsonImpl ZWilsonImplD; // Double - -typedef WilsonImpl ZWilsonImplRL; // Real.. whichever prec -typedef WilsonImpl ZWilsonImplFH; // Float -typedef WilsonImpl ZWilsonImplDF; // Double - -typedef WilsonImpl WilsonAdjImplR; // Real.. whichever prec -typedef WilsonImpl WilsonAdjImplF; // Float -typedef WilsonImpl WilsonAdjImplD; // Double - -typedef WilsonImpl WilsonTwoIndexSymmetricImplR; // Real.. whichever prec -typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float -typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double - -typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplR; // Real.. whichever prec -typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplF; // Float -typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplD; // Double - -typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec -typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float -typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double - -typedef DomainWallVec5dImpl DomainWallVec5dImplRL; // Real.. 
whichever prec -typedef DomainWallVec5dImpl DomainWallVec5dImplFH; // Float -typedef DomainWallVec5dImpl DomainWallVec5dImplDF; // Double - -typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. whichever prec -typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float -typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double - -typedef DomainWallVec5dImpl ZDomainWallVec5dImplRL; // Real.. whichever prec -typedef DomainWallVec5dImpl ZDomainWallVec5dImplFH; // Float -typedef DomainWallVec5dImpl ZDomainWallVec5dImplDF; // Double - -typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec -typedef GparityWilsonImpl GparityWilsonImplF; // Float -typedef GparityWilsonImpl GparityWilsonImplD; // Double - -typedef GparityWilsonImpl GparityWilsonImplRL; // Real.. whichever prec -typedef GparityWilsonImpl GparityWilsonImplFH; // Float -typedef GparityWilsonImpl GparityWilsonImplDF; // Double - -typedef StaggeredImpl StaggeredImplR; // Real.. whichever prec -typedef StaggeredImpl StaggeredImplF; // Float -typedef StaggeredImpl StaggeredImplD; // Double - -typedef StaggeredVec5dImpl StaggeredVec5dImplR; // Real.. 
whichever prec -typedef StaggeredVec5dImpl StaggeredVec5dImplF; // Float -typedef StaggeredVec5dImpl StaggeredVec5dImplD; // Double - -}} - -#endif diff --git a/Grid/qcd/action/fermion/FourierAcceleratedPV.h b/Grid/qcd/action/fermion/FourierAcceleratedPV.h index d6196eee..bf23ff75 100644 --- a/Grid/qcd/action/fermion/FourierAcceleratedPV.h +++ b/Grid/qcd/action/fermion/FourierAcceleratedPV.h @@ -28,8 +28,8 @@ Author: Peter Boyle *************************************************************************************/ /* END LEGAL */ #pragma once -namespace Grid { -namespace QCD { + +NAMESPACE_BEGIN(Grid); template void get_real_const_bc(M& m, RealD& _b, RealD& _c) { @@ -63,8 +63,8 @@ class FourierAcceleratedPV { : dwfPV(_dwfPV), Umu(_Umu), cg(_cg), group_in_s(_group_in_s) { assert( dwfPV.FermionGrid()->_fdimensions[0] % (2*group_in_s) == 0); - grid5D = QCD::SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu._grid); - gridRB5D = QCD::SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu._grid); + grid5D = SpaceTimeGrid::makeFiveDimGrid(2*group_in_s, (GridCartesian*)Umu.Grid()); + gridRB5D = SpaceTimeGrid::makeFiveDimRedBlackGrid(2*group_in_s, (GridCartesian*)Umu.Grid()); } void rotatePV(const Vi& _src, Vi& dst, bool forward) const { @@ -72,13 +72,13 @@ class FourierAcceleratedPV { GridStopWatch gsw1, gsw2; typedef typename Vi::scalar_type Coeff_t; - int Ls = dst._grid->_fdimensions[0]; + int Ls = dst.Grid()->_fdimensions[0]; - Vi _tmp(dst._grid); + Vi _tmp(dst.Grid()); double phase = M_PI / (double)Ls; Coeff_t bzero(0.0,0.0); - FFT theFFT((GridCartesian*)dst._grid); + FFT theFFT((GridCartesian*)dst.Grid()); if (!forward) { gsw1.Start(); @@ -115,7 +115,7 @@ class FourierAcceleratedPV { std::cout << GridLogMessage << "Fourier-Accelerated Outer Pauli Villars"<_fdimensions[0]; + int Ls = _dst.Grid()->_fdimensions[0]; GridStopWatch gswT; gswT.Start(); @@ -126,12 +126,12 @@ class FourierAcceleratedPV { // U(true) Rightinv TMinv U(false) = 
Minv - Vi _src_diag(_dst._grid); + Vi _src_diag(_dst.Grid()); Vi _src_diag_slice(dwfPV.GaugeGrid()); Vi _dst_diag_slice(dwfPV.GaugeGrid()); Vi _src_diag_slices(grid5D); Vi _dst_diag_slices(grid5D); - Vi _dst_diag(_dst._grid); + Vi _dst_diag(_dst.Grid()); rotatePV(_src,_src_diag,false); @@ -163,7 +163,7 @@ class FourierAcceleratedPV { for (int sidx=0;sidx + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class GparityWilsonImpl : public ConjugateGaugeImpl > { +public: + + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + static const int Nhcs = Options::Nhcs; + static const bool LsVectorised=false; + + typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; + INHERIT_GIMPL_TYPES(Gimpl); + + typedef typename Options::_Coeff_t Coeff_t; + typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; + + template using iImplSpinor = iVector, Ns>, Ngp>; + template using iImplPropagator = iVector, Ns>, Ngp>; + template using iImplHalfSpinor = iVector, Nhs>, Ngp>; + template using iImplHalfCommSpinor = iVector, Nhcs>, Ngp>; + template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; + + typedef iImplSpinor SiteSpinor; + typedef iImplPropagator SitePropagator; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplHalfCommSpinor SiteHalfCommSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; + typedef Lattice PropagatorField; + typedef Lattice DoubledGaugeField; + + typedef GparityWilsonImplParams ImplParams; + typedef WilsonCompressor Compressor; + typedef WilsonStencil StencilImpl; + typedef typename StencilImpl::View_type StencilView; + + ImplParams Params; + + GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + // provide the multiply by link that is differentiated between Gparity (with + // flavour index) and non-Gparity + template + static accelerator_inline void multLink(_Spinor &phi, + const SiteDoubledGaugeField &U, + const _Spinor &chi, + int mu) + { + assert(0); + } + template + static accelerator_inline void multLink(_Spinor &phi, + const SiteDoubledGaugeField &U, 
+ const _Spinor &chi, + int mu, + StencilEntry *SE, + StencilView &St) + { + int direction = St._directions[mu]; + int distance = St._distances[mu]; + int ptype = St._permute_type[mu]; + int sl = St._simd_layout[direction]; + Coordinate icoor; + +#ifdef __CUDA_ARCH__ + _Spinor tmp; + + const int Nsimd =SiteDoubledGaugeField::Nsimd(); + int s = SIMTlane(Nsimd); + St.iCoorFromIindex(icoor,s); + + int mmu = mu % Nd; + if ( SE->_around_the_world && St.parameters.twists[mmu] ) { + + int permute_lane = (sl==1) + || ((distance== 1)&&(icoor[direction]==1)) + || ((distance==-1)&&(icoor[direction]==0)); + + if ( permute_lane ) { + tmp(0) = chi(1); + tmp(1) = chi(0); + } else { + tmp(0) = chi(0); + tmp(1) = chi(1); + } + + auto UU0=coalescedRead(U(0)(mu)); + auto UU1=coalescedRead(U(1)(mu)); + + mult(&phi(0),&UU0,&tmp(0)); + mult(&phi(1),&UU1,&tmp(1)); + + } else { + + auto UU0=coalescedRead(U(0)(mu)); + auto UU1=coalescedRead(U(1)(mu)); + + mult(&phi(0),&UU0,&chi(0)); + mult(&phi(1),&UU1,&chi(1)); + + } + +#else + typedef _Spinor vobj; + typedef typename SiteHalfSpinor::scalar_object sobj; + typedef typename SiteHalfSpinor::vector_type vector_type; + + vobj vtmp; + sobj stmp; + + const int Nsimd =vector_type::Nsimd(); + + // Fixme X.Y.Z.T hardcode in stencil + int mmu = mu % Nd; + + // assert our assumptions + assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code + assert((sl == 1) || (sl == 2)); + + if ( SE->_around_the_world && St.parameters.twists[mmu] ) { + + if ( sl == 2 ) { + + ExtractBuffer vals(Nsimd); + + extract(chi,vals); + for(int s=0;s + static accelerator_inline void loadLinkElement(Simd ®, ref &memory) + { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) + { + conformable(Uds.Grid(),GaugeGrid); + conformable(Umu.Grid(),GaugeGrid); + + GaugeLinkField Utmp (GaugeGrid); + GaugeLinkField U (GaugeGrid); + GaugeLinkField Uconj(GaugeGrid); + + Lattice > coor(GaugeGrid); + + 
for(int mu=0;mu(Umu,mu); + Uconj = conjugate(U); + + // This phase could come from a simple bc 1,1,-1,1 .. + int neglink = GaugeGrid->GlobalDimensions()[mu]-1; + if ( Params.twists[mu] ) { + Uconj = where(coor==neglink,-Uconj,Uconj); + } + + auto U_v = U.View(); + auto Uds_v = Uds.View(); + auto Uconj_v = Uconj.View(); + auto Utmp_v= Utmp.View(); + thread_foreach(ss,U_v,{ + Uds_v[ss](0)(mu) = U_v[ss](); + Uds_v[ss](1)(mu) = Uconj_v[ss](); + }); + + U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary + Uconj = adj(Cshift(Uconj,mu,-1)); + + Utmp = U; + if ( Params.twists[mu] ) { + Utmp = where(coor==0,Uconj,Utmp); + } + + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](0)(mu+4) = Utmp_v[ss](); + }); + + Utmp = Uconj; + if ( Params.twists[mu] ) { + Utmp = where(coor==0,U,Utmp); + } + + thread_foreach(ss,Utmp_v,{ + Uds_v[ss](1)(mu+4) = Utmp_v[ss](); + }); + + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { + + // DhopDir provides U or Uconj depending on coor/flavour. + GaugeLinkField link(mat.Grid()); + // use lorentz for flavour as hack. 
+ auto tmp = TraceIndex(outerProduct(Btilde, A)); + auto link_v = link.View(); + auto tmp_v = tmp.View(); + thread_foreach(ss,tmp_v,{ + link_v[ss]() = tmp_v[ss](0, 0) + conjugate(tmp_v[ss](1, 1)); + }); + PokeIndex(mat, link, mu); + return; + } + + inline void outerProductImpl(PropagatorField &mat, const FermionField &Btilde, const FermionField &A){ + //mat = outerProduct(Btilde, A); + assert(0); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { + assert(0); + /* + auto tmp = TraceIndex(P); + parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { + mat[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); + } + */ + } + + inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { + + int Ls = Btilde.Grid()->_fdimensions[0]; + + GaugeLinkField tmp(mat.Grid()); + tmp = Zero(); + auto tmp_v = tmp.View(); + auto Atilde_v = Atilde.View(); + auto Btilde_v = Btilde.View(); + thread_for(ss,tmp.Grid()->oSites(),{ + for (int s = 0; s < Ls; s++) { + int sF = s + Ls * ss; + auto ttmp = traceIndex(outerProduct(Btilde_v[sF], Atilde_v[sF])); + tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + } + }); + PokeIndex(mat, tmp, mu); + return; + } + +}; + +typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec +typedef GparityWilsonImpl GparityWilsonImplF; // Float +typedef GparityWilsonImpl GparityWilsonImplD; // Double + +typedef GparityWilsonImpl GparityWilsonImplRL; // Real.. 
whichever prec +typedef GparityWilsonImpl GparityWilsonImplFH; // Float +typedef GparityWilsonImpl GparityWilsonImplDF; // Double + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h index 3aa78d1b..b4d8d60b 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.h @@ -25,16 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_QCD_IMPR_STAG_FERMION_H #define GRID_QCD_IMPR_STAG_FERMION_H -namespace Grid { - -namespace QCD { +NAMESPACE_BEGIN(Grid); class ImprovedStaggeredFermionStatic { - public: +public: static const std::vector directions; static const std::vector displacements; static const int npoint = 16; @@ -42,7 +40,7 @@ class ImprovedStaggeredFermionStatic { template class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedStaggeredFermionStatic { - public: +public: INHERIT_IMPL_TYPES(Impl); typedef StaggeredKernels Kernels; @@ -139,7 +137,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedS // DoubleStore impl dependent void ImportGauge (const GaugeField &_Uthin ) { assert(0); } - void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat); + void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat); void ImportGaugeSimple(const GaugeField &_UUU ,const GaugeField &_U); void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); DoubledGaugeField &GetU(void) { return Umu ; } ; @@ -151,7 +149,7 @@ class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedS /////////////////////////////////////////////////////////////// // protected: - public: +public: // any other 
parameters of action ??? virtual int isTrivialEE(void) { return 1; }; virtual RealD Mass(void) { return mass; } @@ -188,11 +186,11 @@ class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedS PropagatorField &q_out, Current curr_type, unsigned int mu); - void SeqConservedCurrent(PropagatorField &q_in, + void SeqConservedCurrent(PropagatorField &q_in, PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, + Current curr_type, + unsigned int mu, + unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx); }; @@ -200,6 +198,6 @@ class ImprovedStaggeredFermion : public StaggeredKernels, public ImprovedS typedef ImprovedStaggeredFermion ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion ImprovedStaggeredFermionD; -} -} +NAMESPACE_END(Grid); + #endif diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h index 0d729e60..b10c0356 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion5D.h @@ -1,5 +1,5 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,101 +25,99 @@ Author: AzusaYamaguchi 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H -#define GRID_QCD_IMPROVED_STAGGERED_FERMION_5D_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////////////// - // This is the 4d red black case appropriate to support - //////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// This is the 4d red black case appropriate to support +//////////////////////////////////////////////////////////////////////////////// - class ImprovedStaggeredFermion5DStatic { - public: - // S-direction is INNERMOST and takes no part in the parity. - static const std::vector directions; - static const std::vector displacements; - const int npoint = 16; - }; +class ImprovedStaggeredFermion5DStatic { +public: + // S-direction is INNERMOST and takes no part in the parity. 
+ static const std::vector directions; + static const std::vector displacements; + const int npoint = 16; +}; - template - class ImprovedStaggeredFermion5D : public StaggeredKernels, public ImprovedStaggeredFermion5DStatic - { - public: - INHERIT_IMPL_TYPES(Impl); - typedef StaggeredKernels Kernels; +template +class ImprovedStaggeredFermion5D : public StaggeredKernels, public ImprovedStaggeredFermion5DStatic +{ +public: + INHERIT_IMPL_TYPES(Impl); + typedef StaggeredKernels Kernels; - FermionField _tmp; - FermionField &tmp(void) { return _tmp; } + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } - //////////////////////////////////////// - // Performance monitoring - //////////////////////////////////////// - void Report(void); - void ZeroCounters(void); - double DhopTotalTime; - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; + //////////////////////////////////////// + // Performance monitoring + //////////////////////////////////////// + void Report(void); + void ZeroCounters(void); + double DhopTotalTime; + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; double DhopComputeTime2; double DhopFaceTime; - /////////////////////////////////////////////////////////////// - // Implement the abstract base - /////////////////////////////////////////////////////////////// - GridBase *GaugeGrid(void) { return _FourDimGrid ;} - GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} - GridBase *FermionGrid(void) { return _FiveDimGrid;} - GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _FourDimGrid ;} + GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} + GridBase *FermionGrid(void) { return _FiveDimGrid;} + GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} - // 
full checkerboard operations; leave unimplemented as abstract for now - RealD M (const FermionField &in, FermionField &out); - RealD Mdag (const FermionField &in, FermionField &out); + // full checkerboard operations; leave unimplemented as abstract for now + RealD M (const FermionField &in, FermionField &out); + RealD Mdag (const FermionField &in, FermionField &out); - // half checkerboard operations - void Meooe (const FermionField &in, FermionField &out); - void Mooee (const FermionField &in, FermionField &out); - void MooeeInv (const FermionField &in, FermionField &out); + // half checkerboard operations + void Meooe (const FermionField &in, FermionField &out); + void Mooee (const FermionField &in, FermionField &out); + void MooeeInv (const FermionField &in, FermionField &out); - void MeooeDag (const FermionField &in, FermionField &out); - void MooeeDag (const FermionField &in, FermionField &out); - void MooeeInvDag (const FermionField &in, FermionField &out); + void MeooeDag (const FermionField &in, FermionField &out); + void MooeeDag (const FermionField &in, FermionField &out); + void MooeeInvDag (const FermionField &in, FermionField &out); - void Mdir (const FermionField &in, FermionField &out,int dir,int disp); - void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); - // These can be overridden by fancy 5d chiral action - void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + // These can be overridden by fancy 5d chiral action + void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void DhopDerivEO(GaugeField &mat,const FermionField &U,const 
FermionField &V,int dag); + void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - // Implement hopping term non-hermitian hopping term; half cb or both - void Dhop (const FermionField &in, FermionField &out,int dag); - void DhopOE(const FermionField &in, FermionField &out,int dag); - void DhopEO(const FermionField &in, FermionField &out,int dag); + // Implement hopping term non-hermitian hopping term; half cb or both + void Dhop (const FermionField &in, FermionField &out,int dag); + void DhopOE(const FermionField &in, FermionField &out,int dag); + void DhopEO(const FermionField &in, FermionField &out,int dag); - /////////////////////////////////////////////////////////////// - // New methods added - /////////////////////////////////////////////////////////////// - void DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - DoubledGaugeField & UUU, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag); + /////////////////////////////////////////////////////////////// + // New methods added + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + DoubledGaugeField & UUU, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag); - void DhopInternal(StencilImpl & st, - LebesgueOrder &lo, - DoubledGaugeField &U, - DoubledGaugeField &UUU, - const FermionField &in, - FermionField &out, - int dag); + void DhopInternal(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + DoubledGaugeField &UUU, + const FermionField &in, + FermionField &out, + int dag); void DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo, @@ -138,17 +136,17 @@ namespace QCD { int dag); - // Constructors + // Constructors //////////////////////////////////////////////////////////////////////////////////////////////// // Grid internal interface -- Thin link and fat link, with coefficients 
//////////////////////////////////////////////////////////////////////////////////////////////// - ImprovedStaggeredFermion5D(GaugeField &_Uthin, - GaugeField &_Ufat, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - double _mass, + ImprovedStaggeredFermion5D(GaugeField &_Uthin, + GaugeField &_Ufat, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + double _mass, RealD _c1, RealD _c2,RealD _u0, const ImplParams &p= ImplParams()); //////////////////////////////////////////////////////////////////////////////////////////////// @@ -160,11 +158,11 @@ namespace QCD { GridRedBlackCartesian &FourDimRedBlackGrid, double _mass, RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0, - const ImplParams &p= ImplParams()); - + const ImplParams &p= ImplParams()); + // DoubleStore gauge field in operator void ImportGauge (const GaugeField &_Uthin ) { assert(0); } - void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat); + void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat); void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U); void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U); // Give a reference; can be used to do an assignment or copy back out after import @@ -173,62 +171,61 @@ namespace QCD { DoubledGaugeField &GetUUU(void) { return UUUmu; }; void CopyGaugeCheckerboards(void); - /////////////////////////////////////////////////////////////// - // Data members require to support the functionality - /////////////////////////////////////////////////////////////// - public: - + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// +public: + virtual int isTrivialEE(void) { return 1; }; 
virtual RealD Mass(void) { return mass; } - GridBase *_FourDimGrid; - GridBase *_FourDimRedBlackGrid; - GridBase *_FiveDimGrid; - GridBase *_FiveDimRedBlackGrid; + GridBase *_FourDimGrid; + GridBase *_FourDimRedBlackGrid; + GridBase *_FiveDimGrid; + GridBase *_FiveDimRedBlackGrid; - RealD mass; - RealD c1; - RealD c2; - RealD u0; - int Ls; + RealD mass; + RealD c1; + RealD c2; + RealD u0; + int Ls; - //Defines the stencils for even and odd - StencilImpl Stencil; - StencilImpl StencilEven; - StencilImpl StencilOdd; + //Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; - // Copy of the gauge field , with even and odd subsets - DoubledGaugeField Umu; - DoubledGaugeField UmuEven; - DoubledGaugeField UmuOdd; + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; - DoubledGaugeField UUUmu; - DoubledGaugeField UUUmuEven; - DoubledGaugeField UUUmuOdd; + DoubledGaugeField UUUmu; + DoubledGaugeField UUUmuEven; + DoubledGaugeField UUUmuOdd; - LebesgueOrder Lebesgue; - LebesgueOrder LebesgueEvenOdd; + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; - // Comms buffer - std::vector > comm_buf; + // Comms buffer + std::vector > comm_buf; - /////////////////////////////////////////////////////////////// - // Conserved current utilities - /////////////////////////////////////////////////////////////// - void ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu); - void SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, 
+ PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx); - }; +}; -}} +NAMESPACE_END(Grid); -#endif diff --git a/Grid/qcd/action/fermion/MADWF.h b/Grid/qcd/action/fermion/MADWF.h index 064b13a8..f7f0ee1b 100644 --- a/Grid/qcd/action/fermion/MADWF.h +++ b/Grid/qcd/action/fermion/MADWF.h @@ -27,8 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template X=0> inline void convert(const Fieldi &from,Fieldo &to) @@ -109,7 +108,7 @@ class MADWF std::cout << GridLogMessage << " b " < -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include - -namespace Grid { -namespace QCD { - - template - MobiusEOFAFermion::MobiusEOFAFermion( - GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, - RealD _shift, int _pm, RealD _M5, - RealD _b, RealD _c, const ImplParams &p) : - AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, - FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, - _shift, _pm, _M5, _b, _c, p) - { - int Ls = this->Ls; - - RealD eps = 1.0; - Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls); - assert(zdata->n == this->Ls); - - std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b << - ",c=" << _c << ") with Ls=" << Ls << std::endl; - this->SetCoefficientsTanh(zdata, _b, _c); - std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 << - ",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift << - ",pm=" << _pm << ")" << std::endl; - - Approx::zolotarev_free(zdata); - - if(_shift != 0.0){ - SetCoefficientsPrecondShiftOps(); - } else { - Mooee_shift.resize(Ls, 0.0); - MooeeInv_shift_lc.resize(Ls, 0.0); - MooeeInv_shift_norm.resize(Ls, 0.0); - MooeeInvDag_shift_lc.resize(Ls, 0.0); - MooeeInvDag_shift_norm.resize(Ls, 0.0); - } - } - - /**************************************************************** - * Additional EOFA operators only called outside the inverter. - * Since speed is not essential, simple axpby-style - * implementations should be fine. 
- ***************************************************************/ - template - void MobiusEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) - { - int Ls = this->Ls; - RealD alpha = this->alpha; - - Din = zero; - if((sign == 1) && (dag == 0)) { // \Omega_{+} - for(int s=0; s - void MobiusEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - RealD b = 0.5 * ( 1.0 + this->alpha ); - RealD c = 0.5 * ( 1.0 - this->alpha ); - RealD mq1 = this->mq1; - - for(int s=0; s - void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - RealD m = this->mq1; - RealD c = 0.5 * this->alpha; - RealD d = 0.5; - - RealD DtInv_p(0.0), DtInv_m(0.0); - RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls); - FermionField tmp(this->FermionGrid()); - - for(int s=0; s sp) ? 0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1); - - if(sp == 0){ - axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp); - axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp); - } else { - axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp); - axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp); - } - - }} - } - - /*****************************************************************************************************/ - - template - RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - FermionField Din(psi._grid); - - this->Meooe5D(psi, Din); - this->DW(Din, chi, DaggerNo); - axpby(chi, 1.0, 1.0, chi, psi); - this->M5D(psi, chi); - return(norm2(chi)); - } - - template - RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - FermionField Din(psi._grid); - - this->DW(psi, Din, DaggerYes); - this->MeooeDag5D(Din, chi); - this->M5Ddag(psi, chi); - axpby(chi, 1.0, 1.0, chi, psi); - return(norm2(chi)); - } - - /******************************************************************** - * Performance critical 
fermion operators called inside the inverter - ********************************************************************/ - - template - void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; - std::vector lower(Ls,-1.0); lower[0] = this->mq1; - - // no shift term - if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } - - // fused M + shift operation - else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } - } - - template - void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; - std::vector lower(Ls,-1.0); lower[0] = this->mq1; - - // no shift term - if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); } - - // fused M + shift operation - else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } - } - - // half checkerboard operations - template - void MobiusEOFAFermion::Mooee(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - // coefficients of Mooee - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); - for(int s=0; scee[s]; - lower[s] = -this->cee[s]; - } - upper[Ls-1] *= -this->mq1; - lower[0] *= -this->mq1; - - // no shift term - if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); } - - // fused M + shift operation - else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); } - } - - template - void MobiusEOFAFermion::MooeeDag(const FermionField& psi, FermionField& chi) - { - int Ls = this->Ls; - - // coefficients of MooeeDag - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); - for(int s=0; scee[s+1]; - lower[s] = this->mq1*this->cee[Ls-1]; - } else if(s==(Ls-1)) { - upper[s] = this->mq1*this->cee[0]; - lower[s] = 
-this->cee[s-1]; - } else { - upper[s] = -this->cee[s+1]; - lower[s] = -this->cee[s-1]; - } - } - - // no shift term - if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); } - - // fused M + shift operation - else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); } - } - - /****************************************************************************************/ - - // Computes coefficients for applying Cayley preconditioned shift operators - // (Mooee + \Delta) --> Mooee_shift - // (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm - // (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm - // For the latter two cases, the operation takes the form - // [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} + - // ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} ) - template - void MobiusEOFAFermion::SetCoefficientsPrecondShiftOps() - { - int Ls = this->Ls; - int pm = this->pm; - RealD alpha = this->alpha; - RealD k = this->k; - RealD mq1 = this->mq1; - RealD shift = this->shift; - - // Initialize - Mooee_shift.resize(Ls); - MooeeInv_shift_lc.resize(Ls); - MooeeInv_shift_norm.resize(Ls); - MooeeInvDag_shift_lc.resize(Ls); - MooeeInvDag_shift_norm.resize(Ls); - - // Construct Mooee_shift - int idx(0); - Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) * - ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) ); - for(int s=0; s d = Mooee_shift; - std::vector u(Ls,0.0); - std::vector y(Ls,0.0); - std::vector q(Ls,0.0); - if(pm == 1){ u[0] = 1.0; } - else{ u[Ls-1] = 1.0; } - - // Tridiagonal matrix algorithm + Sherman-Morrison formula - // - // We solve - // ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift - // where Mooee' is the tridiagonal part of Mooee_{+}, and - // u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen - // so that the outer-product u \otimes v gives the (0,Ls-1) - // entry of Mooee_{+}. 
- // - // We do this as two solves: Mooee'*y = d and Mooee'*q = u, - // and then construct the solution to the original system - // MooeeInvDag_shift_lc = y - / ( 1 + ) q - if(pm == 1){ - for(int s=1; scee[s] / this->bee[s-1]; - d[s] -= m*d[s-1]; - u[s] -= m*u[s-1]; - } - } - y[Ls-1] = d[Ls-1] / this->bee[Ls-1]; - q[Ls-1] = u[Ls-1] / this->bee[Ls-1]; - for(int s=Ls-2; s>=0; --s){ - if(pm == 1){ - y[s] = d[s] / this->bee[s]; - q[s] = u[s] / this->bee[s]; - } else { - y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s]; - q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s]; - } - } - - // Construct MooeeInvDag_shift_lc - for(int s=0; scee[0]*y[Ls-1] / - (1.0+mq1*this->cee[0]*q[Ls-1]) * q[s]; - } else { - MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] / - (1.0+mq1*this->cee[Ls-1]*q[0]) * q[s]; - } - } - - // Compute remaining coefficients - N = (pm == 1) ? (1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]); - for(int s=0; sbee[s],s) * std::pow(this->cee[s],Ls-1-s); } - else{ MooeeInv_shift_lc[s] = std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s); } - - // MooeeInv_shift_norm - MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] / - ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; - - // MooeeInvDag_shift_norm - if(pm == 1){ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],s) * std::pow(this->cee[s],Ls-1-s) / - ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; } - else{ MooeeInvDag_shift_norm[s] = -std::pow(this->bee[s],Ls-1-s) * std::pow(this->cee[s],s) / - ( std::pow(this->bee[s],Ls) + mq1*std::pow(this->cee[s],Ls) ) / N; } - } - } - } - - // Recompute coefficients for a different value of shift constant - template - void MobiusEOFAFermion::RefreshShiftCoefficients(RealD new_shift) - { - this->shift = new_shift; - if(new_shift != 0.0){ - SetCoefficientsPrecondShiftOps(); - } else { - int Ls = this->Ls; - Mooee_shift.resize(Ls,0.0); - MooeeInv_shift_lc.resize(Ls,0.0); - 
MooeeInv_shift_norm.resize(Ls,0.0); - MooeeInvDag_shift_lc.resize(Ls,0.0); - MooeeInvDag_shift_norm.resize(Ls,0.0); - } - } - - template - void MobiusEOFAFermion::MooeeInternalCompute(int dag, int inv, - Vector >& Matp, Vector >& Matm) - { - int Ls = this->Ls; - - GridBase* grid = this->FermionRedBlackGrid(); - int LLs = grid->_rdimensions[0]; - - if(LLs == Ls){ return; } // Not vectorised in 5th direction - - Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); - Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); - - for(int s=0; sbee[s]; - Pminus(s,s) = this->bee[s]; - } - - for(int s=0; scee[s]; - Pplus(s+1,s) = -this->cee[s+1]; - } - - Pplus (0,Ls-1) = this->mq1*this->cee[0]; - Pminus(Ls-1,0) = this->mq1*this->cee[Ls-1]; - - if(this->shift != 0.0){ - RealD c = 0.5 * this->alpha; - RealD d = 0.5; - RealD N = this->shift * this->k * ( std::pow(c+d,Ls) + this->mq1*std::pow(c-d,Ls) ); - if(this->pm == 1) { - for(int s=0; s::iscomplex()) { - sp[l] = PplusMat (l*istride+s1*ostride,s2); - sm[l] = PminusMat(l*istride+s1*ostride,s2); - } else { - // if real - scalar_type tmp; - tmp = PplusMat (l*istride+s1*ostride,s2); - sp[l] = scalar_type(tmp.real(),tmp.real()); - tmp = PminusMat(l*istride+s1*ostride,s2); - sm[l] = scalar_type(tmp.real(),tmp.real()); - } - } - Matp[LLs*s2+s1] = Vp; - Matm[LLs*s2+s1] = Vm; - }} - } - - FermOpTemplateInstantiate(MobiusEOFAFermion); - GparityFermOpTemplateInstantiate(MobiusEOFAFermion); - -}} diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermion.h b/Grid/qcd/action/fermion/MobiusEOFAFermion.h index 519b49e7..6b214233 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermion.h +++ b/Grid/qcd/action/fermion/MobiusEOFAFermion.h @@ -26,108 +26,79 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef 
GRID_QCD_MOBIUS_EOFA_FERMION_H #define GRID_QCD_MOBIUS_EOFA_FERMION_H #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - template - class MobiusEOFAFermion : public AbstractEOFAFermion - { - public: - INHERIT_IMPL_TYPES(Impl); +template +class MobiusEOFAFermion : public AbstractEOFAFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); - public: - // Shift operator coefficients for red-black preconditioned Mobius EOFA - std::vector Mooee_shift; - std::vector MooeeInv_shift_lc; - std::vector MooeeInv_shift_norm; - std::vector MooeeInvDag_shift_lc; - std::vector MooeeInvDag_shift_norm; +public: + // Shift operator coefficients for red-black preconditioned Mobius EOFA + Vector Mooee_shift; + Vector MooeeInv_shift_lc; + Vector MooeeInv_shift_norm; + Vector MooeeInvDag_shift_lc; + Vector MooeeInvDag_shift_norm; - virtual void Instantiatable(void) {}; + virtual void Instantiatable(void) {}; - // EOFA-specific operations - virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); - virtual void Dtilde (const FermionField& in, FermionField& out); - virtual void DtildeInv (const FermionField& in, FermionField& out); + // EOFA-specific operations + virtual void Omega (const FermionField& in, FermionField& out, int sign, int dag); + virtual void Dtilde (const FermionField& in, FermionField& out); + virtual void DtildeInv (const FermionField& in, FermionField& out); - // override multiply - virtual RealD M (const FermionField& in, FermionField& out); - virtual RealD Mdag (const FermionField& in, FermionField& out); + // override multiply + virtual RealD M (const FermionField& in, FermionField& out); + virtual RealD Mdag (const FermionField& in, FermionField& out); - // half checkerboard operations - virtual void Mooee (const FermionField& in, FermionField& out); - virtual void MooeeDag (const FermionField& in, FermionField& out); - virtual void MooeeInv (const FermionField& in, FermionField& out); - virtual void MooeeInv_shift (const 
FermionField& in, FermionField& out); - virtual void MooeeInvDag (const FermionField& in, FermionField& out); - virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out); + // half checkerboard operations + virtual void Mooee (const FermionField& in, FermionField& out); + virtual void MooeeDag (const FermionField& in, FermionField& out); + virtual void MooeeInv (const FermionField& in, FermionField& out); + virtual void MooeeInv_shift (const FermionField& in, FermionField& out); + virtual void MooeeInvDag (const FermionField& in, FermionField& out); + virtual void MooeeInvDag_shift(const FermionField& in, FermionField& out); - virtual void M5D (const FermionField& psi, FermionField& chi); - virtual void M5Ddag (const FermionField& psi, FermionField& chi); + virtual void M5D (const FermionField& psi, FermionField& chi); + virtual void M5Ddag (const FermionField& psi, FermionField& chi); - ///////////////////////////////////////////////////// - // Instantiate different versions depending on Impl - ///////////////////////////////////////////////////// - void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + ///////////////////////////////////////////////////// + // Instantiate different versions depending on Impl + ///////////////////////////////////////////////////// + void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper); - void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs); + void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs); - void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + 
void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper); - void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs); + void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, + Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs); - void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); + virtual void RefreshShiftCoefficients(RealD new_shift); - void MooeeInternalCompute(int dag, int inv, Vector>& Matp, Vector>& Matm); + // Constructors + MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, + GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, + RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()); - void MooeeInternalAsm(const FermionField& in, FermionField& out, int LLs, int site, - Vector>& Matp, Vector>& Matm); +protected: + void SetCoefficientsPrecondShiftOps(void); +}; - void MooeeInternalZAsm(const FermionField& in, FermionField& out, int LLs, int site, - Vector>& Matp, Vector>& Matm); - - virtual void RefreshShiftCoefficients(RealD new_shift); - - // Constructors - MobiusEOFAFermion(GaugeField& _Umu, GridCartesian& FiveDimGrid, GridRedBlackCartesian& FiveDimRedBlackGrid, - GridCartesian& FourDimGrid, GridRedBlackCartesian& FourDimRedBlackGrid, - RealD _mq1, RealD _mq2, RealD _mq3, RealD _shift, int pm, - RealD _M5, RealD _b, RealD _c, const ImplParams& p=ImplParams()); - - protected: - void SetCoefficientsPrecondShiftOps(void); - }; -}} - -#define INSTANTIATE_DPERP_MOBIUS_EOFA(A)\ -template void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ 
-template void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ -template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ -template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ -template void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ -template void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi); \ -template void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); \ -template void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi); - -#undef MOBIUS_EOFA_DPERP_DENSE -#define MOBIUS_EOFA_DPERP_CACHE -#undef MOBIUS_EOFA_DPERP_LINALG -#define MOBIUS_EOFA_DPERP_VEC +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc b/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc deleted file mode 100644 index 420f6390..00000000 --- a/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc +++ /dev/null @@ -1,429 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - // FIXME -- make a version of these routines with site loop outermost for cache reuse. - - template - void MobiusEOFAFermion::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper) - { - int Ls = this->Ls; - GridBase *grid = psi._grid; - - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - for(int s=0; sM5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::M5D_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper, - std::vector &shift_coeffs) - { - int Ls = this->Ls; - int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator - GridBase *grid = psi._grid; - - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - for(int s=0; spm == 1){ spProj5p(tmp, psi._odata[ss+shift_s]); } - else{ spProj5m(tmp, psi._odata[ss+shift_s]); } - chi[ss+s] = chi[ss+s] + shift_coeffs[s]*tmp; - } - } - - this->M5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper) - { - int Ls = this->Ls; - GridBase *grid = psi._grid; - - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - auto tmp = psi._odata[0]; - for(int s=0; sM5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, std::vector &diag, std::vector &upper, - std::vector &shift_coeffs) - { - int Ls = this->Ls; - int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator - GridBase *grid = psi._grid; - - assert(phi.checkerboard == psi.checkerboard); - chi.checkerboard = psi.checkerboard; - - // Flops = 6.0*(Nc*Ns) *Ls*vol - this->M5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - chi[ss+Ls-1] = zero; - auto tmp = psi._odata[0]; - for(int s=0; spm == 1){ spProj5p(tmp, psi._odata[ss+s]); } - else{ spProj5m(tmp, psi._odata[ss+s]); } - chi[ss+shift_s] = chi[ss+shift_s] + shift_coeffs[s]*tmp; - } - } - - this->M5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::MooeeInv(const FermionField &psi, FermionField &chi) - { - if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } - - GridBase *grid = psi._grid; - int Ls = this->Ls; - - chi.checkerboard = psi.checkerboard; - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - - auto tmp = psi._odata[0]; - - // Apply (L^{\prime})^{-1} - chi[ss] = psi[ss]; // chi[0]=psi[0] - for(int s=1; slee[s-1]*tmp; - } - - // L_m^{-1} - for(int s=0; sleem[s]*tmp; - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp; - } - chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; - - // Apply U^{-1} - for(int s=Ls-2; s>=0; s--){ - spProj5m(tmp, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - this->uee[s]*tmp; - } - } - - this->MooeeInvTime += usecond(); - } - - template - void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi, FermionField &chi) - { - GridBase *grid = psi._grid; - int Ls = this->Ls; - - chi.checkerboard = psi.checkerboard; - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; - auto tmp2_spProj = psi._odata[0]; - - // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 - chi[ss] = psi[ss]; // chi[0]=psi[0] - tmp2 = MooeeInv_shift_lc[0]*psi[ss]; - 
for(int s=1; slee[s-1]*tmp1; - tmp2 = tmp2 + MooeeInv_shift_lc[s]*psi[ss+s]; - } - if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} - else{ spProj5m(tmp2_spProj, tmp2); } - - // L_m^{-1} - for(int s=0; sleem[s]*tmp1; - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s])*chi[ss+s] - (this->ueem[s]/this->dee[Ls-1])*tmp1; - } - // chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; - chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; - spProj5m(tmp1, chi[ss+Ls-1]); - chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInv_shift_norm[Ls-1]*tmp2_spProj; - - // Apply U^{-1} and add shift term - for(int s=Ls-2; s>=0; s--){ - chi[ss+s] = chi[ss+s] - this->uee[s]*tmp1; - spProj5m(tmp1, chi[ss+s]); - chi[ss+s] = chi[ss+s] + MooeeInv_shift_norm[s]*tmp2_spProj; - } - } - - this->MooeeInvTime += usecond(); - } - - template - void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi, FermionField &chi) - { - if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } - - GridBase *grid = psi._grid; - int Ls = this->Ls; - - chi.checkerboard = psi.checkerboard; - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=Ls){ - - auto tmp = psi._odata[0]; - - // Apply (U^{\prime})^{-dag} - chi[ss] = psi[ss]; - for(int s=1; suee[s-1]*tmp; - } - - // U_m^{-\dag} - for(int s=0; sueem[s]*tmp; - } - - // L_m^{-\dag} D^{-dag} - for(int s=0; sdee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp; - } - chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; - - // Apply L^{-dag} - for(int s=Ls-2; s>=0; s--){ - spProj5p(tmp, chi[ss+s+1]); - chi[ss+s] = chi[ss+s] - this->lee[s]*tmp; - } - } - - this->MooeeInvTime += usecond(); - } - - template - void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi, FermionField &chi) - { - GridBase *grid = psi._grid; - int Ls = this->Ls; - - chi.checkerboard = psi.checkerboard; - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - parallel_for(int ss=0; ssoSites(); 
ss+=Ls){ - - auto tmp1 = psi._odata[0]; - auto tmp2 = psi._odata[0]; - auto tmp2_spProj = psi._odata[0]; - - // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 - chi[ss] = psi[ss]; - tmp2 = MooeeInvDag_shift_lc[0]*psi[ss]; - for(int s=1; suee[s-1]*tmp1; - tmp2 = tmp2 + MooeeInvDag_shift_lc[s]*psi[ss+s]; - } - if(this->pm == 1){ spProj5p(tmp2_spProj, tmp2);} - else{ spProj5m(tmp2_spProj, tmp2); } - - // U_m^{-\dag} - for(int s=0; sueem[s]*tmp1; - } - - // L_m^{-\dag} D^{-dag} - for(int s=0; sdee[s])*chi[ss+s] - (this->leem[s]/this->dee[Ls-1])*tmp1; - } - chi[ss+Ls-1] = (1.0/this->dee[Ls-1])*chi[ss+Ls-1]; - spProj5p(tmp1, chi[ss+Ls-1]); - chi[ss+Ls-1] = chi[ss+Ls-1] + MooeeInvDag_shift_norm[Ls-1]*tmp2_spProj; - - // Apply L^{-dag} - for(int s=Ls-2; s>=0; s--){ - chi[ss+s] = chi[ss+s] - this->lee[s]*tmp1; - spProj5p(tmp1, chi[ss+s]); - chi[ss+s] = chi[ss+s] + MooeeInvDag_shift_norm[s]*tmp2_spProj; - } - } - - this->MooeeInvTime += usecond(); - } - - #ifdef MOBIUS_EOFA_DPERP_CACHE - - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); - - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermiondense.cc b/Grid/qcd/action/fermion/MobiusEOFAFermiondense.cc deleted file mode 100644 index d66b8cd9..00000000 --- a/Grid/qcd/action/fermion/MobiusEOFAFermiondense.cc +++ /dev/null @@ -1,184 +0,0 @@ -/************************************************************************************* - -Grid physics 
library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include -#include - -namespace Grid { -namespace QCD { - - /* - * Dense matrix versions of routines - */ - template - void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) - { - int Ls 
= this->Ls; - int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; - - int pm = this->pm; - RealD shift = this->shift; - RealD alpha = this->alpha; - RealD k = this->k; - RealD mq1 = this->mq1; - - chi.checkerboard = psi.checkerboard; - - assert(Ls==LLs); - - Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); - Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); - - for(int s=0;sbee[s]; - Pminus(s,s) = this->bee[s]; - } - - for(int s=0; scee[s]; - } - - for(int s=0; scee[s+1]; - } - Pplus (0,Ls-1) = mq1*this->cee[0]; - Pminus(Ls-1,0) = mq1*this->cee[Ls-1]; - - if(shift != 0.0){ - Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) ); - for(int s=0; s::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); - - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void 
MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc b/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc deleted file mode 100644 index c86bb995..00000000 --- a/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc +++ /dev/null @@ -1,290 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - // FIXME -- make a version of these routines with site loop outermost for cache reuse. 
- // Pminus fowards - // Pplus backwards - template - void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; s - void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } - } - } - - template - void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; s - void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) - { - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } - } - } - - template - void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } - - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - // Apply (L^{\prime})^{-1} - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] - } - - // L_m^{-1} - for(int s=0; sleem[s], chi, Ls-1, s); - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); - } - axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, 
Ls-1); - - // Apply U^{-1} - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] - } - } - - template - void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) - { - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - FermionField tmp(psi._grid); - - // Apply (L^{\prime})^{-1} - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0); - for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] - axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s); - } - - // L_m^{-1} - for(int s=0; sleem[s], chi, Ls-1, s); - } - - // U_m^{-1} D^{-1} - for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); - } - axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1); - - // Apply U^{-1} and add shift term - if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } - else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] - if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } - else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } - } - } - - template - void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } - - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - // Apply (U^{\prime})^{-dagger} - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - for(int s=1; suee[s-1]), chi, s, s-1); - } - - // U_m^{-\dagger} - for(int s=0; sueem[s]), chi, Ls-1, s); - } - - // L_m^{-\dagger} D^{-dagger} - for(int s=0; sdee[s]), chi, 
-conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); - } - axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); - - // Apply L^{-dagger} - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] - } - } - - template - void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) - { - Coeff_t one(1.0); - Coeff_t czero(0.0); - chi.checkerboard = psi.checkerboard; - int Ls = this->Ls; - - FermionField tmp(psi._grid); - - // Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0] - axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] - axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0); - for(int s=1; suee[s-1]), chi, s, s-1); - axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s); - } - - // U_m^{-\dagger} - for(int s=0; sueem[s]), chi, Ls-1, s); - } - - // L_m^{-\dagger} D^{-dagger} - for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); - } - axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); - - // Apply L^{-dagger} and add shift - if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } - else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } - for(int s=Ls-2; s>=0; s--){ - axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] - if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } - else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } - } - } - - #ifdef MOBIUS_EOFA_DPERP_LINALG - - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); - 
INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); - - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc b/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc deleted file mode 100644 index 290ba158..00000000 --- a/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc +++ /dev/null @@ -1,983 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionvec.cc - -Copyright (C) 2017 - -Author: Peter Boyle -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle -Author: David Murphy - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include -#include - -namespace Grid { -namespace QCD { - - /* - * Dense matrix versions of routines - */ - template - void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerNo, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) - { - this->MooeeInternal(psi, chi, DaggerYes, InverseYes); - } - - template - void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - const int nsimd = Simd::Nsimd(); - - Vector> u(LLs); - Vector> l(LLs); - Vector> d(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - - for(int o=0; oM5Dcalls++; - this->M5Dtime -= usecond(); - - assert(Nc == 3); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - #if 0 - - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } - - hp = 0.5*hp; - hm = 0.5*hm; - - 
spRecon5m(fp, hp); - spRecon5p(fm, hm); - - chi[ss+v] = d[v]*phi[ss+v]; - chi[ss+v] = chi[ss+v] + u[v]*fp; - chi[ss+v] = chi[ss+v] + l[v]*fm; - - } - - #else - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - // Can force these to real arithmetic and save 2x. - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); - Simd p_31 = 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - } - - #endif - } - - this->M5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) - { - #if 0 - - this->M5D(psi, phi, chi, lower, diag, upper); - - // FIXME: possible gain from vectorizing shift operation as well? - Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } - } - - #else - - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - const int nsimd = Simd::Nsimd(); - - Vector> u(LLs); - Vector> l(LLs); - Vector> d(LLs); - Vector> s(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - scalar_type* s_p = (scalar_type*) &s[0]; - - for(int o=0; oM5Dcalls++; - this->M5Dtime -= usecond(); - - assert(Nc == 3); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - int vs = (this->pm == 1) ? 
LLs-1 : 0; - Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(2)(0) : psi[ss+vs]()(0)(0); - Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(2)(1) : psi[ss+vs]()(0)(1); - Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(2)(2) : psi[ss+vs]()(0)(2); - Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(3)(0) : psi[ss+vs]()(1)(0); - Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(3)(1) : psi[ss+vs]()(1)(1); - Simd hs_12 = (this->pm == 1) ? psi[ss+vs]()(3)(2) : psi[ss+vs]()(1)(2); - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(this->pm == 1 && vs <= v){ - hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v); - hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v); - hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v); - hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v); - hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v); - hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - if(this->pm == -1 && vs >= v){ - hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v); - hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v); - hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v); - hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v); - hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v); - hs_12.v = 
Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v); - } - - // Can force these to real arithmetic and save 2x. - Simd p_00 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(), hm_00) - + switcheroo::mult(s[v]()()(), hs_00); - Simd p_01 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(), hm_01) - + switcheroo::mult(s[v]()()(), hs_01); - Simd p_02 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(), hm_02) - + switcheroo::mult(s[v]()()(), hs_02); - Simd p_10 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(), hm_10) - + switcheroo::mult(s[v]()()(), hs_10); - Simd p_11 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(), hm_11) - + switcheroo::mult(s[v]()()(), hs_11); - Simd p_12 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(), hm_12) - + switcheroo::mult(s[v]()()(), hs_12); - Simd p_20 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00) - + switcheroo::mult(s[v]()()(), hs_00) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_21 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01) - + switcheroo::mult(s[v]()()(), hs_01) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_22 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02) - + switcheroo::mult(s[v]()()(), hs_02) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_30 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10) - + switcheroo::mult(s[v]()()(), hs_10) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(), hp_10); - Simd p_31 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11) - + switcheroo::mult(s[v]()()(), hs_11) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_32 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12) - + switcheroo::mult(s[v]()()(), hs_12) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(), hp_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - } - } - - this->M5Dtime += usecond(); - - #endif - } - - template - void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) - { - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - int nsimd = Simd::Nsimd(); - 
- Vector> u(LLs); - Vector> l(LLs); - Vector> d(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - - for(int o=0; oM5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - #if 0 - - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; - - for(int v=0; v= v){ rotate(hm, hm, nsimd-1); } - - hp = hp*0.5; - hm = hm*0.5; - spRecon5p(fp, hp); - spRecon5m(fm, hm); - - chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; - chi[ss+v] = chi[ss+v] +l[v]*fm; - - } - - #else - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + 
switcheroo::mult(u[v]()()(), hp_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - - } - - #endif - - } - - this->M5Dtime += usecond(); - } - - template - void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) - { - #if 0 - - this->M5Ddag(psi, phi, chi, lower, diag, upper); - - // FIXME: possible gain from vectorizing shift operation as well? 
- Coeff_t one(1.0); - int Ls = this->Ls; - for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } - else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } - } - - #else - - GridBase* grid = psi._grid; - int Ls = this->Ls; - int LLs = grid->_rdimensions[0]; - int nsimd = Simd::Nsimd(); - - Vector> u(LLs); - Vector> l(LLs); - Vector> d(LLs); - Vector> s(LLs); - - assert(Ls/LLs == nsimd); - assert(phi.checkerboard == psi.checkerboard); - - chi.checkerboard = psi.checkerboard; - - // just directly address via type pun - typedef typename Simd::scalar_type scalar_type; - scalar_type* u_p = (scalar_type*) &u[0]; - scalar_type* l_p = (scalar_type*) &l[0]; - scalar_type* d_p = (scalar_type*) &d[0]; - scalar_type* s_p = (scalar_type*) &s[0]; - - for(int o=0; oM5Dcalls++; - this->M5Dtime -= usecond(); - - parallel_for(int ss=0; ssoSites(); ss+=LLs){ // adds LLs - - int vs = (this->pm == 1) ? LLs-1 : 0; - Simd hs_00 = (this->pm == 1) ? psi[ss+vs]()(0)(0) : psi[ss+vs]()(2)(0); - Simd hs_01 = (this->pm == 1) ? psi[ss+vs]()(0)(1) : psi[ss+vs]()(2)(1); - Simd hs_02 = (this->pm == 1) ? psi[ss+vs]()(0)(2) : psi[ss+vs]()(2)(2); - Simd hs_10 = (this->pm == 1) ? psi[ss+vs]()(1)(0) : psi[ss+vs]()(3)(0); - Simd hs_11 = (this->pm == 1) ? psi[ss+vs]()(1)(1) : psi[ss+vs]()(3)(1); - Simd hs_12 = (this->pm == 1) ? 
psi[ss+vs]()(1)(2) : psi[ss+vs]()(3)(2); - - for(int v=0; v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - - if(this->pm == 1 && vs <= v){ - hs_00.v = Optimization::Rotate::tRotate<2>(hs_00.v); - hs_01.v = Optimization::Rotate::tRotate<2>(hs_01.v); - hs_02.v = Optimization::Rotate::tRotate<2>(hs_02.v); - hs_10.v = Optimization::Rotate::tRotate<2>(hs_10.v); - hs_11.v = Optimization::Rotate::tRotate<2>(hs_11.v); - hs_12.v = Optimization::Rotate::tRotate<2>(hs_12.v); - } - - if(vm >= v){ - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - if(this->pm == -1 && vs >= v){ - hs_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_00.v); - hs_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_01.v); - hs_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_02.v); - hs_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_10.v); - hs_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_11.v); - hs_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hs_12.v); - } - - Simd p_00 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00) - + switcheroo::mult(s[v]()()(), hs_00) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(), hp_00); - Simd p_01 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01) - + switcheroo::mult(s[v]()()(), hs_01) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(), hp_01); - Simd p_02 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02) - + switcheroo::mult(s[v]()()(), hs_02) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(), hp_02); - Simd p_10 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10) - + switcheroo::mult(s[v]()()(), hs_10) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(), hp_10); - Simd p_11 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11) - + switcheroo::mult(s[v]()()(), hs_11) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(), hp_11); - Simd p_12 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12) - + switcheroo::mult(s[v]()()(), hs_12) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(), hp_12); - Simd p_20 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(), hm_00) - + switcheroo::mult(s[v]()()(), hs_00); - Simd p_21 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(), hm_01) - + switcheroo::mult(s[v]()()(), hs_01); - Simd p_22 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(), hm_02) - + switcheroo::mult(s[v]()()(), hs_02); - Simd p_30 = (this->pm == 1) ? 
switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(), hm_10) - + switcheroo::mult(s[v]()()(), hs_10); - Simd p_31 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(), hm_11) - + switcheroo::mult(s[v]()()(), hs_11); - Simd p_32 = (this->pm == 1) ? switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12) - : switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(), hm_12) - + switcheroo::mult(s[v]()()(), hs_12); - - vstream(chi[ss+v]()(0)(0), p_00); - vstream(chi[ss+v]()(0)(1), p_01); - vstream(chi[ss+v]()(0)(2), p_02); - vstream(chi[ss+v]()(1)(0), p_10); - vstream(chi[ss+v]()(1)(1), p_11); - vstream(chi[ss+v]()(1)(2), p_12); - vstream(chi[ss+v]()(2)(0), p_20); - vstream(chi[ss+v]()(2)(1), p_21); - vstream(chi[ss+v]()(2)(2), p_22); - vstream(chi[ss+v]()(3)(0), p_30); - vstream(chi[ss+v]()(3)(1), p_31); - vstream(chi[ss+v]()(3)(2), p_32); - - } - - } - - this->M5Dtime += usecond(); - - #endif - } - - #ifdef AVX512 - #include - #include - #include - #endif - - template - void MobiusEOFAFermion::MooeeInternalAsm(const FermionField& psi, FermionField& chi, - int LLs, int site, Vector >& Matp, Vector >& Matm) - { - #ifndef AVX512 - { - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; - - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0; s1); - - for(int s1=0; s1 - void MobiusEOFAFermion::MooeeInternalZAsm(const FermionField& psi, FermionField& chi, - int LLs, int site, Vector >& Matp, Vector >& Matm) - { - std::cout << "Error: zMobius not implemented for EOFA" << std::endl; - exit(-1); - }; - - template - void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) - { - int Ls = this->Ls; 
- int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; - - chi.checkerboard = psi.checkerboard; - - Vector> Matp; - Vector> Matm; - Vector>* _Matp; - Vector>* _Matm; - - // MooeeInternalCompute(dag,inv,Matp,Matm); - if(inv && dag){ - _Matp = &this->MatpInvDag; - _Matm = &this->MatmInvDag; - } - - if(inv && (!dag)){ - _Matp = &this->MatpInv; - _Matm = &this->MatmInv; - } - - if(!inv){ - MooeeInternalCompute(dag, inv, Matp, Matm); - _Matp = &Matp; - _Matm = &Matm; - } - - assert(_Matp->size() == Ls*LLs); - - this->MooeeInvCalls++; - this->MooeeInvTime -= usecond(); - - if(switcheroo::iscomplex()){ - parallel_for(auto site=0; siteMooeeInvTime += usecond(); - } - - #ifdef MOBIUS_EOFA_DPERP_VEC - - INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplD); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplF); - - INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(DomainWallVec5dImplFH); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplDF); - INSTANTIATE_DPERP_MOBIUS_EOFA(ZDomainWallVec5dImplFH); - - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - template void 
MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); - - #endif - -}} diff --git a/Grid/qcd/action/fermion/MobiusFermion.h b/Grid/qcd/action/fermion/MobiusFermion.h index b61c26d5..1cbb6609 100644 --- a/Grid/qcd/action/fermion/MobiusFermion.h +++ b/Grid/qcd/action/fermion/MobiusFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,57 +24,54 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_MOBIUS_FERMION_H #define GRID_QCD_MOBIUS_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class MobiusFermion : public CayleyFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class MobiusFermion : public CayleyFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void) {}; - // Constructors - MobiusFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD b, RealD c,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void) {}; + // Constructors + MobiusFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD b, RealD c,const ImplParams &p= ImplParams()) : - CayleyFermion5D(_Umu, - 
FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) - { - RealD eps = 1.0; + { + RealD eps = 1.0; - std::cout<Ls);// eps is ignored for higham - assert(zdata->n==this->Ls); + std::cout<Ls);// eps is ignored for higham + assert(zdata->n==this->Ls); - // Call base setter - this->SetCoefficientsTanh(zdata,b,c); + // Call base setter + this->SetCoefficientsTanh(zdata,b,c); - Approx::zolotarev_free(zdata); + Approx::zolotarev_free(zdata); - } - - }; - } -} + +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/MobiusZolotarevFermion.h b/Grid/qcd/action/fermion/MobiusZolotarevFermion.h index 078d4f3e..48496773 100644 --- a/Grid/qcd/action/fermion/MobiusZolotarevFermion.h +++ b/Grid/qcd/action/fermion/MobiusZolotarevFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,58 +24,55 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class MobiusZolotarevFermion : public CayleyFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class MobiusZolotarevFermion : public CayleyFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void) {}; - // Constructors - MobiusZolotarevFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD b, RealD c, - RealD lo, RealD hi,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void) {}; + // Constructors + MobiusZolotarevFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD b, RealD c, + RealD lo, RealD hi,const ImplParams &p= ImplParams()) : - CayleyFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) - { - RealD eps = lo/hi; + { + RealD eps = lo/hi; - Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0); - assert(zdata->n==this->Ls); + Approx::zolotarev_data *zdata = Approx::zolotarev(eps,this->Ls,0); + assert(zdata->n==this->Ls); - std::cout<SetCoefficientsZolotarev(hi,zdata,b,c); + // Call base setter + this->SetCoefficientsZolotarev(hi,zdata,b,c); - 
Approx::zolotarev_free(zdata); - } - - }; - + Approx::zolotarev_free(zdata); } -} + +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h index fd7d74df..350e89e2 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,46 +24,44 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { - - template - class OverlapWilsonCayleyTanhFermion : public MobiusFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - public: +template +class OverlapWilsonCayleyTanhFermion : public MobiusFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { this->MomentumSpacePropagatorHw(out,in,_m,twist); - }; + }; - // Constructors - OverlapWilsonCayleyTanhFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD scale,const ImplParams &p= ImplParams()) : + // Constructors + OverlapWilsonCayleyTanhFermion(GaugeField &_Umu, + GridCartesian 
&FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale,const ImplParams &p= ImplParams()) : - // b+c=scale, b-c = 0 <=> b =c = scale/2 - MobiusFermion(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p) - { - } - }; + // b+c=scale, b-c = 0 <=> b =c = scale/2 + MobiusFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5*scale,0.5*scale,p) + { } -} +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h index 4f1adbbf..d15690fa 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,45 +24,42 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class OverlapWilsonCayleyZolotarevFermion : public MobiusZolotarevFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - public: + // Constructors - // Constructors + OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo, RealD hi,const ImplParams &p= ImplParams()) : + // b+c=1.0, b-c = 0 <=> b =c = 1/2 + MobiusZolotarevFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p) - OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD lo, RealD hi,const ImplParams &p= ImplParams()) : - // b+c=1.0, b-c = 0 <=> b =c = 1/2 - MobiusZolotarevFermion(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,0.5,0.5,lo,hi,p) + {} - {} +}; - }; - - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h index 38d0fda2..9d1a9a86 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h +++ 
b/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,48 +24,47 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class OverlapWilsonContFracTanhFermion : public ContinuedFractionFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void){}; - // Constructors - OverlapWilsonContFracTanhFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD scale,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void){}; + // Constructors + OverlapWilsonContFracTanhFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale,const ImplParams &p= ImplParams()) : - // b+c=scale, b-c = 0 <=> b =c = scale/2 - ContinuedFractionFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) - { - 
assert((this->Ls&0x1)==1); // Odd Ls required - int nrational=this->Ls-1;// Even rational order - Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham - this->SetCoefficientsTanh(zdata,scale); - Approx::zolotarev_free(zdata); - } - }; + // b+c=scale, b-c = 0 <=> b =c = scale/2 + ContinuedFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + { + assert((this->Ls&0x1)==1); // Odd Ls required + int nrational=this->Ls-1;// Even rational order + Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham + this->SetCoefficientsTanh(zdata,scale); + Approx::zolotarev_free(zdata); } -} +}; + +NAMESPACE_END(Grid); + #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h index 6773b4d2..ce796d4a 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,51 +24,49 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); - template - class OverlapWilsonContFracZolotarevFermion : public ContinuedFractionFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - - virtual void Instantiatable(void){}; - // Constructors - OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD lo,RealD hi,const ImplParams &p= ImplParams()): + virtual void Instantiatable(void){}; + // Constructors + OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo,RealD hi,const ImplParams &p= ImplParams()): - // b+c=scale, b-c = 0 <=> b =c = scale/2 - ContinuedFractionFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) - { - assert((this->Ls&0x1)==1); // Odd Ls required + // b+c=scale, b-c = 0 <=> b =c = scale/2 + ContinuedFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + { + assert((this->Ls&0x1)==1); // Odd Ls required - int nrational=this->Ls;// Odd rational order - RealD eps = lo/hi; + int nrational=this->Ls;// Odd rational order + RealD eps = 
lo/hi; - Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0); - this->SetCoefficientsZolotarev(hi,zdata); - Approx::zolotarev_free(zdata); + Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0); + this->SetCoefficientsZolotarev(hi,zdata); + Approx::zolotarev_free(zdata); - } - }; } -} +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h index 84c4f597..f2fb46cd 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,48 +24,46 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class OverlapWilsonPartialFractionTanhFermion : public PartialFractionFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void){}; - // Constructors - OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD scale,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void){}; + // Constructors + OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD scale,const ImplParams &p= ImplParams()) : - // b+c=scale, b-c = 0 <=> b =c = scale/2 - PartialFractionFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) - { - assert((this->Ls&0x1)==1); // Odd Ls required - int nrational=this->Ls-1;// Even rational order - Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham - this->SetCoefficientsTanh(zdata,scale); - Approx::zolotarev_free(zdata); - } - }; + // b+c=scale, b-c = 0 <=> b =c = scale/2 + PartialFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + 
FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + { + assert((this->Ls&0x1)==1); // Odd Ls required + int nrational=this->Ls-1;// Even rational order + Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational);// eps is ignored for higham + this->SetCoefficientsTanh(zdata,scale); + Approx::zolotarev_free(zdata); } -} +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h index dc275852..f98b64a9 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,51 +24,50 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); - template - class OverlapWilsonPartialFractionZolotarevFermion : public PartialFractionFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - - virtual void Instantiatable(void){}; - // Constructors - OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD lo,RealD hi,const ImplParams &p= ImplParams()): + virtual void Instantiatable(void){}; + // Constructors + OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo,RealD hi,const ImplParams &p= ImplParams()): - // b+c=scale, b-c = 0 <=> b =c = scale/2 - PartialFractionFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) - { - assert((this->Ls&0x1)==1); // Odd Ls required + // b+c=scale, b-c = 0 <=> b =c = scale/2 + PartialFractionFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + { + assert((this->Ls&0x1)==1); // Odd Ls required - int nrational=this->Ls;// Odd rational order - RealD eps = lo/hi; + int nrational=this->Ls;// Odd rational 
order + RealD eps = lo/hi; - Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0); - this->SetCoefficientsZolotarev(hi,zdata); - Approx::zolotarev_free(zdata); + Approx::zolotarev_data *zdata = Approx::zolotarev(eps,nrational,0); + this->SetCoefficientsZolotarev(hi,zdata); + Approx::zolotarev_free(zdata); - } - }; } -} +}; + +NAMESPACE_END(Grid); + #endif diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.cc b/Grid/qcd/action/fermion/PartialFractionFermion5D.cc deleted file mode 100644 index 11840027..00000000 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.cc +++ /dev/null @@ -1,459 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include - -namespace Grid { - namespace QCD { - - - template - void PartialFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ - // this does both dag and undag but is trivial; make a common helper routing - - int sign = 1; - int Ls = this->Ls; - - this->DhopDir(psi,chi,dir,disp); - - int nblock=(Ls-1)/2; - for(int b=0;b - void PartialFractionFermion5D::Meooe_internal(const FermionField &psi, FermionField &chi,int dag) - { - int Ls = this->Ls; - int sign = dag ? (-1) : 1; - - if ( psi.checkerboard == Odd ) { - this->DhopEO(psi,chi,DaggerNo); - } else { - this->DhopOE(psi,chi,DaggerNo); - } - - int nblock=(Ls-1)/2; - for(int b=0;b - void PartialFractionFermion5D::Mooee_internal(const FermionField &psi, FermionField &chi,int dag) - { - // again dag and undag are trivially related - int sign = dag ? (-1) : 1; - int Ls = this->Ls; - - int nblock=(Ls-1)/2; - for(int b=0;b - void PartialFractionFermion5D::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag) - { - int sign = dag ? (-1) : 1; - int Ls = this->Ls; - - FermionField tmp(psi._grid); - - /////////////////////////////////////////////////////////////////////////////////////// - //Linv - /////////////////////////////////////////////////////////////////////////////////////// - int nblock=(Ls-1)/2; - - axpy(chi,0.0,psi,psi); // Identity piece - - for(int b=0;b - void PartialFractionFermion5D::M_internal(const FermionField &psi, FermionField &chi,int dag) - { - FermionField D(psi._grid); - - int Ls = this->Ls; - int sign = dag ? (-1) : 1; - - // For partial frac Hw case (b5=c5=1) chroma quirkily computes - // - // Conventions for partfrac appear to be a mess. 
- // Tony's Nara lectures have - // - // BlockDiag( H/p_i 1 | 1 ) - // ( 1 p_i H / q_i^2 | 0 ) - // --------------------------------- - // ( -1 0 | R +p0 H ) - // - //Chroma ( -2H 2sqrt(q_i) | 0 ) - // (2 sqrt(q_i) 2H | 2 sqrt(p_i) ) - // --------------------------------- - // ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H - // - // Edwards/Joo/Kennedy/Wenger - // - // Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields - // incorporate the approx scale factor. This is obtained by propagating the - // scale on "H" out to the off diagonal elements as follows: - // - // BlockDiag( H/p_i 1 | 1 ) - // ( 1 p_i H / q_i^2 | 0 ) - // --------------------------------- - // ( -1 0 | R + p_0 H ) - // - // becomes: - // BlockDiag( H/ sp_i 1 | 1 ) - // ( 1 sp_i H / s^2q_i^2 | 0 ) - // --------------------------------- - // ( -1 0 | R + p_0/s H ) - // - // - // This is implemented in Chroma by - // p0' = p0/approxMax - // p_i' = p_i*approxMax - // q_i' = q_i*approxMax*approxMax - // - // After the equivalence transform is applied the matrix becomes - // - //Chroma ( -2H sqrt(q'_i) | 0 ) - // (sqrt(q'_i) 2H | sqrt(p'_i) ) - // --------------------------------- - // ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H - // - // = ( -2H sqrt(q_i)amax | 0 ) - // (sqrt(q_i)amax 2H | sqrt(p_i*amax) ) - // --------------------------------- - // ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H - // - - this->DW(psi,D,DaggerNo); - - int nblock=(Ls-1)/2; - for(int b=0;bmass)/(1-this->mass); - //R g5 psi[Ls] + p[0] H - ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1); - - for(int b=0;b - RealD PartialFractionFermion5D::M (const FermionField &in, FermionField &out) - { - M_internal(in,out,DaggerNo); - return norm2(out); - } - template - RealD PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) - { - M_internal(in,out,DaggerYes); - return norm2(out); - } - - template - void PartialFractionFermion5D::Meooe (const FermionField &in, FermionField 
&out) - { - Meooe_internal(in,out,DaggerNo); - } - template - void PartialFractionFermion5D::MeooeDag (const FermionField &in, FermionField &out) - { - Meooe_internal(in,out,DaggerYes); - } - template - void PartialFractionFermion5D::Mooee (const FermionField &in, FermionField &out) - { - Mooee_internal(in,out,DaggerNo); - } - template - void PartialFractionFermion5D::MooeeDag (const FermionField &in, FermionField &out) - { - Mooee_internal(in,out,DaggerYes); - } - - template - void PartialFractionFermion5D::MooeeInv (const FermionField &in, FermionField &out) - { - MooeeInv_internal(in,out,DaggerNo); - } - template - void PartialFractionFermion5D::MooeeInvDag (const FermionField &in, FermionField &out) - { - MooeeInv_internal(in,out,DaggerYes); - } - - - // force terms; five routines; default to Dhop on diagonal - template - void PartialFractionFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int nblock=(Ls-1)/2; - for(int b=0;bDhopDeriv(mat,D,V,DaggerNo); - }; - template - void PartialFractionFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int nblock=(Ls-1)/2; - for(int b=0;bDhopDerivOE(mat,D,V,DaggerNo); - }; - template - void PartialFractionFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - int Ls = this->Ls; - - FermionField D(V._grid); - - int nblock=(Ls-1)/2; - for(int b=0;bDhopDerivEO(mat,D,V,DaggerNo); - }; - - template - void PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){ - SetCoefficientsZolotarev(1.0/scale,zdata); - } - template - void PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){ - - // check on degree matching - // std::cout<n << " - n"<da << " -da "<db << " -db"<dn << " -dn"<dd << " -dd"<Ls; - - assert(Ls == (2*zdata->da 
-1) ); - - // Part frac - // RealD R; - R=(1+mass)/(1-mass); - dw_diag = (4.0-this->M5); - - // std::vector p; - // std::vector q; - p.resize(zdata->da); - q.resize(zdata->dd); - - for(int n=0;nda;n++){ - p[n] = zdata -> alpha[n]; - } - for(int n=0;ndd;n++){ - q[n] = -zdata -> ap[n]; - } - - scale= part_frac_chroma_convention ? 2.0 : 1.0; // Chroma conventions annoy me - - amax=zolo_hi; - } - - template - void PartialFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) - { - int Ls = this->Ls; - conformable(solution5d._grid,this->FermionGrid()); - conformable(exported4d._grid,this->GaugeGrid()); - ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); - } - template - void PartialFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) - { - int Ls = this->Ls; - conformable(imported5d._grid,this->FermionGrid()); - conformable(input4d._grid ,this->GaugeGrid()); - FermionField tmp(this->FermionGrid()); - tmp=zero; - InsertSlice(input4d, tmp, Ls-1, Ls-1); - tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; - this->Dminus(tmp,imported5d); - } - - // Constructors - template - PartialFractionFermion5D::PartialFractionFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD M5, - const ImplParams &p) : - WilsonFermion5D(_Umu, - FiveDimGrid, FiveDimRedBlackGrid, - FourDimGrid, FourDimRedBlackGrid,M5,p), - mass(_mass) - - { - int Ls = this->Ls; - - assert((Ls&0x1)==1); // Odd Ls required - int nrational=Ls-1; - - - Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational); - - // NB: chroma uses a cast to "float" for the zolotarev range(!?). 
- // this creates a real difference in the operator which I do not like but we can replicate here - // to demonstrate compatibility - // RealD eps = (zolo_lo / zolo_hi); - // zdata = bfm_zolotarev(eps,nrational,0); - - SetCoefficientsTanh(zdata,1.0); - - Approx::zolotarev_free(zdata); - - } - - FermOpTemplateInstantiate(PartialFractionFermion5D); - - } -} - diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index 91f1bd3c..d61515f0 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,51 +24,49 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_PARTIAL_FRACTION_H #define GRID_QCD_PARTIAL_FRACTION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class PartialFractionFermion5D : public WilsonFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); - template - class PartialFractionFermion5D : public WilsonFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); + const int part_frac_chroma_convention=1; - const int part_frac_chroma_convention=1; + void Meooe_internal(const FermionField &in, FermionField &out,int dag); + void Mooee_internal(const FermionField &in, FermionField &out,int dag); + void MooeeInv_internal(const FermionField &in, FermionField &out,int dag); + void M_internal(const FermionField &in, FermionField &out,int 
dag); - void Meooe_internal(const FermionField &in, FermionField &out,int dag); - void Mooee_internal(const FermionField &in, FermionField &out,int dag); - void MooeeInv_internal(const FermionField &in, FermionField &out,int dag); - void M_internal(const FermionField &in, FermionField &out,int dag); + // override multiply + virtual RealD M (const FermionField &in, FermionField &out); + virtual RealD Mdag (const FermionField &in, FermionField &out); - // override multiply - virtual RealD M (const FermionField &in, FermionField &out); - virtual RealD Mdag (const FermionField &in, FermionField &out); + // half checkerboard operaions + virtual void Meooe (const FermionField &in, FermionField &out); + virtual void MeooeDag (const FermionField &in, FermionField &out); + virtual void Mooee (const FermionField &in, FermionField &out); + virtual void MooeeDag (const FermionField &in, FermionField &out); + virtual void MooeeInv (const FermionField &in, FermionField &out); + virtual void MooeeInvDag (const FermionField &in, FermionField &out); - // half checkerboard operaions - virtual void Meooe (const FermionField &in, FermionField &out); - virtual void MeooeDag (const FermionField &in, FermionField &out); - virtual void Mooee (const FermionField &in, FermionField &out); - virtual void MooeeDag (const FermionField &in, FermionField &out); - virtual void MooeeInv (const FermionField &in, FermionField &out); - virtual void MooeeInvDag (const FermionField &in, FermionField &out); + // force terms; five routines; default to Dhop on diagonal + virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - // force terms; five routines; default to Dhop on diagonal - virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual 
void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void Instantiatable(void) =0; // ensure no make-eee - virtual void Instantiatable(void) =0; // ensure no make-eee - - // Efficient support for multigrid coarsening - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); + // Efficient support for multigrid coarsening + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp); /////////////////////////////////////////////////////////////// // Physical surface field utilities @@ -76,32 +74,30 @@ namespace Grid { virtual void ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d); virtual void ImportPhysicalFermionSource (const FermionField &input4d,FermionField &imported5d); - // Constructors - PartialFractionFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD M5,const ImplParams &p= ImplParams()); + // Constructors + PartialFractionFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5,const ImplParams &p= ImplParams()); - protected: +protected: - virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale); - virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata); + virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale); + virtual void SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata); - // Part frac - RealD mass; - RealD dw_diag; - RealD R; - RealD amax; - RealD scale; - std::vector p; - std::vector q; + // Part frac + RealD mass; + RealD dw_diag; + RealD R; + RealD amax; + 
RealD scale; + Vector p; + Vector q; - }; +}; - - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/PauliVillarsInverters.h b/Grid/qcd/action/fermion/PauliVillarsInverters.h index b10640e3..7d003087 100644 --- a/Grid/qcd/action/fermion/PauliVillarsInverters.h +++ b/Grid/qcd/action/fermion/PauliVillarsInverters.h @@ -27,8 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template class PauliVillarsSolverUnprec @@ -90,6 +89,4 @@ class PauliVillarsSolverFourierAccel }; }; - -} -} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/Reconstruct5Dprop.h b/Grid/qcd/action/fermion/Reconstruct5Dprop.h index 6862c5ee..93af1ab8 100644 --- a/Grid/qcd/action/fermion/Reconstruct5Dprop.h +++ b/Grid/qcd/action/fermion/Reconstruct5Dprop.h @@ -27,8 +27,7 @@ Author: Peter Boyle /* END LEGAL */ #pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template class Reconstruct5DfromPhysical { private: @@ -131,5 +130,5 @@ template class Reconstruct5DfromPhysical { } }; -} -} +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/ScaledShamirFermion.h b/Grid/qcd/action/fermion/ScaledShamirFermion.h index b779b9c0..67ae8012 100644 --- a/Grid/qcd/action/fermion/ScaledShamirFermion.h +++ b/Grid/qcd/action/fermion/ScaledShamirFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,46 +24,43 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H #define GRID_QCD_SCALED_SHAMIR_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class ScaledShamirFermion : public MobiusFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); - template - class ScaledShamirFermion : public MobiusFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - - // Constructors - ScaledShamirFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, -// RealD scale): - RealD scale,const ImplParams &p= ImplParams()) : + // Constructors + ScaledShamirFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + // RealD scale): + RealD scale,const ImplParams &p= ImplParams()) : - // b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1 - MobiusFermion(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p) -// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0)) - { - } - - }; - + // b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1 + MobiusFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p) + // FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0)) + { } -} + +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/SchurDiagTwoKappa.h b/Grid/qcd/action/fermion/SchurDiagTwoKappa.h index 8305f98a..1545c245 100644 --- 
a/Grid/qcd/action/fermion/SchurDiagTwoKappa.h +++ b/Grid/qcd/action/fermion/SchurDiagTwoKappa.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,40 +24,40 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef _SCHUR_DIAG_TWO_KAPPA_H -#define _SCHUR_DIAG_TWO_KAPPA_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once -namespace Grid { +NAMESPACE_BEGIN(Grid); - // This is specific to (Z)mobius fermions - template - class KappaSimilarityTransform { - public: - INHERIT_IMPL_TYPES(Matrix); - std::vector kappa, kappaDag, kappaInv, kappaInvDag; +// This is specific to (Z)mobius fermions +template +class KappaSimilarityTransform { +public: + INHERIT_IMPL_TYPES(Matrix); + Vector kappa, kappaDag, kappaInv, kappaInvDag; - KappaSimilarityTransform (Matrix &zmob) { - for (int i=0;i<(int)zmob.bs.size();i++) { - Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); - kappa.push_back( k ); - kappaDag.push_back( conj(k) ); - kappaInv.push_back( 1.0 / k ); - kappaInvDag.push_back( 1.0 / conj(k) ); - } + KappaSimilarityTransform (Matrix &zmob) { + for (int i=0;i<(int)zmob.bs.size();i++) { + Coeff_t k = 1.0 / ( 2.0 * (zmob.bs[i] *(4 - zmob.M5) + 1.0) ); + kappa.push_back( k ); + kappaDag.push_back( conj(k) ); + kappaInv.push_back( 1.0 / k ); + kappaInvDag.push_back( 1.0 / conj(k) ); } + } template - void sscale(const Lattice& in, Lattice& out, Coeff_t* s) { - GridBase *grid=out._grid; - out.checkerboard = in.checkerboard; + void sscale(const Lattice& in, Lattice& out, Coeff_t* s) { + 
GridBase *grid=out.Grid(); + out.Checkerboard() = in.Checkerboard(); assert(grid->_simd_layout[0] == 1); // should be fine for ZMobius for now int Ls = grid->_rdimensions[0]; - parallel_for(int ss=0;ss<grid->oSites();ss++){ - vobj tmp = s[ss % Ls]*in._odata[ss]; - vstream(out._odata[ss],tmp); - } + thread_for(ss, grid->oSites(), + { + vobj tmp = s[ss % Ls]*in[ss]; + vstream(out[ss],tmp); + }); } RealD sscale_norm(const Field& in, Field& out, Coeff_t* s) { @@ -70,33 +70,33 @@ namespace Grid { virtual RealD MInv (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInv[0]);} virtual RealD MInvDag (const Field& in, Field& out) { return sscale_norm(in,out,&kappaInvDag[0]);} - }; +}; - template - class SchurDiagTwoKappaOperator : public SchurOperatorBase { - public: - KappaSimilarityTransform _S; - SchurDiagTwoOperator _Mat; +template +class SchurDiagTwoKappaOperator : public SchurOperatorBase { +public: + KappaSimilarityTransform _S; + SchurDiagTwoOperator _Mat; - SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; + SchurDiagTwoKappaOperator (Matrix &Mat): _S(Mat), _Mat(Mat) {}; - virtual RealD Mpc (const Field &in, Field &out) { - Field tmp(in._grid); + virtual RealD Mpc (const Field &in, Field &out) { + Field tmp(in.Grid()); - _S.MInv(in,out); - _Mat.Mpc(out,tmp); - return _S.M(tmp,out); + _S.MInv(in,out); + _Mat.Mpc(out,tmp); + return _S.M(tmp,out); - } - virtual RealD MpcDag (const Field &in, Field &out){ - Field tmp(in._grid); + } + virtual RealD MpcDag (const Field &in, Field &out){ + Field tmp(in.Grid()); - _S.MDag(in,out); - _Mat.MpcDag(out,tmp); - return _S.MInvDag(tmp,out); - } - }; + _S.MDag(in,out); + _Mat.MpcDag(out,tmp); + return _S.MInvDag(tmp,out); + } +}; + +NAMESPACE_END(Grid); -} -#endif diff --git a/Grid/qcd/action/fermion/ShamirZolotarevFermion.h b/Grid/qcd/action/fermion/ShamirZolotarevFermion.h index f9397911..54463a9f 100644 --- a/Grid/qcd/action/fermion/ShamirZolotarevFermion.h +++
b/Grid/qcd/action/fermion/ShamirZolotarevFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,46 +24,43 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class ShamirZolotarevFermion : public MobiusZolotarevFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); - template - class ShamirZolotarevFermion : public MobiusZolotarevFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - - // Constructors + // Constructors - ShamirZolotarevFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - RealD lo, RealD hi,const ImplParams &p= ImplParams()) : + ShamirZolotarevFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + RealD lo, RealD hi,const ImplParams &p= ImplParams()) : - // b+c = 1; b-c = 1 => b=1, c=0 - MobiusZolotarevFermion(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p) + // b+c = 1; b-c = 1 => b=1, c=0 + MobiusZolotarevFermion(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + 
FourDimRedBlackGrid,_mass,_M5,1.0,0.0,lo,hi,p) - {} + {} - }; +}; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/StaggeredImpl.h b/Grid/qcd/action/fermion/StaggeredImpl.h new file mode 100644 index 00000000..8adf45a4 --- /dev/null +++ b/Grid/qcd/action/fermion/StaggeredImpl.h @@ -0,0 +1,175 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class StaggeredImpl : public PeriodicGaugeImpl > +{ + +public: + + typedef RealD _Coeff_t ; + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + static const bool LsVectorised=false; + typedef PeriodicGaugeImpl > Gimpl; + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 
1 : 0;} + + typedef _Coeff_t Coeff_t; + + INHERIT_GIMPL_TYPES(Gimpl); + + template using iImplSpinor = iScalar > >; + template using iImplHalfSpinor = iScalar > >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplPropagator = iScalar > >; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + typedef iImplPropagator SitePropagator; + + typedef Lattice FermionField; + typedef Lattice DoubledGaugeField; + typedef Lattice PropagatorField; + + typedef StaggeredImplParams ImplParams; + typedef SimpleCompressor Compressor; + typedef CartesianStencil StencilImpl; + typedef typename StencilImpl::View_type StencilView; + + ImplParams Params; + + StaggeredImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + static accelerator_inline void multLink(SiteSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteSpinor &chi, + int mu) + { + mult(&phi(), &U(mu), &chi()); + } + static accelerator_inline void multLinkAdd(SiteSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteSpinor &chi, + int mu) + { + mac(&phi(), &U(mu), &chi()); + } + + template + static accelerator_inline void loadLinkElement(Simd &reg, ref &memory) + { + reg = memory; + } + + inline void InsertGaugeField(DoubledGaugeField &U_ds, + const GaugeLinkField &U,int mu) + { + PokeIndex(U_ds, U, mu); + } + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &UUUds, // for Naik term + DoubledGaugeField &Uds, + const GaugeField &Uthin, + const GaugeField &Ufat) { + conformable(Uds.Grid(), GaugeGrid); + conformable(Uthin.Grid(), GaugeGrid); + conformable(Ufat.Grid(), GaugeGrid); + GaugeLinkField U(GaugeGrid); + GaugeLinkField UU(GaugeGrid); + GaugeLinkField UUU(GaugeGrid); + GaugeLinkField Udag(GaugeGrid); + GaugeLinkField UUUdag(GaugeGrid); + for (int mu = 0; mu < Nd; mu++) { + + // Staggered Phase.
+ Lattice > coor(GaugeGrid); + Lattice > x(GaugeGrid); LatticeCoordinate(x,0); + Lattice > y(GaugeGrid); LatticeCoordinate(y,1); + Lattice > z(GaugeGrid); LatticeCoordinate(z,2); + Lattice > t(GaugeGrid); LatticeCoordinate(t,3); + + Lattice > lin_z(GaugeGrid); lin_z=x+y; + Lattice > lin_t(GaugeGrid); lin_t=x+y+z; + + ComplexField phases(GaugeGrid); phases=1.0; + + if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); + if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); + if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); + + // 1 hop based on fat links + U = PeekIndex(Ufat, mu); + Udag = adj( Cshift(U, mu, -1)); + + U = U *phases; + Udag = Udag *phases; + + InsertGaugeField(Uds,U,mu); + InsertGaugeField(Uds,Udag,mu+4); + // PokeIndex(Uds, U, mu); + // PokeIndex(Uds, Udag, mu + 4); + + // 3 hop based on thin links. Crazy huh ? + U = PeekIndex(Uthin, mu); + UU = Gimpl::CovShiftForward(U,mu,U); + UUU= Gimpl::CovShiftForward(U,mu,UU); + + UUUdag = adj( Cshift(UUU, mu, -3)); + + UUU = UUU *phases; + UUUdag = UUUdag *phases; + + InsertGaugeField(UUUds,UUU,mu); + InsertGaugeField(UUUds,UUUdag,mu+4); + + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + GaugeLinkField link(mat.Grid()); + link = TraceIndex(outerProduct(Btilde,A)); + PokeIndex(mat,link,mu); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ + assert (0); + // Must never hit + } +}; +typedef StaggeredImpl StaggeredImplR; // Real..
whichever prec +typedef StaggeredImpl StaggeredImplF; // Float +typedef StaggeredImpl StaggeredImplD; // Double + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/StaggeredKernels.h b/Grid/qcd/action/fermion/StaggeredKernels.h index 79de1a68..6ef0ab9d 100644 --- a/Grid/qcd/action/fermion/StaggeredKernels.h +++ b/Grid/qcd/action/fermion/StaggeredKernels.h @@ -26,11 +26,9 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_QCD_STAGGERED_KERNELS_H -#define GRID_QCD_STAGGERED_KERNELS_H +#pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid) //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Helper routines that implement Staggered stencil for a single site. @@ -51,72 +49,69 @@ template class StaggeredKernels : public FermionOperator , pub public: - void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp); + void DhopDirKernel(StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dir,int disp); /////////////////////////////////////////////////////////////////////////////////////// // Generic Nc kernels /////////////////////////////////////////////////////////////////////////////////////// void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + 
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); /////////////////////////////////////////////////////////////////////////////////////// // Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U,DoubledGaugeField &UUU, + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U,DoubledGaugeField &UUU, + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U,DoubledGaugeField &UUU, + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); /////////////////////////////////////////////////////////////////////////////////////// // Asm Nc=3 specific kernels /////////////////////////////////////////////////////////////////////////////////////// void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U,DoubledGaugeField &UUU, + DoubledGaugeFieldView 
&U,DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag); + const FermionFieldView &in, FermionFieldView &out,int dag); /////////////////////////////////////////////////////////////////////////////////////////////////// // Generic interface; fan out to right routine /////////////////////////////////////////////////////////////////////////////////////////////////// void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out, int interior=1,int exterior=1); + const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out, int interior=1,int exterior=1); + const FermionFieldView &in, FermionFieldView &out, int interior=1,int exterior=1); void DhopSite(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor * buf, int LLs, int sU, - const FermionField &in, FermionField &out, int dag, int interior,int exterior); + const FermionFieldView &in, FermionFieldView &out, int dag, int interior,int exterior); public: StaggeredKernels(const ImplParams &p = ImplParams()); }; - -}} - -#endif +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/StaggeredVec5dImpl.h b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h new file mode 100644 index 00000000..2d4de18e --- /dev/null +++ b/Grid/qcd/action/fermion/StaggeredVec5dImpl.h @@ -0,0 +1,203 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: 
./lib/qcd/action/fermion/FermionOperatorImpl.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +class StaggeredVec5dImpl : public PeriodicGaugeImpl > { + +public: + + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + static const bool LsVectorised=true; + typedef RealD Coeff_t ; + typedef PeriodicGaugeImpl > Gimpl; + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 
1 : 0;} + + + INHERIT_GIMPL_TYPES(Gimpl); + + template using iImplSpinor = iScalar > >; + template using iImplHalfSpinor = iScalar > >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; + template using iImplPropagator = iScalar > >; + + // Make the doubled gauge field a *scalar* + typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar + typedef iImplGaugeField SiteScalarGaugeField; // scalar + typedef iImplGaugeLink SiteScalarGaugeLink; // scalar + typedef iImplPropagator SitePropagator; + + typedef Lattice DoubledGaugeField; + typedef Lattice PropagatorField; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + + + typedef Lattice FermionField; + + typedef StaggeredImplParams ImplParams; + typedef SimpleCompressor Compressor; + typedef CartesianStencil StencilImpl; + typedef typename StencilImpl::View_type StencilView; + + ImplParams Params; + + StaggeredVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + template + static accelerator_inline void loadLinkElement(Simd &reg, ref &memory) + { + vsplat(reg, memory); + } + + static accelerator_inline void multLink(SiteHalfSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, + int mu) + { + SiteGaugeLink UU; + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mult(&phi(), &UU(), &chi()); + } + static accelerator_inline void multLinkAdd(SiteHalfSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, + int mu) + { + SiteGaugeLink UU; + for (int i = 0; i < Dimension; i++) { + for (int j = 0; j < Dimension; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); + } + } + mac(&phi(), &UU(), &chi()); + } + + inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu) + { + GridBase *GaugeGrid = U_ds.Grid(); + thread_for(lidx,
GaugeGrid->lSites(),{ + + SiteScalarGaugeLink ScalarU; + SiteDoubledGaugeField ScalarUds; + + Coordinate lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + peekLocalSite(ScalarUds, U_ds, lcoor); + + peekLocalSite(ScalarU, U, lcoor); + ScalarUds(mu) = ScalarU(); + + }); + } + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &UUUds, // for Naik term + DoubledGaugeField &Uds, + const GaugeField &Uthin, + const GaugeField &Ufat) + { + + GridBase * InputGrid = Uthin.Grid(); + conformable(InputGrid,Ufat.Grid()); + + GaugeLinkField U(InputGrid); + GaugeLinkField UU(InputGrid); + GaugeLinkField UUU(InputGrid); + GaugeLinkField Udag(InputGrid); + GaugeLinkField UUUdag(InputGrid); + + for (int mu = 0; mu < Nd; mu++) { + + // Staggered Phase. + Lattice > coor(InputGrid); + Lattice > x(InputGrid); LatticeCoordinate(x,0); + Lattice > y(InputGrid); LatticeCoordinate(y,1); + Lattice > z(InputGrid); LatticeCoordinate(z,2); + Lattice > t(InputGrid); LatticeCoordinate(t,3); + + Lattice > lin_z(InputGrid); lin_z=x+y; + Lattice > lin_t(InputGrid); lin_t=x+y+z; + + ComplexField phases(InputGrid); phases=1.0; + + if ( mu == 1 ) phases = where( mod(x ,2)==(Integer)0, phases,-phases); + if ( mu == 2 ) phases = where( mod(lin_z,2)==(Integer)0, phases,-phases); + if ( mu == 3 ) phases = where( mod(lin_t,2)==(Integer)0, phases,-phases); + + // 1 hop based on fat links + U = PeekIndex(Ufat, mu); + Udag = adj( Cshift(U, mu, -1)); + + U = U *phases; + Udag = Udag *phases; + + InsertGaugeField(Uds,U,mu); + InsertGaugeField(Uds,Udag,mu+4); + + // 3 hop based on thin links. Crazy huh ? 
+ U = PeekIndex(Uthin, mu); + UU = Gimpl::CovShiftForward(U,mu,U); + UUU= Gimpl::CovShiftForward(U,mu,UU); + + UUUdag = adj( Cshift(UUU, mu, -3)); + + UUU = UUU *phases; + UUUdag = UUUdag *phases; + + InsertGaugeField(UUUds,UUU,mu); + InsertGaugeField(UUUds,UUUdag,mu+4); + + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ + assert (0); + } +}; +typedef StaggeredVec5dImpl StaggeredVec5dImplR; // Real.. whichever prec +typedef StaggeredVec5dImpl StaggeredVec5dImplF; // Float +typedef StaggeredVec5dImpl StaggeredVec5dImplD; // Double + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.h b/Grid/qcd/action/fermion/WilsonCloverFermion.h index 40d08a76..3847b0d9 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.h +++ b/Grid/qcd/action/fermion/WilsonCloverFermion.h @@ -27,15 +27,11 @@ *************************************************************************************/ /* END LEGAL */ -#ifndef GRID_QCD_WILSON_CLOVER_FERMION_H -#define GRID_QCD_WILSON_CLOVER_FERMION_H +#pragma once #include -namespace Grid -{ -namespace QCD -{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////////// // Wilson Clover @@ -131,22 +127,22 @@ public: // Derivative parts unpreconditioned pseudofermions void MDeriv(GaugeField &force, const FermionField &X, const FermionField &Y, int dag) { - conformable(X._grid, Y._grid); - conformable(X._grid, force._grid); - GaugeLinkField force_mu(force._grid), lambda(force._grid); - GaugeField clover_force(force._grid); - PropagatorField Lambda(force._grid); + conformable(X.Grid(), Y.Grid()); + conformable(X.Grid(), force.Grid()); + GaugeLinkField force_mu(force.Grid()), lambda(force.Grid()); + GaugeField clover_force(force.Grid()); + PropagatorField Lambda(force.Grid()); // Guido: Here we are hitting some performance issues: //
need to extract the components of the DoubledGaugeField // for each call // Possible solution // Create a vector object to store them? (cons: wasting space) - std::vector U(Nd, this->Umu._grid); + std::vector U(Nd, this->Umu.Grid()); Impl::extractLinkField(U, this->Umu); - force = zero; + force = Zero(); // Derivative of the Wilson hopping term this->DhopDeriv(force, X, Y, dag); @@ -179,10 +175,10 @@ public: */ int count = 0; - clover_force = zero; + clover_force = Zero(); for (int mu = 0; mu < 4; mu++) { - force_mu = zero; + force_mu = Zero(); for (int nu = 0; nu < 4; nu++) { if (mu == nu) @@ -212,8 +208,8 @@ public: // Computing C_{\mu \nu}(x) as in Eq.(B.39) in Zbigniew Sroczynski's PhD thesis GaugeLinkField Cmunu(std::vector &U, GaugeLinkField &lambda, int mu, int nu) { - conformable(lambda._grid, U[0]._grid); - GaugeLinkField out(lambda._grid), tmp(lambda._grid); + conformable(lambda.Grid(), U[0].Grid()); + GaugeLinkField out(lambda.Grid()), tmp(lambda.Grid()); // insertion in upper staple // please check redundancy of shift operations @@ -266,102 +262,113 @@ private: // using the DeGrand-Rossi basis for the gamma matrices CloverFieldType fillCloverYZ(const GaugeLinkField &F) { - CloverFieldType T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) + CloverFieldType T(F.Grid()); + T = Zero(); + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), { - T._odata[i]()(0, 1) = timesMinusI(F._odata[i]()()); - T._odata[i]()(1, 0) = timesMinusI(F._odata[i]()()); - T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); - } + T_v[i]()(0, 1) = timesMinusI(F_v[i]()()); + T_v[i]()(1, 0) = timesMinusI(F_v[i]()()); + T_v[i]()(2, 3) = timesMinusI(F_v[i]()()); + T_v[i]()(3, 2) = timesMinusI(F_v[i]()()); + }); return T; } CloverFieldType fillCloverXZ(const GaugeLinkField &F) { - CloverFieldType T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int 
i = 0; i < CloverTerm._grid->oSites(); i++) + CloverFieldType T(F.Grid()); + T = Zero(); + + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), { - T._odata[i]()(0, 1) = -F._odata[i]()(); - T._odata[i]()(1, 0) = F._odata[i]()(); - T._odata[i]()(2, 3) = -F._odata[i]()(); - T._odata[i]()(3, 2) = F._odata[i]()(); - } + T_v[i]()(0, 1) = -F_v[i]()(); + T_v[i]()(1, 0) = F_v[i]()(); + T_v[i]()(2, 3) = -F_v[i]()(); + T_v[i]()(3, 2) = F_v[i]()(); + }); return T; } CloverFieldType fillCloverXY(const GaugeLinkField &F) { - CloverFieldType T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) - { + CloverFieldType T(F.Grid()); + T = Zero(); - T._odata[i]()(0, 0) = timesMinusI(F._odata[i]()()); - T._odata[i]()(1, 1) = timesI(F._odata[i]()()); - T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 3) = timesI(F._odata[i]()()); - } + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), + { + T_v[i]()(0, 0) = timesMinusI(F_v[i]()()); + T_v[i]()(1, 1) = timesI(F_v[i]()()); + T_v[i]()(2, 2) = timesMinusI(F_v[i]()()); + T_v[i]()(3, 3) = timesI(F_v[i]()()); + }); return T; } CloverFieldType fillCloverXT(const GaugeLinkField &F) { - CloverFieldType T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) + CloverFieldType T(F.Grid()); + T = Zero(); + + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), { - T._odata[i]()(0, 1) = timesI(F._odata[i]()()); - T._odata[i]()(1, 0) = timesI(F._odata[i]()()); - T._odata[i]()(2, 3) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 2) = timesMinusI(F._odata[i]()()); - } + T_v[i]()(0, 1) = timesI(F_v[i]()()); + T_v[i]()(1, 0) = timesI(F_v[i]()()); + T_v[i]()(2, 3) = timesMinusI(F_v[i]()()); + T_v[i]()(3, 2) = timesMinusI(F_v[i]()()); + }); return T; } CloverFieldType fillCloverYT(const GaugeLinkField &F) { - CloverFieldType 
T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) + CloverFieldType T(F.Grid()); + T = Zero(); + + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), { - T._odata[i]()(0, 1) = -(F._odata[i]()()); - T._odata[i]()(1, 0) = (F._odata[i]()()); - T._odata[i]()(2, 3) = (F._odata[i]()()); - T._odata[i]()(3, 2) = -(F._odata[i]()()); - } + T_v[i]()(0, 1) = -(F_v[i]()()); + T_v[i]()(1, 0) = (F_v[i]()()); + T_v[i]()(2, 3) = (F_v[i]()()); + T_v[i]()(3, 2) = -(F_v[i]()()); + }); return T; } CloverFieldType fillCloverZT(const GaugeLinkField &F) { - CloverFieldType T(F._grid); - T = zero; - PARALLEL_FOR_LOOP - for (int i = 0; i < CloverTerm._grid->oSites(); i++) + CloverFieldType T(F.Grid()); + + T = Zero(); + + auto T_v = T.View(); + auto F_v = F.View(); + thread_for(i, CloverTerm.Grid()->oSites(), { - T._odata[i]()(0, 0) = timesI(F._odata[i]()()); - T._odata[i]()(1, 1) = timesMinusI(F._odata[i]()()); - T._odata[i]()(2, 2) = timesMinusI(F._odata[i]()()); - T._odata[i]()(3, 3) = timesI(F._odata[i]()()); - } + T_v[i]()(0, 0) = timesI(F_v[i]()()); + T_v[i]()(1, 1) = timesMinusI(F_v[i]()()); + T_v[i]()(2, 2) = timesMinusI(F_v[i]()()); + T_v[i]()(3, 3) = timesI(F_v[i]()()); + }); return T; } }; -} -} +NAMESPACE_END(Grid); + + -#endif // GRID_QCD_WILSON_CLOVER_FERMION_H diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index 7553743e..10e98f33 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,13 +25,12 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_WILSON_COMPRESSOR_H #define GRID_QCD_WILSON_COMPRESSOR_H -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); ///////////////////////////////////////////////////////////////////////////////////////////// // optimised versions supporting half precision too @@ -43,9 +42,9 @@ class WilsonCompressorTemplate; template class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, - typename std::enable_if::value>::type > + typename std::enable_if::value>::type > { - public: +public: int mu,dag; @@ -62,15 +61,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, typedef typename SiteHalfSpinor::vector_type vComplexHigh; constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh); - inline int CommDatumSize(void) { + accelerator_inline int CommDatumSize(void) { return sizeof(SiteHalfCommSpinor); } /*****************************************************/ /* Compress includes precision change if mpi data is not same */ /*****************************************************/ - inline void Compress(SiteHalfSpinor * __restrict__ buf,Integer o,const SiteSpinor &in) { - SiteHalfSpinor tmp; + template + accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) { + _SiteHalfSpinor tmp; projector::Proj(tmp,in,mu,dag); vstream(buf[o],tmp); } @@ -78,10 +78,10 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Exchange includes precision change if mpi data is not same */ /*****************************************************/ - inline void Exchange(SiteHalfSpinor * __restrict__ mp, - const SiteHalfSpinor * __restrict__ 
vp0, - const SiteHalfSpinor * __restrict__ vp1, - Integer type,Integer o){ + accelerator_inline void Exchange(SiteHalfSpinor *mp, + const SiteHalfSpinor * __restrict__ vp0, + const SiteHalfSpinor * __restrict__ vp1, + Integer type,Integer o){ SiteHalfSpinor tmp1; SiteHalfSpinor tmp2; exchange(tmp1,tmp2,vp0[o],vp1[o],type); @@ -92,19 +92,21 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Have a decompression step if mpi data is not same */ /*****************************************************/ - inline void Decompress(SiteHalfSpinor * __restrict__ out, - SiteHalfSpinor * __restrict__ in, Integer o) { + accelerator_inline void Decompress(SiteHalfSpinor * __restrict__ out, + SiteHalfSpinor * __restrict__ in, Integer o) { assert(0); } /*****************************************************/ /* Compress Exchange */ /*****************************************************/ - inline void CompressExchange(SiteHalfSpinor * __restrict__ out0, - SiteHalfSpinor * __restrict__ out1, - const SiteSpinor * __restrict__ in, - Integer j,Integer k, Integer m,Integer type){ - SiteHalfSpinor temp1, temp2,temp3,temp4; + accelerator_inline void CompressExchange(SiteHalfSpinor * __restrict__ out0, + SiteHalfSpinor * __restrict__ out1, + const SiteSpinor * __restrict__ in, + Integer j,Integer k, Integer m,Integer type) + { + SiteHalfSpinor temp1, temp2; + SiteHalfSpinor temp3, temp4; projector::Proj(temp1,in[k],mu,dag); projector::Proj(temp2,in[m],mu,dag); exchange(temp3,temp4,temp1,temp2,type); @@ -115,15 +117,15 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Pass the info to the stencil */ /*****************************************************/ - inline bool DecompressionStep(void) { return false; } + accelerator_inline bool DecompressionStep(void) { return false; } }; template class WilsonCompressorTemplate< 
_HCspinor, _Hspinor, _Spinor, projector, - typename std::enable_if::value>::type > + typename std::enable_if::value>::type > { - public: +public: int mu,dag; @@ -140,15 +142,16 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, typedef typename SiteHalfSpinor::vector_type vComplexHigh; constexpr static int Nw=sizeof(SiteHalfSpinor)/sizeof(vComplexHigh); - inline int CommDatumSize(void) { + accelerator_inline int CommDatumSize(void) { return sizeof(SiteHalfCommSpinor); } /*****************************************************/ /* Compress includes precision change if mpi data is not same */ /*****************************************************/ - inline void Compress(SiteHalfSpinor *buf,Integer o,const SiteSpinor &in) { - SiteHalfSpinor hsp; + template + accelerator_inline void Compress(_SiteHalfSpinor *buf,Integer o,const _SiteSpinor &in) { + _SiteHalfSpinor hsp; SiteHalfCommSpinor *hbuf = (SiteHalfCommSpinor *)buf; projector::Proj(hsp,in,mu,dag); precisionChange((vComplexLow *)&hbuf[o],(vComplexHigh *)&hsp,Nw); @@ -157,7 +160,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Exchange includes precision change if mpi data is not same */ /*****************************************************/ - inline void Exchange(SiteHalfSpinor *mp, + accelerator_inline void Exchange(SiteHalfSpinor *mp, SiteHalfSpinor *vp0, SiteHalfSpinor *vp1, Integer type,Integer o){ @@ -172,8 +175,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Have a decompression step if mpi data is not same */ /*****************************************************/ - inline void Decompress(SiteHalfSpinor *out, - SiteHalfSpinor *in, Integer o){ + accelerator_inline void Decompress(SiteHalfSpinor *out, SiteHalfSpinor *in, Integer o){ SiteHalfCommSpinor *hin=(SiteHalfCommSpinor *)in; precisionChange((vComplexHigh 
*)&out[o],(vComplexLow *)&hin[o],Nw); } @@ -181,7 +183,7 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Compress Exchange */ /*****************************************************/ - inline void CompressExchange(SiteHalfSpinor *out0, + accelerator_inline void CompressExchange(SiteHalfSpinor *out0, SiteHalfSpinor *out1, const SiteSpinor *in, Integer j,Integer k, Integer m,Integer type){ @@ -198,19 +200,19 @@ class WilsonCompressorTemplate< _HCspinor, _Hspinor, _Spinor, projector, /*****************************************************/ /* Pass the info to the stencil */ /*****************************************************/ - inline bool DecompressionStep(void) { return true; } + accelerator_inline bool DecompressionStep(void) { return true; } }; #define DECLARE_PROJ(Projector,Compressor,spProj) \ class Projector { \ public: \ - template \ - static void Proj(hsp &result,const fsp &in,int mu,int dag){ \ - spProj(result,in); \ - } \ + template \ + static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ \ + spProj(result,in); \ + } \ }; \ -template using Compressor = WilsonCompressorTemplate; + template using Compressor = WilsonCompressorTemplate; DECLARE_PROJ(WilsonXpProjector,WilsonXpCompressor,spProjXp); DECLARE_PROJ(WilsonYpProjector,WilsonYpCompressor,spProjYp); @@ -222,9 +224,9 @@ DECLARE_PROJ(WilsonZmProjector,WilsonZmCompressor,spProjZm); DECLARE_PROJ(WilsonTmProjector,WilsonTmCompressor,spProjTm); class WilsonProjector { - public: +public: template - static void Proj(hsp &result,const fsp &in,int mu,int dag){ + static accelerator void Proj(hsp &result,const fsp &in,int mu,int dag){ int mudag=dag? 
mu : (mu+Nd)%(2*Nd); switch(mudag) { case Xp: spProjXp(result,in); break; @@ -243,9 +245,14 @@ template using WilsonCompressor = WilsonCom // Fast comms buffer manipulation which should inline right through (avoid direction // dependent logic that prevents inlining -template -class WilsonStencil : public CartesianStencil { +template +class WilsonStencil : public CartesianStencil { public: + + typedef CartesianStencil Base; + typedef typename Base::View_type View_type; + typedef typename Base::StencilVector StencilVector; + double timer0; double timer1; double timer2; @@ -274,16 +281,40 @@ public: if ( timer4 ) std::cout << GridLogMessage << " timer4 " < surface_list; + WilsonStencil(GridBase *grid, int npoints, int checkerboard, const std::vector &directions, - const std::vector &distances) - : CartesianStencil (grid,npoints,checkerboard,directions,distances) + const std::vector &distances,Parameters p) + : CartesianStencil (grid,npoints,checkerboard,directions,distances,p) { ZeroCountersi(); + surface_list.resize(0); + this->same_node.resize(npoints); }; + void BuildSurfaceList(int Ls,int vol4){ + + // find same node for SHM + // Here we know the distance is 1 for WilsonStencil + for(int point=0;point_npoints;point++){ + this->same_node[point] = this->SameNode(point); + } + + for(int site = 0 ;site< vol4;site++){ + int local = 1; + for(int point=0;point_npoints;point++){ + if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){ + local = 0; + } + } + if(local == 0) { + surface_list.push_back(site); + } + } + } template < class compressor> void HaloExchangeOpt(const Lattice &source,compressor &compress) @@ -292,8 +323,6 @@ public: this->HaloExchangeOptGather(source,compress); double t1=usecond(); // Asynchronous MPI calls multidirectional, Isend etc... - // this->CommunicateBegin(reqs); - // this->CommunicateComplete(reqs); // Non-overlapped directions within a thread. Asynchronous calls except MPI3, threaded up to comm threads ways. 
this->Communicate(); double t2=usecond(); timer1 += t2-t1; @@ -327,7 +356,7 @@ public: this->_grid->StencilBarrier(); this->mpi3synctime_g+=usecond(); - assert(source._grid==this->_grid); + assert(source.Grid()==this->_grid); this->halogtime-=usecond(); this->u_comm_offset=0; @@ -365,9 +394,10 @@ public: this->face_table_computed=1; assert(this->u_comm_offset==this->_unified_buffer_size); this->halogtime+=usecond(); + accelerator_barrier(); } - }; +}; -}} // namespace close +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/WilsonFermion.h b/Grid/qcd/action/fermion/WilsonFermion.h index 8bfe6c1a..3a712435 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.h +++ b/Grid/qcd/action/fermion/WilsonFermion.h @@ -27,16 +27,13 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_QCD_WILSON_FERMION_H -#define GRID_QCD_WILSON_FERMION_H + /* END LEGAL */ +#pragma once -namespace Grid { - -namespace QCD { +NAMESPACE_BEGIN(Grid); class WilsonFermionStatic { - public: +public: static int HandOptDslash; // these are a temporary hack static int MortonOrder; static const std::vector directions; @@ -60,8 +57,9 @@ class WilsonFermionStatic { }; template -class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { - public: +class WilsonFermion : public WilsonKernels, public WilsonFermionStatic +{ +public: INHERIT_IMPL_TYPES(Impl); typedef WilsonKernels Kernels; @@ -138,10 +136,10 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { // Constructor WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, RealD _mass, + GridRedBlackCartesian &Hgrid, RealD _mass, const ImplParams &p = ImplParams(), const WilsonAnisotropyCoefficients &anis = WilsonAnisotropyCoefficients() ); - + // 
DoubleStore impl dependent void ImportGauge(const GaugeField &_Umu); @@ -150,7 +148,7 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { /////////////////////////////////////////////////////////////// // protected: - public: +public: virtual RealD Mass(void) { return mass; } virtual int isTrivialEE(void) { return 1; }; RealD mass; @@ -171,7 +169,7 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { LebesgueOrder Lebesgue; LebesgueOrder LebesgueEvenOdd; - + WilsonAnisotropyCoefficients anisotropyCoeff; /////////////////////////////////////////////////////////////// @@ -182,11 +180,11 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { PropagatorField &q_out, Current curr_type, unsigned int mu); - void SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx); }; @@ -194,7 +192,6 @@ class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { typedef WilsonFermion WilsonFermionF; typedef WilsonFermion WilsonFermionD; +NAMESPACE_END(Grid); + -} -} -#endif diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.h b/Grid/qcd/action/fermion/WilsonFermion5D.h index 4a31bb43..8f1073db 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonFermion5D.h @@ -1,5 +1,5 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,216 +26,215 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_QCD_WILSON_FERMION_5D_H #define GRID_QCD_WILSON_FERMION_5D_H #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////////////// - // This is the 4d red black case appropriate to support - // - // parity = (x+y+z+t)|2; - // generalised five dim fermions like mobius, zolotarev etc.. - // - // i.e. even even contains fifth dim hopping term. - // - // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] - //////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// This is the 4d red black case appropriate to support +// +// parity = (x+y+z+t)|2; +// generalised five dim fermions like mobius, zolotarev etc.. +// +// i.e. even even contains fifth dim hopping term. +// +// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] +//////////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////// - // This is the 4d red black case appropriate to support - // - // parity = (x+y+z+t)|2; - // generalised five dim fermions like mobius, zolotarev etc.. - // - // i.e. even even contains fifth dim hopping term. 
- // - // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] - //////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// +// This is the 4d red black case appropriate to support +// +// parity = (x+y+z+t)|2; +// generalised five dim fermions like mobius, zolotarev etc.. +// +// i.e. even even contains fifth dim hopping term. +// +// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] +//////////////////////////////////////////////////////////////////////////////// - class WilsonFermion5DStatic { - public: - // S-direction is INNERMOST and takes no part in the parity. - static const std::vector directions; - static const std::vector displacements; - const int npoint = 8; - }; +class WilsonFermion5DStatic { +public: + // S-direction is INNERMOST and takes no part in the parity. + static const std::vector directions; + static const std::vector displacements; + static constexpr int npoint = 8; +}; - template - class WilsonFermion5D : public WilsonKernels, public WilsonFermion5DStatic - { - public: - INHERIT_IMPL_TYPES(Impl); - typedef WilsonKernels Kernels; - PmuStat stat; +template +class WilsonFermion5D : public WilsonKernels, public WilsonFermion5DStatic +{ +public: + INHERIT_IMPL_TYPES(Impl); + typedef WilsonKernels Kernels; + PmuStat stat; - FermionField _tmp; - FermionField &tmp(void) { return _tmp; } + FermionField _tmp; + FermionField &tmp(void) { return _tmp; } - void Report(void); - void ZeroCounters(void); - double DhopCalls; - double DhopCommTime; - double DhopComputeTime; - double DhopComputeTime2; - double DhopFaceTime; - double DhopTotalTime; + void Report(void); + void ZeroCounters(void); + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + double DhopComputeTime2; + double DhopFaceTime; + double DhopTotalTime; - double DerivCalls; - double DerivCommTime; - double DerivComputeTime; 
- double DerivDhopComputeTime; + double DerivCalls; + double DerivCommTime; + double DerivComputeTime; + double DerivDhopComputeTime; - /////////////////////////////////////////////////////////////// - // Implement the abstract base - /////////////////////////////////////////////////////////////// - GridBase *GaugeGrid(void) { return _FourDimGrid ;} - GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} - GridBase *FermionGrid(void) { return _FiveDimGrid;} - GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _FourDimGrid ;} + GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;} + GridBase *FermionGrid(void) { return _FiveDimGrid;} + GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;} - // full checkerboard operations; leave unimplemented as abstract for now - virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;}; - virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;}; + // full checkerboard operations; leave unimplemented as abstract for now + virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;}; + virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;}; - // half checkerboard operations; leave unimplemented as abstract for now - virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; - virtual void Mooee (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);}; + // half checkerboard operations; leave unimplemented as abstract for now + virtual void Meooe (const FermionField &in, FermionField &out){assert(0);}; + virtual void Mooee (const FermionField &in, FermionField 
&out){assert(0);}; + virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);}; - virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; - virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; - virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);}; + virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);}; + virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);}; + virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac - // These can be overridden by fancy 5d chiral action - virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + // These can be overridden by fancy 5d chiral action + virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector twist) ; - // Implement hopping term non-hermitian hopping term; half cb or both - // Implement 
s-diagonal DW - void DW (const FermionField &in, FermionField &out,int dag); - void Dhop (const FermionField &in, FermionField &out,int dag); - void DhopOE(const FermionField &in, FermionField &out,int dag); - void DhopEO(const FermionField &in, FermionField &out,int dag); + // Implement hopping term non-hermitian hopping term; half cb or both + // Implement s-diagonal DW + void DW (const FermionField &in, FermionField &out,int dag); + void Dhop (const FermionField &in, FermionField &out,int dag); + void DhopOE(const FermionField &in, FermionField &out,int dag); + void DhopEO(const FermionField &in, FermionField &out,int dag); - // add a DhopComm - // -- suboptimal interface will presently trigger multiple comms. - void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + // add a DhopComm + // -- suboptimal interface will presently trigger multiple comms. + void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); - /////////////////////////////////////////////////////////////// - // New methods added - /////////////////////////////////////////////////////////////// - void DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag); + /////////////////////////////////////////////////////////////// + // New methods added + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag); - void DhopInternal(StencilImpl & st, - LebesgueOrder &lo, - DoubledGaugeField &U, - const FermionField &in, - FermionField &out, - int dag); + void DhopInternal(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); - void DhopInternalOverlappedComms(StencilImpl & st, - LebesgueOrder &lo, - DoubledGaugeField &U, - const FermionField &in, - FermionField &out, - int 
dag); + void DhopInternalOverlappedComms(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); - void DhopInternalSerialComms(StencilImpl & st, - LebesgueOrder &lo, - DoubledGaugeField &U, - const FermionField &in, - FermionField &out, - int dag); + void DhopInternalSerialComms(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); - // Constructors - WilsonFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - double _M5,const ImplParams &p= ImplParams()); + // Constructors + WilsonFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + double _M5,const ImplParams &p= ImplParams()); - // Constructors - /* - WilsonFermion5D(int simd, - GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - double _M5,const ImplParams &p= ImplParams()); - */ + // Constructors + /* + WilsonFermion5D(int simd, + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + double _M5,const ImplParams &p= ImplParams()); + */ - // DoubleStore - void ImportGauge(const GaugeField &_Umu); + // DoubleStore + void ImportGauge(const GaugeField &_Umu); - /////////////////////////////////////////////////////////////// - // Data members require to support the functionality - /////////////////////////////////////////////////////////////// - public: + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// +public: - // Add these to the support from Wilson - 
GridBase *_FourDimGrid; - GridBase *_FourDimRedBlackGrid; - GridBase *_FiveDimGrid; - GridBase *_FiveDimRedBlackGrid; + // Add these to the support from Wilson + GridBase *_FourDimGrid; + GridBase *_FourDimRedBlackGrid; + GridBase *_FiveDimGrid; + GridBase *_FiveDimRedBlackGrid; - double M5; - int Ls; + double M5; + int Ls; - //Defines the stencils for even and odd - StencilImpl Stencil; - StencilImpl StencilEven; - StencilImpl StencilOdd; + //Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; - // Copy of the gauge field , with even and odd subsets - DoubledGaugeField Umu; - DoubledGaugeField UmuEven; - DoubledGaugeField UmuOdd; + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; - LebesgueOrder Lebesgue; - LebesgueOrder LebesgueEvenOdd; + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; - // Comms buffer - std::vector > comm_buf; + // Comms buffer + std::vector > comm_buf; - /////////////////////////////////////////////////////////////// - // Conserved current utilities - /////////////////////////////////////////////////////////////// - void ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu); - void SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, - unsigned int tmax, - ComplexField &lattice_cmplx); + /////////////////////////////////////////////////////////////// + // Conserved current utilities + /////////////////////////////////////////////////////////////// + void ContractConservedCurrent(PropagatorField &q_in_1, + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu); + void SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int 
tmin, + unsigned int tmax, + ComplexField &lattice_cmplx); - void ContractJ5q(PropagatorField &q_in,ComplexField &J5q); - void ContractJ5q(FermionField &q_in,ComplexField &J5q); + void ContractJ5q(PropagatorField &q_in,ComplexField &J5q); + void ContractJ5q(FermionField &q_in,ComplexField &J5q); - }; +}; -}} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/WilsonImpl.h b/Grid/qcd/action/fermion/WilsonImpl.h new file mode 100644 index 00000000..47160730 --- /dev/null +++ b/Grid/qcd/action/fermion/WilsonImpl.h @@ -0,0 +1,226 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h + +Copyright (C) 2015 + +Author: Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#pragma once + +NAMESPACE_BEGIN(Grid); + + +///////////////////////////////////////////////////////////////////////////// +// Single flavour four spinors with colour index +///////////////////////////////////////////////////////////////////////////// +template +class WilsonImpl : public PeriodicGaugeImpl > { +public: + + static const int Dimension = Representation::Dimension; + static const bool isFundamental = Representation::isFundamental; + static const bool LsVectorised=false; + static const int Nhcs = Options::Nhcs; + + typedef PeriodicGaugeImpl > Gimpl; + INHERIT_GIMPL_TYPES(Gimpl); + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} + + typedef typename Options::_Coeff_t Coeff_t; + typedef typename Options::template PrecisionMapper::LowerPrecVector SimdL; + + template using iImplSpinor = iScalar, Ns> >; + template using iImplPropagator = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplHalfCommSpinor = iScalar, Nhcs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + + typedef iImplSpinor SiteSpinor; + typedef iImplPropagator SitePropagator; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplHalfCommSpinor SiteHalfCommSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; + typedef Lattice PropagatorField; + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; + typedef WilsonImplParams ImplParams; + typedef WilsonStencil StencilImpl; + typedef typename StencilImpl::View_type StencilView; + + ImplParams Params; + + WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){ + assert(Params.boundary_phases.size() == Nd); + }; + + template + static accelerator_inline void multLink(_Spinor &phi, + const 
SiteDoubledGaugeField &U, + const _Spinor &chi, + int mu) + { + auto UU = coalescedRead(U(mu)); + mult(&phi(), &UU, &chi()); + } + template + static accelerator_inline void multLink(_Spinor &phi, + const SiteDoubledGaugeField &U, + const _Spinor &chi, + int mu, + StencilEntry *SE, + StencilView &St) + { + multLink(phi,U,chi,mu); + } + + + template + static accelerator_inline void loadLinkElement(Simd &reg, ref &memory) + { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &Uds, + const GaugeField &Umu) + { + typedef typename Simd::scalar_type scalar_type; + + conformable(Uds.Grid(), GaugeGrid); + conformable(Umu.Grid(), GaugeGrid); + + GaugeLinkField U(GaugeGrid); + GaugeLinkField tmp(GaugeGrid); + + Lattice > coor(GaugeGrid); + //////////////////////////////////////////////////// + // apply any boundary phase or twists + //////////////////////////////////////////////////// + for (int mu = 0; mu < Nd; mu++) { + + ////////// boundary phase ///////////// + auto pha = Params.boundary_phases[mu]; + scalar_type phase( real(pha),imag(pha) ); + + int L = GaugeGrid->GlobalDimensions()[mu]; + int Lmu = L - 1; + + LatticeCoordinate(coor, mu); + + U = PeekIndex(Umu, mu); + + // apply any twists + RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L; + if ( theta != 0.0) { + scalar_type twphase(::cos(theta),::sin(theta)); + U = twphase*U; + std::cout << GridLogMessage << " Twist ["<(Uds, tmp, mu); + + U = adj(Cshift(U, mu, -1)); + U = where(coor == 0, conjugate(phase) * U, U); + PokeIndex(Uds, U, mu + 4); + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + GaugeLinkField link(mat.Grid()); + link = TraceIndex(outerProduct(Btilde,A)); + PokeIndex(mat,link,mu); + } + + inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){ + mat = outerProduct(B,A); + } + + inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) { + mat = TraceIndex(P); + } + 
+ inline void extractLinkField(std::vector &mat, DoubledGaugeField &Uds){ + for (int mu = 0; mu < Nd; mu++) + mat[mu] = PeekIndex(Uds, mu); + } + + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ + + int Ls=Btilde.Grid()->_fdimensions[0]; + GaugeLinkField tmp(mat.Grid()); + tmp = Zero(); + auto tmp_v = tmp.View(); + auto Btilde_v = Btilde.View(); + auto Atilde_v = Atilde.View(); + thread_for(sss,tmp.Grid()->oSites(),{ + int sU=sss; + for(int s=0;s(outerProduct(Btilde_v[sF],Atilde_v[sF])); // ordering here + } + }); + PokeIndex(mat,tmp,mu); + + } +}; + + +typedef WilsonImpl WilsonImplR; // Real.. whichever prec +typedef WilsonImpl WilsonImplF; // Float +typedef WilsonImpl WilsonImplD; // Double + +typedef WilsonImpl WilsonImplRL; // Real.. whichever prec +typedef WilsonImpl WilsonImplFH; // Float +typedef WilsonImpl WilsonImplDF; // Double + +typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec +typedef WilsonImpl ZWilsonImplF; // Float +typedef WilsonImpl ZWilsonImplD; // Double + +typedef WilsonImpl ZWilsonImplRL; // Real.. whichever prec +typedef WilsonImpl ZWilsonImplFH; // Float +typedef WilsonImpl ZWilsonImplDF; // Double + +typedef WilsonImpl WilsonAdjImplR; // Real.. whichever prec +typedef WilsonImpl WilsonAdjImplF; // Float +typedef WilsonImpl WilsonAdjImplD; // Double + +typedef WilsonImpl WilsonTwoIndexSymmetricImplR; // Real.. whichever prec +typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float +typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double + +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplR; // Real.. 
whichever prec +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplF; // Float +typedef WilsonImpl WilsonTwoIndexAntiSymmetricImplD; // Double + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/WilsonKernels.cc b/Grid/qcd/action/fermion/WilsonKernels.cc deleted file mode 100644 index dc66db23..00000000 --- a/Grid/qcd/action/fermion/WilsonKernels.cc +++ /dev/null @@ -1,455 +0,0 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - -Copyright (C) 2015 - -Author: Peter Boyle -Author: Peter Boyle -Author: paboyle - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution -directory -*************************************************************************************/ -/* END LEGAL */ -#include - -namespace Grid { -namespace QCD { - -int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric; -int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute; - -template -WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; - -//////////////////////////////////////////// -// Generic implementation; move to different file? 
-//////////////////////////////////////////// - -#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \ - SE = st.GetEntry(ptype, Dir, sF); \ - if (SE->_is_local) { \ - chi_p = &chi; \ - if (SE->_permute) { \ - spProj(tmp, in._odata[SE->_offset]); \ - permute(chi, tmp, ptype); \ - } else { \ - spProj(chi, in._odata[SE->_offset]); \ - } \ - } else { \ - chi_p = &buf[SE->_offset]; \ - } \ - Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \ - Recon(result, Uchi); - -#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \ - SE = st.GetEntry(ptype, Dir, sF); \ - if (SE->_is_local) { \ - chi_p = &chi; \ - if (SE->_permute) { \ - spProj(tmp, in._odata[SE->_offset]); \ - permute(chi, tmp, ptype); \ - } else { \ - spProj(chi, in._odata[SE->_offset]); \ - } \ - } else if ( st.same_node[Dir] ) { \ - chi_p = &buf[SE->_offset]; \ - } \ - if (SE->_is_local || st.same_node[Dir] ) { \ - Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \ - Recon(result, Uchi); \ - } - -#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ - SE = st.GetEntry(ptype, Dir, sF); \ - if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ - chi_p = &buf[SE->_offset]; \ - Impl::multLink(Uchi, U._odata[sU], *chi_p, Dir, SE, st); \ - Recon(result, Uchi); \ - nmu++; \ - } - -#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \ - if (gamma == Dir) { \ - if (SE->_is_local && SE->_permute) { \ - spProj(tmp, in._odata[SE->_offset]); \ - permute(chi, tmp, ptype); \ - } else if (SE->_is_local) { \ - spProj(chi, in._odata[SE->_offset]); \ - } else { \ - chi = buf[SE->_offset]; \ - } \ - Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); \ - Recon(result, Uchi); \ - } - - //////////////////////////////////////////////////////////////////// - // All legs kernels ; comms then compute - //////////////////////////////////////////////////////////////////// -template -void WilsonKernels::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, 
FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - - GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp); - GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp); - GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm); - GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm); - vstream(out._odata[sF], result); -}; - -template -void WilsonKernels::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - - GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp); - GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp); - GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm); - GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm); - vstream(out._odata[sF], result); -}; - //////////////////////////////////////////////////////////////////// - // Interior kernels - //////////////////////////////////////////////////////////////////// -template -void WilsonKernels::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - - result=zero; - GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp); - GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp); 
- GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm); - GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm); - vstream(out._odata[sF], result); -}; - -template -void WilsonKernels::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - result=zero; - GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp); - GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp); - GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm); - GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm); - vstream(out._odata[sF], result); -}; -//////////////////////////////////////////////////////////////////// -// Exterior kernels -//////////////////////////////////////////////////////////////////// -template -void WilsonKernels::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - int nmu=0; - result=zero; - GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp); - GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp); - GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm); - 
GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm); - if ( nmu ) { - out._odata[sF] = out._odata[sF] + result; - } -}; - -template -void WilsonKernels::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; - SiteHalfSpinor Uchi; - SiteSpinor result; - StencilEntry *SE; - int ptype; - int nmu=0; - result=zero; - GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp); - GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp); - GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp); - GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp); - GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm); - GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm); - GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm); - GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm); - if ( nmu ) { - out._odata[sF] = out._odata[sF] + result; - } -}; - -template -void WilsonKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out, int dir, int gamma) { - - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteSpinor result; - SiteHalfSpinor Uchi; - StencilEntry *SE; - int ptype; - - SE = st.GetEntry(ptype, dir, sF); - GENERIC_DHOPDIR_LEG(Xp,spProjXp,spReconXp); - GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp); - GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp); - GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp); - GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm); - GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm); - GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm); - GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm); - vstream(out._odata[sF], result); -} - -/******************************************************************************* - * Conserved current utilities for Wilson fermions, for contracting 
propagators - * to make a conserved current sink or inserting the conserved current - * sequentially. Common to both 4D and 5D. - ******************************************************************************/ -// N.B. Functions below assume a -1/2 factor within U. -#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr)) -#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr)) - -/******************************************************************************* - * Name: ContractConservedCurrentSiteFwd - * Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in_1 shifted in +ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::ContractConservedCurrentSiteFwd( - const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeField &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu); - result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); - if (switch_sign) - { - q_out -= result; - } - else - { - q_out += result; - } -} - -/******************************************************************************* - * Name: ContractConservedCurrentSiteBwd - * Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in_2 shifted in +ve mu direction. 
- ******************************************************************************/ -template -void WilsonKernels::ContractConservedCurrentSiteBwd( - const SitePropagator &q_in_1, - const SitePropagator &q_in_2, - SitePropagator &q_out, - DoubledGaugeField &U, - unsigned int sU, - unsigned int mu, - bool switch_sign) -{ - SitePropagator result, tmp; - Gamma g5(Gamma::Algebra::Gamma5); - Impl::multLinkProp(tmp, U._odata[sU], q_in_1, mu + Nd); - result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); - if (switch_sign) - { - q_out += result; - } - else - { - q_out -= result; - } -} - -// G-parity requires more specialised implementation. -#define NO_CURR_SITE(Impl) \ -template <> \ -void WilsonKernels::ContractConservedCurrentSiteFwd( \ - const SitePropagator &q_in_1, \ - const SitePropagator &q_in_2, \ - SitePropagator &q_out, \ - DoubledGaugeField &U, \ - unsigned int sU, \ - unsigned int mu, \ - bool switch_sign) \ -{ \ - assert(0); \ -} \ -template <> \ -void WilsonKernels::ContractConservedCurrentSiteBwd( \ - const SitePropagator &q_in_1, \ - const SitePropagator &q_in_2, \ - SitePropagator &q_out, \ - DoubledGaugeField &U, \ - unsigned int mu, \ - unsigned int sU, \ - bool switch_sign) \ -{ \ - assert(0); \ -} - -NO_CURR_SITE(GparityWilsonImplF); -NO_CURR_SITE(GparityWilsonImplD); -NO_CURR_SITE(GparityWilsonImplFH); -NO_CURR_SITE(GparityWilsonImplDF); - - -/******************************************************************************* - * Name: SeqConservedCurrentSiteFwd - * Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in shifted in +ve mu direction. 
- ******************************************************************************/ -template -void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeField &U, - unsigned int sU, - unsigned int mu, - vInteger t_mask, - bool switch_sign) -{ - SitePropagator result; - Impl::multLinkProp(result, U._odata[sU], q_in, mu); - result = WilsonCurrentFwd(result, mu); - - // Zero any unwanted timeslice entries. - result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) - { - q_out -= result; - } - else - { - q_out += result; - } -} - -/******************************************************************************* - * Name: SeqConservedCurrentSiteFwd - * Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu] - * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. - * - Pass in q_in shifted in -ve mu direction. - ******************************************************************************/ -template -void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, - SitePropagator &q_out, - DoubledGaugeField &U, - unsigned int sU, - unsigned int mu, - vInteger t_mask, - bool switch_sign) -{ - SitePropagator result; - Impl::multLinkProp(result, U._odata[sU], q_in, mu + Nd); - result = WilsonCurrentBwd(result, mu); - - // Zero any unwanted timeslice entries. 
- result = predicatedWhere(t_mask, result, 0.*result); - - if (switch_sign) - { - q_out += result; - } - else - { - q_out -= result; - } -} - -FermOpTemplateInstantiate(WilsonKernels); -AdjointFermOpTemplateInstantiate(WilsonKernels); -TwoIndexFermOpTemplateInstantiate(WilsonKernels); - -}} - diff --git a/Grid/qcd/action/fermion/WilsonKernels.h b/Grid/qcd/action/fermion/WilsonKernels.h index 2b150896..35715097 100644 --- a/Grid/qcd/action/fermion/WilsonKernels.h +++ b/Grid/qcd/action/fermion/WilsonKernels.h @@ -27,19 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_QCD_DHOP_H -#define GRID_QCD_DHOP_H + /* END LEGAL */ +#pragma once -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Helper routines that implement Wilson stencil for a single site. - // Common to both the WilsonFermion and WilsonFermion5D - //////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helper routines that implement Wilson stencil for a single site. 
+// Common to both the WilsonFermion and WilsonFermion5D +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// class WilsonKernelsStatic { - public: +public: enum { OptGeneric, OptHandUnroll, OptInlineAsm }; enum { CommsAndCompute, CommsThenCompute }; static int Opt; @@ -47,235 +45,123 @@ class WilsonKernelsStatic { }; template class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { - public: - +public: + INHERIT_IMPL_TYPES(Impl); typedef FermionOperator Base; public: - template - typename std::enable_if::type - DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) - { - bgq_l1p_optimisation(1); - switch(Opt) { -#if defined(AVX512) || defined (QPX) - case OptInlineAsm: - if(interior&&exterior) WilsonKernels::AsmDhopSite (st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else if (interior) WilsonKernels::AsmDhopSiteInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else if (exterior) WilsonKernels::AsmDhopSiteExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else assert(0); - break; -#endif - case OptHandUnroll: - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::HandDhopSite(st,lo,U,buf,sF,sU,in,out); - else if (interior) WilsonKernels::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out); - sF++; - } - sU++; - } - break; - case OptGeneric: - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::GenericDhopSite(st,lo,U,buf,sF,sU,in,out); - else if (interior) WilsonKernels::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out); - else assert(0); - sF++; - } - sU++; - } - break; - default: - assert(0); - } - 
bgq_l1p_optimisation(0); - } - - template - typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool, void>::type - DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1 ) { - // no kernel choice - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::GenericDhopSite(st,lo,U,buf,sF,sU,in,out); - else if (interior) WilsonKernels::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out); - else assert(0); - sF++; - } - sU++; - } - } - - template - typename std::enable_if::type - DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) -{ - bgq_l1p_optimisation(1); - switch(Opt) { -#if defined(AVX512) || defined (QPX) - case OptInlineAsm: - if(interior&&exterior) WilsonKernels::AsmDhopSiteDag (st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else if (interior) WilsonKernels::AsmDhopSiteDagInt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else if (exterior) WilsonKernels::AsmDhopSiteDagExt(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - else assert(0); - break; -#endif - case OptHandUnroll: - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::HandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else if (interior) WilsonKernels::HandDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::HandDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); - else assert(0); - sF++; - } - sU++; - } - break; - case OptGeneric: - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else if 
(interior) WilsonKernels::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); - else assert(0); - sF++; - } - sU++; - } - break; - default: - assert(0); - } - bgq_l1p_optimisation(0); - } + static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior=1,int exterior=1) ; - template - typename std::enable_if<(Impl::isFundamental==false || (Impl::isFundamental==true && Nc != 3)) && EnableBool,void>::type - DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out,int interior=1,int exterior=1) { + static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior=1,int exterior=1) ; - for (int site = 0; site < Ns; site++) { - for (int s = 0; s < Ls; s++) { - if(interior&&exterior) WilsonKernels::GenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else if (interior) WilsonKernels::GenericDhopSiteDagInt(st,lo,U,buf,sF,sU,in,out); - else if (exterior) WilsonKernels::GenericDhopSiteDagExt(st,lo,U,buf,sF,sU,in,out); - else assert(0); - sF++; - } - sU++; - } - } + static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma); - void DhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); - ////////////////////////////////////////////////////////////////////////////// // Utilities for inserting Wilson conserved current. 
////////////////////////////////////////////////////////////////////////////// - void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + static void ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); - void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, + + static void ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, const SitePropagator &q_in_2, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, bool switch_sign = false); - void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + + static void SeqConservedCurrentSiteFwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, - vInteger t_mask, + vPredicate t_mask, bool switch_sign = false); - void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + + static void SeqConservedCurrentSiteBwd(const SitePropagator &q_in, SitePropagator &q_out, - DoubledGaugeField &U, + DoubledGaugeFieldView &U, unsigned int sU, unsigned int mu, - vInteger t_mask, + vPredicate t_mask, bool switch_sign = false); private: - // Specialised variants - void GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + + static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma); - void GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int 
sF, int sU, const FermionField &in, FermionField &out); + // Specialised variants + static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - void GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + static accelerator void GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - - void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); - - void AsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); - - void AsmDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); - - void AsmDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); - - void AsmDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, 
const FermionField &in,FermionField &out); - - void AsmDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); - - - void HandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void HandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); + static accelerator void GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - void HandDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void HandDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void HandDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - - void HandDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, - int sF, int sU, const FermionField &in, FermionField &out); - -public: + static accelerator void GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); - WilsonKernels(const ImplParams &p = ImplParams()); + static void AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView 
&in,FermionFieldView &out); + + static void AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); + + static void AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); + + static void AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); + + static void AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in,FermionFieldView &out); + + static void AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Nsite, const FermionFieldView &in, FermionFieldView &out); +// Keep Hand unrolled temporarily + static accelerator void HandDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + + static accelerator void HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf, + 
int sF, int sU, const FermionFieldView &in, FermionFieldView &out); + public: + WilsonKernels(const ImplParams &p = ImplParams()) : Base(p){}; }; -}} +NAMESPACE_END(Grid); + -#endif diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsm.cc b/Grid/qcd/action/fermion/WilsonKernelsAsm.cc deleted file mode 100644 index 55911988..00000000 --- a/Grid/qcd/action/fermion/WilsonKernelsAsm.cc +++ /dev/null @@ -1,127 +0,0 @@ -/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - - - Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle -Author: Guido Cossu - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#include - -namespace Grid { -namespace QCD { - - -/////////////////////////////////////////////////////////// -// Default to no assembler implementation -/////////////////////////////////////////////////////////// -template void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -template void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -template void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -template void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -template void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -template void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} - -#include -#include - -#define INSTANTIATE_ASM(A)\ -template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ - \ -template void 
WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ -template void WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ - \ -template void WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ -template void WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ - \ -template void WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ - -INSTANTIATE_ASM(WilsonImplF); -INSTANTIATE_ASM(WilsonImplD); -INSTANTIATE_ASM(ZWilsonImplF); -INSTANTIATE_ASM(ZWilsonImplD); -INSTANTIATE_ASM(GparityWilsonImplF); -INSTANTIATE_ASM(GparityWilsonImplD); -INSTANTIATE_ASM(DomainWallVec5dImplF); -INSTANTIATE_ASM(DomainWallVec5dImplD); -INSTANTIATE_ASM(ZDomainWallVec5dImplF); -INSTANTIATE_ASM(ZDomainWallVec5dImplD); - -INSTANTIATE_ASM(WilsonImplFH); -INSTANTIATE_ASM(WilsonImplDF); -INSTANTIATE_ASM(ZWilsonImplFH); -INSTANTIATE_ASM(ZWilsonImplDF); -INSTANTIATE_ASM(GparityWilsonImplFH); -INSTANTIATE_ASM(GparityWilsonImplDF); -INSTANTIATE_ASM(DomainWallVec5dImplFH); -INSTANTIATE_ASM(DomainWallVec5dImplDF); -INSTANTIATE_ASM(ZDomainWallVec5dImplFH); -INSTANTIATE_ASM(ZDomainWallVec5dImplDF); - -}} - diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h b/Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h deleted file mode 100644 index 948c16a2..00000000 --- a/Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h +++ /dev/null @@ -1,650 +0,0 @@ 
-/************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - - - Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - - -#if defined(AVX512) - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the single precision routine - /////////////////////////////////////////////////////////// -#include -#include - -static Vector signsF; - - template - int setupSigns(Vector& signs ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; - } - - static int signInitF = setupSigns(signsF); - -#define MAYBEPERM(A,perm) if (perm) { A ; } -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) -#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0]; - -///////////////////////////////////////////////////////////////// -// XYZT vectorised, undag Kernel, single -///////////////////////////////////////////////////////////////// -#undef KERNEL_DAG -#define 
INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - 
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -///////////////////////////////////////////////////////////////// -// XYZT vectorised, dag Kernel, single -///////////////////////////////////////////////////////////////// -#define KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, 
SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef MAYBEPERM -#undef MULT_2SPIN -#define MAYBEPERM(A,B) -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) - -///////////////////////////////////////////////////////////////// -// Ls vectorised, undag Kernel, single -///////////////////////////////////////////////////////////////// -#undef KERNEL_DAG -#define 
INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -#undef MULT_2SPIN -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) -template<> void 
-WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -///////////////////////////////////////////////////////////////// -// Ls vectorised, dag Kernel, single -///////////////////////////////////////////////////////////////// -#define KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR 
-template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef COMPLEX_SIGNS -#undef MAYBEPERM -#undef MULT_2SPIN - - - -/////////////////////////////////////////////////////////// -// If we are AVX512 specialise the double precision routine 
-/////////////////////////////////////////////////////////// - -#include - -static Vector signsD; -static int signInitD = setupSigns(signsD); - -#define MAYBEPERM(A,perm) if (perm) { A ; } -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) -#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0]; - - -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR - -///////////////////////////////////////////////////////////////// -// XYZT vectorised, undag Kernel, single -///////////////////////////////////////////////////////////////// -#undef KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void 
-WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -///////////////////////////////////////////////////////////////// -// XYZT vectorised, dag Kernel, single -///////////////////////////////////////////////////////////////// -#define KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - 
-template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int 
Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef MAYBEPERM -#undef MULT_2SPIN -#define MAYBEPERM(A,B) -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) - -///////////////////////////////////////////////////////////////// -// Ls vectorised, undag Kernel, single -///////////////////////////////////////////////////////////////// -#undef KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void 
-WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -#undef MULT_2SPIN -#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -///////////////////////////////////////////////////////////////// -// Ls vectorised, dag Kernel, single -///////////////////////////////////////////////////////////////// -#define KERNEL_DAG -#define INTERIOR_AND_EXTERIOR -#undef INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int 
ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#define INTERIOR -#undef EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef INTERIOR_AND_EXTERIOR -#undef INTERIOR -#define EXTERIOR -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & 
lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include -template<> void -WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include - -#undef COMPLEX_SIGNS -#undef MAYBEPERM -#undef MULT_2SPIN - -#endif //AVX512 diff --git a/Grid/qcd/action/fermion/WilsonTMFermion.cc b/Grid/qcd/action/fermion/WilsonTMFermion.cc deleted file mode 100644 index d4604b10..00000000 --- a/Grid/qcd/action/fermion/WilsonTMFermion.cc +++ /dev/null @@ -1,99 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc - - Copyright (C) 2015 - -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include - -namespace Grid { -namespace QCD { - - /* - * BF sequence - * - void bfmbase::MooeeInv(Fermion_t psi, - Fermion_t chi, - int dag, int cb) - - double m = this->mass; - double tm = this->twistedmass; - double mtil = 4.0+this->mass; - - double sq = mtil*mtil + tm*tm; - - double a = mtil/sq; - double b = -tm /sq; - if(dag) b=-b; - axpibg5x(chi,psi,a,b); - - void bfmbase::Mooee(Fermion_t psi, - Fermion_t chi, - int dag,int cb) - double a = 4.0+this->mass; - double b = this->twistedmass; - if(dag) b=-b; - axpibg5x(chi,psi,a,b); - */ - - template - void WilsonTMFermion::Mooee(const FermionField &in, FermionField &out) { - RealD a = 4.0+this->mass; - RealD b = this->mu; - out.checkerboard = in.checkerboard; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeDag(const FermionField &in, FermionField &out) { - RealD a = 4.0+this->mass; - RealD b = -this->mu; - out.checkerboard = in.checkerboard; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeInv(const FermionField &in, FermionField &out) { - RealD m = this->mass; - RealD tm = this->mu; - RealD mtil = 4.0+this->mass; - RealD sq = mtil*mtil+tm*tm; - RealD a = mtil/sq; - RealD b = -tm /sq; - axpibg5x(out,in,a,b); - } - template - void WilsonTMFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - RealD m = this->mass; - RealD tm = this->mu; - RealD mtil = 4.0+this->mass; - RealD sq = mtil*mtil+tm*tm; - RealD a = mtil/sq; - RealD b = tm /sq; - axpibg5x(out,in,a,b); - } - - FermOpTemplateInstantiate(WilsonTMFermion); - -} -} diff --git a/Grid/qcd/action/fermion/WilsonTMFermion.h b/Grid/qcd/action/fermion/WilsonTMFermion.h index f75c287b..12c4b71a 100644 --- a/Grid/qcd/action/fermion/WilsonTMFermion.h +++ b/Grid/qcd/action/fermion/WilsonTMFermion.h @@ -1,4 +1,4 @@ - 
/************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,55 +23,52 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_WILSON_TM_FERMION_H -#define GRID_QCD_WILSON_TM_FERMION_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once #include #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class WilsonTMFermion : public WilsonFermion +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class WilsonTMFermion : public WilsonFermion - { - public: - INHERIT_IMPL_TYPES(Impl); - public: + virtual void Instantiatable(void) {}; + // Constructors + WilsonTMFermion(GaugeField &_Umu, + GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, + RealD _mass, + RealD _mu, + const ImplParams &p= ImplParams() + ) : + WilsonFermion(_Umu, + Fgrid, + Hgrid, + _mass,p) - virtual void Instantiatable(void) {}; - // Constructors - WilsonTMFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass, - RealD _mu, - const ImplParams &p= ImplParams() - ) : - WilsonFermion(_Umu, - Fgrid, - Hgrid, - _mass,p) - - { - mu = _mu; - } + { + mu = _mu; + } - // allow override for twisted mass and clover - virtual void Mooee(const FermionField &in, FermionField &out) ; - virtual void MooeeDag(const FermionField &in, FermionField &out) ; - virtual void MooeeInv(const FermionField &in, FermionField &out) ; - virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, 
FermionField &out) ; + virtual void MooeeDag(const FermionField &in, FermionField &out) ; + virtual void MooeeInv(const FermionField &in, FermionField &out) ; + virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; - private: - RealD mu; // TwistedMass parameter +private: + RealD mu; // TwistedMass parameter - }; +}; + +NAMESPACE_END(Grid); -}} -#endif diff --git a/Grid/qcd/action/fermion/WilsonTMFermion5D.h b/Grid/qcd/action/fermion/WilsonTMFermion5D.h index 6c75d908..71acf763 100644 --- a/Grid/qcd/action/fermion/WilsonTMFermion5D.h +++ b/Grid/qcd/action/fermion/WilsonTMFermion5D.h @@ -30,126 +30,123 @@ Author: paboyle ; NB Christoph did similar in GPT #include #include - -namespace Grid { - - namespace QCD { +NAMESPACE_BEGIN(Grid); - template - class WilsonTMFermion5D : public WilsonFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void) {}; - - // Constructors - WilsonTMFermion5D(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Frbgrid, - GridCartesian &Ugrid, - GridRedBlackCartesian &Urbgrid, - const std::vector _mass, - const std::vector _mu, - const ImplParams &p= ImplParams() - ) : - WilsonFermion5D(_Umu, - Fgrid, - Frbgrid, - Ugrid, - Urbgrid, - 4.0,p) - - { - update(_mass,_mu); - } - - virtual void Meooe(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { - this->DhopEO(in, out, DaggerNo); - } else { - this->DhopOE(in, out, DaggerNo); - } - } - - virtual void MeooeDag(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { - this->DhopEO(in, out, DaggerYes); - } else { - this->DhopOE(in, out, DaggerYes); - } - } - - // allow override for twisted mass and clover - virtual void Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in - for (int s=0;s<(int)this->mass.size();s++) { - ComplexD a = 4.0+this->mass[s]; - ComplexD b(0.0,this->mu[s]); - 
axpbg5y_ssp(out,a,in,b,in,s,s); - } - } - - virtual void MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - for (int s=0;s<(int)this->mass.size();s++) { - ComplexD a = 4.0+this->mass[s]; - ComplexD b(0.0,-this->mu[s]); - axpbg5y_ssp(out,a,in,b,in,s,s); - } - } - virtual void MooeeInv(const FermionField &in, FermionField &out) { - for (int s=0;s<(int)this->mass.size();s++) { - RealD m = this->mass[s]; - RealD tm = this->mu[s]; - RealD mtil = 4.0+this->mass[s]; - RealD sq = mtil*mtil+tm*tm; - ComplexD a = mtil/sq; - ComplexD b(0.0, -tm /sq); - axpbg5y_ssp(out,a,in,b,in,s,s); - } - } - virtual void MooeeInvDag(const FermionField &in, FermionField &out) { - for (int s=0;s<(int)this->mass.size();s++) { - RealD m = this->mass[s]; - RealD tm = this->mu[s]; - RealD mtil = 4.0+this->mass[s]; - RealD sq = mtil*mtil+tm*tm; - ComplexD a = mtil/sq; - ComplexD b(0.0,tm /sq); - axpbg5y_ssp(out,a,in,b,in,s,s); - } - } - - virtual RealD M(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - this->Dhop(in, out, DaggerNo); - FermionField tmp(out._grid); - for (int s=0;s<(int)this->mass.size();s++) { - ComplexD a = 4.0+this->mass[s]; - ComplexD b(0.0,this->mu[s]); - axpbg5y_ssp(tmp,a,in,b,in,s,s); - } - return axpy_norm(out, 1.0, tmp, out); - } - - // needed for fast PV - void update(const std::vector& _mass, const std::vector& _mu) { - assert(_mass.size() == _mu.size()); - assert(_mass.size() == this->FermionGrid()->_fdimensions[0]); - this->mass = _mass; - this->mu = _mu; - } - - private: - std::vector mu; - std::vector mass; - - }; +template +class WilsonTMFermion5D : public WilsonFermion5D +{ + public: + INHERIT_IMPL_TYPES(Impl); + public: + + virtual void Instantiatable(void) {}; + + // Constructors + WilsonTMFermion5D(GaugeField &_Umu, + GridCartesian &Fgrid, + GridRedBlackCartesian &Frbgrid, + GridCartesian &Ugrid, + GridRedBlackCartesian &Urbgrid, + const std::vector _mass, + const std::vector _mu, + 
const ImplParams &p= ImplParams() + ) : + WilsonFermion5D(_Umu, + Fgrid, + Frbgrid, + Ugrid, + Urbgrid, + 4.0,p) - typedef WilsonTMFermion5D WilsonTMFermion5DF; - typedef WilsonTMFermion5D WilsonTMFermion5DD; + { + update(_mass,_mu); + } + + virtual void Meooe(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + this->DhopEO(in, out, DaggerNo); + } else { + this->DhopOE(in, out, DaggerNo); + } + } + + virtual void MeooeDag(const FermionField &in, FermionField &out) { + if (in.Checkerboard() == Odd) { + this->DhopEO(in, out, DaggerYes); + } else { + this->DhopOE(in, out, DaggerYes); + } + } + + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in + for (int s=0;s<(int)this->mass.size();s++) { + ComplexD a = 4.0+this->mass[s]; + ComplexD b(0.0,this->mu[s]); + axpbg5y_ssp(out,a,in,b,in,s,s); + } + } + + virtual void MooeeDag(const FermionField &in, FermionField &out) { + out.Checkerboard() = in.Checkerboard(); + for (int s=0;s<(int)this->mass.size();s++) { + ComplexD a = 4.0+this->mass[s]; + ComplexD b(0.0,-this->mu[s]); + axpbg5y_ssp(out,a,in,b,in,s,s); + } + } + virtual void MooeeInv(const FermionField &in, FermionField &out) { + for (int s=0;s<(int)this->mass.size();s++) { + RealD m = this->mass[s]; + RealD tm = this->mu[s]; + RealD mtil = 4.0+this->mass[s]; + RealD sq = mtil*mtil+tm*tm; + ComplexD a = mtil/sq; + ComplexD b(0.0, -tm /sq); + axpbg5y_ssp(out,a,in,b,in,s,s); + } + } + virtual void MooeeInvDag(const FermionField &in, FermionField &out) { + for (int s=0;s<(int)this->mass.size();s++) { + RealD m = this->mass[s]; + RealD tm = this->mu[s]; + RealD mtil = 4.0+this->mass[s]; + RealD sq = mtil*mtil+tm*tm; + ComplexD a = mtil/sq; + ComplexD b(0.0,tm /sq); + axpbg5y_ssp(out,a,in,b,in,s,s); + } + } + + virtual RealD M(const FermionField &in, FermionField &out) { + out.Checkerboard() = 
in.Checkerboard(); + this->Dhop(in, out, DaggerNo); + FermionField tmp(out.Grid()); + for (int s=0;s<(int)this->mass.size();s++) { + ComplexD a = 4.0+this->mass[s]; + ComplexD b(0.0,this->mu[s]); + axpbg5y_ssp(tmp,a,in,b,in,s,s); + } + return axpy_norm(out, 1.0, tmp, out); + } + + // needed for fast PV + void update(const std::vector& _mass, const std::vector& _mu) { + assert(_mass.size() == _mu.size()); + assert(_mass.size() == this->FermionGrid()->_fdimensions[0]); + this->mass = _mass; + this->mu = _mu; + } + + private: + std::vector mu; + std::vector mass; + +}; + +typedef WilsonTMFermion5D WilsonTMFermion5DF; +typedef WilsonTMFermion5D WilsonTMFermion5DD; -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/ZMobiusFermion.h b/Grid/qcd/action/fermion/ZMobiusFermion.h index 32ff7670..fc8a7439 100644 --- a/Grid/qcd/action/fermion/ZMobiusFermion.h +++ b/Grid/qcd/action/fermion/ZMobiusFermion.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,56 +24,50 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_ZMOBIUS_FERMION_H -#define GRID_QCD_ZMOBIUS_FERMION_H +*************************************************************************************/ +/* END LEGAL */ +#pragma once #include -namespace Grid { +NAMESPACE_BEGIN(Grid); - namespace QCD { +template +class ZMobiusFermion : public CayleyFermion5D +{ +public: + INHERIT_IMPL_TYPES(Impl); +public: - template - class ZMobiusFermion : public CayleyFermion5D - { - public: - INHERIT_IMPL_TYPES(Impl); - public: - - virtual void Instantiatable(void) {}; - // Constructors - ZMobiusFermion(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5, - std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : + virtual void Instantiatable(void) {}; + // Constructors + ZMobiusFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : - CayleyFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_mass,_M5,p) + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) - { - RealD eps = 1.0; - - std::cout< zgamma(this->Ls); - for(int s=0;sLs;s++){ - zgamma[s] = gamma[s]; - } - - // Call base setter - this->SetCoefficientsInternal(1.0,zgamma,b,c); - } - - }; + { + // RealD eps = 1.0; + std::cout< zgamma(this->Ls); + for(int s=0;sLs;s++){ + zgamma[s] = gamma[s]; + } + // Call base setter + this->SetCoefficientsInternal(1.0,zgamma,b,c); } -} -#endif +}; + +NAMESPACE_END(Grid); + 
diff --git a/Grid/qcd/action/fermion/CayleyFermion5Ddense.cc b/Grid/qcd/action/fermion/deprecated/CayleyFermion5Ddense.h similarity index 91% rename from Grid/qcd/action/fermion/CayleyFermion5Ddense.cc rename to Grid/qcd/action/fermion/deprecated/CayleyFermion5Ddense.h index 4014675a..f1acb50c 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Ddense.cc +++ b/Grid/qcd/action/fermion/deprecated/CayleyFermion5Ddense.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,19 +26,19 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include #include -namespace Grid { -namespace QCD { - /* - * Dense matrix versions of routines - */ +NAMESPACE_BEGIN(Grid); + +/* + * Dense matrix versions of routines + */ template void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) { @@ -54,10 +54,10 @@ template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { int Ls=this->Ls; - int LLs = psi._grid->_rdimensions[0]; - int vol = psi._grid->oSites()/LLs; + int LLs = psi.Grid()->_rdimensions[0]; + int vol = psi.Grid()->oSites()/LLs; - chi.checkerboard=psi.checkerboard; + chi.Checkerboard()=psi.Checkerboard(); assert(Ls==LLs); @@ -96,15 +96,14 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField } // For the non-vectorised s-direction this is simple - - for(auto site=0;site::MooeeInternal(const FermionField &psi, FermionField } chi[s1+Ls*site] = SiteChi*0.5; } - } + }); } 
#ifdef CAYLEY_DPERP_DENSE @@ -153,4 +152,4 @@ template void CayleyFermion5D::MooeeInternal(const FermionField & template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); #endif -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc b/Grid/qcd/action/fermion/deprecated/CayleyFermion5Dssp.h similarity index 79% rename from Grid/qcd/action/fermion/CayleyFermion5Dssp.cc rename to Grid/qcd/action/fermion/deprecated/CayleyFermion5Dssp.h index cb9b2957..650c391c 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc +++ b/Grid/qcd/action/fermion/deprecated/CayleyFermion5Dssp.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,26 +26,24 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - // FIXME -- make a version of these routines with site loop outermost for cache reuse. 
- // Pminus fowards - // Pplus backwards +// Pminus fowards +// Pplus backwards template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { Coeff_t one(1.0); int Ls=this->Ls; @@ -66,9 +64,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { Coeff_t one(1.0); int Ls=this->Ls; @@ -91,7 +89,7 @@ void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField & { Coeff_t one(1.0); Coeff_t czero(0.0); - chi.checkerboard=psi.checkerboard; + chi.Checkerboard()=psi.Checkerboard(); int Ls=this->Ls; // Apply (L^{\prime})^{-1} axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0] @@ -120,7 +118,7 @@ void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField & { Coeff_t one(1.0); Coeff_t czero(0.0); - chi.checkerboard=psi.checkerboard; + chi.Checkerboard()=psi.Checkerboard(); int Ls=this->Ls; // Apply (U^{\prime})^{-dagger} axpby_ssp (chi,one,psi, czero,psi,0,0); // chi[0]=psi[0] @@ -145,20 +143,19 @@ void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField & #ifdef CAYLEY_DPERP_LINALG - INSTANTIATE_DPERP(WilsonImplF); - INSTANTIATE_DPERP(WilsonImplD); - INSTANTIATE_DPERP(GparityWilsonImplF); - INSTANTIATE_DPERP(GparityWilsonImplD); - INSTANTIATE_DPERP(ZWilsonImplF); - INSTANTIATE_DPERP(ZWilsonImplD); +INSTANTIATE_DPERP(WilsonImplF); +INSTANTIATE_DPERP(WilsonImplD); +INSTANTIATE_DPERP(GparityWilsonImplF); +INSTANTIATE_DPERP(GparityWilsonImplD); +INSTANTIATE_DPERP(ZWilsonImplF); +INSTANTIATE_DPERP(ZWilsonImplD); - INSTANTIATE_DPERP(WilsonImplFH); - INSTANTIATE_DPERP(WilsonImplDF); - INSTANTIATE_DPERP(GparityWilsonImplFH); - INSTANTIATE_DPERP(GparityWilsonImplDF); - INSTANTIATE_DPERP(ZWilsonImplFH); - 
INSTANTIATE_DPERP(ZWilsonImplDF); +INSTANTIATE_DPERP(WilsonImplFH); +INSTANTIATE_DPERP(WilsonImplDF); +INSTANTIATE_DPERP(GparityWilsonImplFH); +INSTANTIATE_DPERP(GparityWilsonImplDF); +INSTANTIATE_DPERP(ZWilsonImplFH); +INSTANTIATE_DPERP(ZWilsonImplDF); #endif -} -} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermiondense.h b/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermiondense.h new file mode 100644 index 00000000..3ec4fbac --- /dev/null +++ b/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermiondense.h @@ -0,0 +1,158 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermiondense.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +/* + * Dense matrix versions of routines + */ +template +void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); +} + +template +void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); +} + +template +void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) +{ + int Ls = this->Ls; + int LLs = psi.Grid()->_rdimensions[0]; + int vol = psi.Grid()->oSites()/LLs; + + chi.Checkerboard() = psi.Checkerboard(); + + assert(Ls==LLs); + + Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + + for(int s=0;sbee[s]; + Pminus(s,s) = this->bee[s]; + } + + for(int s=0; scee[s]; + } + + for(int s=0; scee[s+1]; + } + + Pplus (0,Ls-1) = this->dp; + Pminus(Ls-1,0) = this->dm; + + Eigen::MatrixXd PplusMat ; + Eigen::MatrixXd PminusMat; + + if(inv) { + PplusMat = Pplus.inverse(); + PminusMat = Pminus.inverse(); + } else { + PplusMat = Pplus; + PminusMat = Pminus; + } + + if(dag){ + PplusMat.adjointInPlace(); + PminusMat.adjointInPlace(); + } + + // For the non-vectorised s-direction this is simple + + for(auto site=0; site::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void 
DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void DomainWallEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermionssp.h b/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermionssp.h new file mode 100644 index 00000000..c9e638e5 --- /dev/null +++ b/Grid/qcd/action/fermion/deprecated/DomainWallEOFAFermionssp.h @@ -0,0 +1,167 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermionssp.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software 
Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include + +NAMESPACE_BEGIN(Grid); + +// FIXME -- make a version of these routines with site loop outermost for cache reuse. +// Pminus fowards +// Pplus backwards +template +void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s +void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s +void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +{ + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + FermionField tmp(psi.Grid()); + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls], chi, s, Ls-1); + } + axpby_ssp_pminus(tmp, czero, chi, one/this->dee[Ls-1], chi, Ls-1, Ls-1); + axpby_ssp_pplus(chi, one, 
tmp, one/this->dee[Ls], chi, Ls-1, Ls-1); + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] + } +} + +template +void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +{ + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + FermionField tmp(psi.Grid()); + + // Apply (U^{\prime})^{-dagger} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; suee[s-1]), chi, s, s-1); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp_pminus(tmp, czero, chi, one/conjugate(this->dee[Ls-1]), chi, Ls-1, Ls-1); + axpby_ssp_pplus(chi, one, tmp, one/conjugate(this->dee[Ls]), chi, Ls-1, Ls-1); + + // Apply L^{-dagger} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + } +} + +#ifdef DOMAIN_WALL_EOFA_DPERP_LINALG + +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplD); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplD); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplF); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplD); + +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(WilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(GparityWilsonImplDF); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplFH); +INSTANTIATE_DPERP_DWF_EOFA(ZWilsonImplDF); + +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermiondense.h b/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermiondense.h new file mode 100644 index 00000000..8091f344 --- /dev/null +++ b/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermiondense.h @@ -0,0 +1,183 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermiondense.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +/* + * Dense matrix versions of routines + */ +template +void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); +} + +template +void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerNo, InverseYes); +} + +template +void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); +} + +template +void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) +{ + this->MooeeInternal(psi, chi, DaggerYes, InverseYes); +} + +template +void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv) +{ + int Ls = this->Ls; + int LLs = psi.Grid()->_rdimensions[0]; + int vol = psi.Grid()->oSites()/LLs; + + int pm = this->pm; + RealD shift = this->shift; + RealD alpha = this->alpha; + RealD k = this->k; + RealD mq1 = this->mq1; + + chi.Checkerboard() = psi.Checkerboard(); + + assert(Ls==LLs); + + Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + + for(int s=0;sbee[s]; + Pminus(s,s) = this->bee[s]; + } + + for(int s=0; scee[s]; + } + + for(int s=0; scee[s+1]; + } + Pplus (0,Ls-1) = mq1*this->cee[0]; + Pminus(Ls-1,0) = mq1*this->cee[Ls-1]; + + if(shift != 0.0){ + Coeff_t N = 2.0 * ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) ); + for(int s=0; s::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void 
MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); +INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); +INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); +template void MobiusEOFAFermion::MooeeInternal(const FermionField& psi, FermionField& chi, int dag, int inv); + +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermionssp.h b/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermionssp.h new file mode 100644 index 00000000..254cdb54 --- /dev/null +++ b/Grid/qcd/action/fermion/deprecated/MobiusEOFAFermionssp.h @@ -0,0 +1,289 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermionssp.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: 
paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include + +NAMESPACE_BEGIN(Grid); + +// FIXME -- make a version of these routines with site loop outermost for cache reuse. 
+// Pminus fowards +// Pplus backwards +template +void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s +void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, s, Ls-1); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, s, 0); } + } +} + +template +void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; s +void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, + FermionField& chi, Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) +{ + Coeff_t one(1.0); + int Ls = this->Ls; + for(int s=0; spm == 1){ axpby_ssp_pplus(chi, one, chi, shift_coeffs[s], psi, Ls-1, s); } + else{ axpby_ssp_pminus(chi, one, chi, shift_coeffs[s], psi, 0, s); } + } +} + +template +void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi) +{ + if(this->shift != 0.0){ MooeeInv_shift(psi,chi); return; } + + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); + } + axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1); + + // Apply U^{-1} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, 
chi, -this->uee[s], chi, s, s+1); // chi[Ls] + } +} + +template +void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi) +{ + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + FermionField tmp(psi.Grid()); + + // Apply (L^{\prime})^{-1} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + axpby_ssp(tmp, czero, tmp, this->MooeeInv_shift_lc[0], psi, 0, 0); + for(int s=1; slee[s-1], chi, s, s-1);// recursion Psi[s] -lee P_+ chi[s-1] + axpby_ssp(tmp, one, tmp, this->MooeeInv_shift_lc[s], psi, 0, s); + } + + // L_m^{-1} + for(int s=0; sleem[s], chi, Ls-1, s); + } + + // U_m^{-1} D^{-1} + for(int s=0; sdee[s], chi, -this->ueem[s]/this->dee[Ls-1], chi, s, Ls-1); + } + axpby_ssp(chi, one/this->dee[Ls-1], chi, czero, chi, Ls-1, Ls-1); + + // Apply U^{-1} and add shift term + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[Ls-1], tmp, Ls-1, 0); } + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pminus(chi, one, chi, -this->uee[s], chi, s, s+1); // chi[Ls] + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInv_shift_norm[s], tmp, s, 0); } + } +} + +template +void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi) +{ + if(this->shift != 0.0){ MooeeInvDag_shift(psi,chi); return; } + + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + // Apply (U^{\prime})^{-dagger} + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + for(int s=1; suee[s-1]), chi, s, s-1); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp(chi, 
one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); + + // Apply L^{-dagger} + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + } +} + +template +void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, FermionField& chi) +{ + Coeff_t one(1.0); + Coeff_t czero(0.0); + chi.Checkerboard() = psi.Checkerboard(); + int Ls = this->Ls; + + FermionField tmp(psi.Grid()); + + // Apply (U^{\prime})^{-dagger} and accumulate (MooeeInvDag_shift_lc)_{j} \psi_{j} in tmp[0] + axpby_ssp(chi, one, psi, czero, psi, 0, 0); // chi[0]=psi[0] + axpby_ssp(tmp, czero, tmp, this->MooeeInvDag_shift_lc[0], psi, 0, 0); + for(int s=1; suee[s-1]), chi, s, s-1); + axpby_ssp(tmp, one, tmp, this->MooeeInvDag_shift_lc[s], psi, 0, s); + } + + // U_m^{-\dagger} + for(int s=0; sueem[s]), chi, Ls-1, s); + } + + // L_m^{-\dagger} D^{-dagger} + for(int s=0; sdee[s]), chi, -conjugate(this->leem[s]/this->dee[Ls-1]), chi, s, Ls-1); + } + axpby_ssp(chi, one/conjugate(this->dee[Ls-1]), chi, czero, chi, Ls-1, Ls-1); + + // Apply L^{-dagger} and add shift + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[Ls-1], tmp, Ls-1, 0); } + for(int s=Ls-2; s>=0; s--){ + axpby_ssp_pplus(chi, one, chi, -conjugate(this->lee[s]), chi, s, s+1); // chi[Ls] + if(this->pm == 1){ axpby_ssp_pplus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } + else{ axpby_ssp_pminus(chi, one, chi, this->MooeeInvDag_shift_norm[s], tmp, s, 0); } + } +} + +#ifdef MOBIUS_EOFA_DPERP_LINALG + +INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplF); +INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplD); +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplF); +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplD); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplF); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplD); + +INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplFH); 
+INSTANTIATE_DPERP_MOBIUS_EOFA(WilsonImplDF); +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplFH); +INSTANTIATE_DPERP_MOBIUS_EOFA(GparityWilsonImplDF); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplFH); +INSTANTIATE_DPERP_MOBIUS_EOFA(ZWilsonImplDF); + +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/g5HermitianLinop.h b/Grid/qcd/action/fermion/g5HermitianLinop.h index cca7a113..2e417ceb 100644 --- a/Grid/qcd/action/fermion/g5HermitianLinop.h +++ b/Grid/qcd/action/fermion/g5HermitianLinop.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -23,13 +23,12 @@ Author: Peter Boyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef G5_HERMITIAN_LINOP #define G5_HERMITIAN_LINOP -namespace Grid { - namespace QCD { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////// // Wrap an already herm matrix @@ -46,12 +45,12 @@ public: HermOp(in,out); } void OpDiag (const Field &in, Field &out) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.Mdiag(in,tmp); G5R5(out,tmp); } void OpDir (const Field &in, Field &out,int dir,int disp) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.Mdir(in,tmp,dir,disp); G5R5(out,tmp); } @@ -68,7 +67,7 @@ public: n2=real(dot); } void HermOp(const Field &in, Field &out){ - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.M(in,tmp); G5R5(out,tmp); } @@ -80,7 +79,7 @@ class Gamma5HermitianLinearOperator : public LinearOperatorBase { Matrix &_Mat; Gamma g5; public: - 
Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {}; + Gamma5HermitianLinearOperator(Matrix &Mat): _Mat(Mat), g5(Gamma::Algebra::Gamma5) {}; void Op (const Field &in, Field &out){ HermOp(in,out); } @@ -88,12 +87,12 @@ public: HermOp(in,out); } void OpDiag (const Field &in, Field &out) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.Mdiag(in,tmp); out=g5*tmp; } void OpDir (const Field &in, Field &out,int dir,int disp) { - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.Mdir(in,tmp,dir,disp); out=g5*tmp; } @@ -110,12 +109,11 @@ public: n2=real(dot); } void HermOp(const Field &in, Field &out){ - Field tmp(in._grid); + Field tmp(in.Grid()); _Mat.M(in,tmp); out=g5*tmp; } }; - -}} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.cc b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h similarity index 80% rename from Grid/qcd/action/fermion/CayleyFermion5D.cc rename to Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index a95ea4a0..bab21aea 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.cc +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,31 +26,30 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - template - CayleyFermion5D::CayleyFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _mass,RealD _M5,const ImplParams &p) : - WilsonFermion5D(_Umu, - FiveDimGrid, - FiveDimRedBlackGrid, - FourDimGrid, - FourDimRedBlackGrid,_M5,p), - mass(_mass) - { - } +template +CayleyFermion5D::CayleyFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5,const ImplParams &p) : + WilsonFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_M5,p), + mass(_mass) +{ +} /////////////////////////////////////////////////////////////// // Physical surface field utilities @@ -61,8 +60,8 @@ void CayleyFermion5D::ExportPhysicalFermionSolution(const FermionField &so int Ls = this->Ls; FermionField tmp(this->FermionGrid()); tmp = solution5d; - conformable(solution5d._grid,this->FermionGrid()); - conformable(exported4d._grid,this->GaugeGrid()); + conformable(solution5d.Grid(),this->FermionGrid()); + conformable(exported4d.Grid(),this->GaugeGrid()); axpby_ssp_pminus(tmp, 0., solution5d, 1., solution5d, 0, 0); axpby_ssp_pplus (tmp, 1., tmp , 1., solution5d, 0, Ls-1); ExtractSlice(exported4d, tmp, 0, 0); @@ -71,7 +70,7 @@ template void CayleyFermion5D::P(const FermionField &psi, FermionField &chi) { int Ls= this->Ls; - chi=zero; + chi=Zero(); for(int s=0;s void CayleyFermion5D::Pdag(const FermionField 
&psi, FermionField &chi) { int Ls= this->Ls; - chi=zero; + chi=Zero(); for(int s=0;s::ExportPhysicalFermionSource(const FermionField &solu int Ls = this->Ls; FermionField tmp(this->FermionGrid()); tmp = solution5d; - conformable(solution5d._grid,this->FermionGrid()); - conformable(exported4d._grid,this->GaugeGrid()); + conformable(solution5d.Grid(),this->FermionGrid()); + conformable(exported4d.Grid(),this->GaugeGrid()); axpby_ssp_pplus (tmp, 0., solution5d, 1., solution5d, 0, 0); axpby_ssp_pminus(tmp, 1., tmp , 1., solution5d, 0, Ls-1); ExtractSlice(exported4d, tmp, 0, 0); @@ -104,9 +103,9 @@ void CayleyFermion5D::ImportUnphysicalFermion(const FermionField &input4d, { int Ls = this->Ls; FermionField tmp(this->FermionGrid()); - conformable(imported5d._grid,this->FermionGrid()); - conformable(input4d._grid ,this->GaugeGrid()); - tmp = zero; + conformable(imported5d.Grid(),this->FermionGrid()); + conformable(input4d.Grid() ,this->GaugeGrid()); + tmp = Zero(); InsertSlice(input4d, tmp, 0 , 0); InsertSlice(input4d, tmp, Ls-1, 0); axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0); @@ -119,9 +118,9 @@ void CayleyFermion5D::ImportPhysicalFermionSource(const FermionField &inpu { int Ls = this->Ls; FermionField tmp(this->FermionGrid()); - conformable(imported5d._grid,this->FermionGrid()); - conformable(input4d._grid ,this->GaugeGrid()); - tmp = zero; + conformable(imported5d.Grid(),this->FermionGrid()); + conformable(input4d.Grid() ,this->GaugeGrid()); + tmp = Zero(); InsertSlice(input4d, tmp, 0 , 0); InsertSlice(input4d, tmp, Ls-1, 0); axpby_ssp_pplus (tmp, 0., tmp, 1., tmp, 0, 0); @@ -156,7 +155,7 @@ void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi template void CayleyFermion5D::CayleyReport(void) { this->Report(); - std::vector latt = GridDefaultLatt(); + Coordinate latt = GridDefaultLatt(); RealD volume = this->Ls; for(int mu=0;mu_FourDimGrid->_Nprocessors; if ( M5Dcalls > 0 ) { @@ -164,10 +163,16 @@ template void 
CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl; - // Flops = 6.0*(Nc*Ns) *Ls*vol - RealD mflops = 6.0*12*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting + // Flops = 10.0*(Nc*Ns) *Ls*vol + RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + + // Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting) + // read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 ) + // write = 1 + RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9; + std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl; } if ( MooeeInvCalls > 0 ) { @@ -175,11 +180,16 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl; std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; - +#ifdef GRID_NVCC + RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; +#else // Flops = MADD * Ls *Ls *4dvol * spin/colour/complex RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; 
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; +#endif } } @@ -198,18 +208,18 @@ template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag (Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1]=mass; - std::vector lower(Ls,-1.0); lower[0] =mass; + Vector diag (Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1]=mass; + Vector lower(Ls,-1.0); lower[0] =mass; M5D(psi,chi,chi,lower,diag,upper); } template void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag = bs; - std::vector upper= cs; - std::vector lower= cs; + Vector diag = bs; + Vector upper= cs; + Vector lower= cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5D(psi,psi,Din,lower,diag,upper); @@ -218,9 +228,9 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = beo; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = beo; + Vector upper(Ls); + Vector lower(Ls); for(int i=0;i void CayleyFermion5D::Mooee (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = bee; + Vector upper(Ls); + Vector lower(Ls); for(int i=0;i void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = bee; + Vector upper(Ls); + Vector lower(Ls); for (int s=0;s void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); - std::vector lower(Ls,-1.0); + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); + Vector lower(Ls,-1.0); upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; 
M5Ddag(psi,chi,chi,lower,diag,upper); @@ -290,9 +300,9 @@ template void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag =bs; - std::vector upper=cs; - std::vector lower=cs; + Vector diag =bs; + Vector upper=cs; + Vector lower=cs; for (int s=0;s::MeooeDag5D (const FermionField &psi, FermionField template RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) { - int Ls=this->Ls; - - FermionField Din(psi._grid); + FermionField Din(psi.Grid()); // Assemble Din Meooe5D(psi,Din); @@ -337,7 +345,7 @@ RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) //D1+ D1- P- -> D1+^dag P+ D2-^dag //D2- P+ D2+ P-D1-^dag D2+dag - FermionField Din(psi._grid); + FermionField Din(psi.Grid()); // Apply Dw this->DW(psi,Din,DaggerYes); @@ -353,11 +361,9 @@ RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) template void CayleyFermion5D::Meooe (const FermionField &psi, FermionField &chi) { - int Ls=this->Ls; - Meooe5D(psi,this->tmp()); - if ( psi.checkerboard == Odd ) { + if ( psi.Checkerboard() == Odd ) { this->DhopEO(this->tmp(),chi,DaggerNo); } else { this->DhopOE(this->tmp(),chi,DaggerNo); @@ -368,7 +374,7 @@ template void CayleyFermion5D::MeooeDag (const FermionField &psi, FermionField &chi) { // Apply 4d dslash - if ( psi.checkerboard == Odd ) { + if ( psi.Checkerboard() == Odd ) { this->DhopEO(psi,this->tmp(),DaggerYes); } else { this->DhopOE(psi,this->tmp(),DaggerYes); @@ -386,7 +392,7 @@ void CayleyFermion5D::Mdir (const FermionField &psi, FermionField &chi,in template void CayleyFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) { - FermionField Din(V._grid); + FermionField Din(V.Grid()); if ( dag == DaggerNo ) { // U d/du [D_w D5] V = U d/du DW D5 V @@ -401,7 +407,7 @@ void CayleyFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const 
FermionField &V,int dag) { - FermionField Din(V._grid); + FermionField Din(V.Grid()); if ( dag == DaggerNo ) { // U d/du [D_w D5] V = U d/du DW D5 V @@ -416,7 +422,7 @@ void CayleyFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) { - FermionField Din(V._grid); + FermionField Din(V.Grid()); if ( dag == DaggerNo ) { // U d/du [D_w D5] V = U d/du DW D5 V @@ -433,7 +439,7 @@ void CayleyFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) { - std::vector gamma(this->Ls); + Vector gamma(this->Ls); for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; SetCoefficientsInternal(1.0,gamma,b,c); } @@ -441,13 +447,13 @@ void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,Re template void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) { - std::vector gamma(this->Ls); + Vector gamma(this->Ls); for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; SetCoefficientsInternal(zolo_hi,gamma,b,c); } //Zolo template -void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c) +void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,Vector & gamma,RealD b,RealD c) { int Ls=this->Ls; @@ -568,12 +574,12 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vectorMooeeInternalCompute(0,inv,MatpInv,MatmInv); - this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); + // int inv=1; + // this->MooeeInternalCompute(0,inv,MatpInv,MatmInv); + // this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); } - +#if 0 template void CayleyFermion5D::MooeeInternalCompute(int dag, int inv, Vector > & Matp, @@ -628,35 +634,32 @@ void CayleyFermion5D::MooeeInternalCompute(int dag, int inv, Matm.resize(Ls*LLs); for(int s2=0;s2::iscomplex() ) { - sp[l] = PplusMat 
(l*istride+s1*ostride,s2); - sm[l] = PminusMat(l*istride+s1*ostride,s2); - } else { - // if real - scalar_type tmp; - tmp = PplusMat (l*istride+s1*ostride,s2); - sp[l] = scalar_type(tmp.real(),tmp.real()); - tmp = PminusMat(l*istride+s1*ostride,s2); - sm[l] = scalar_type(tmp.real(),tmp.real()); + for(int s1=0;s1::iscomplex() ) { + sp[l] = PplusMat (l*istride+s1*ostride,s2); + sm[l] = PminusMat(l*istride+s1*ostride,s2); + } else { + // if real + scalar_type tmp; + tmp = PplusMat (l*istride+s1*ostride,s2); + sp[l] = scalar_type(tmp.real(),tmp.real()); + tmp = PminusMat(l*istride+s1*ostride,s2); + sm[l] = scalar_type(tmp.real(),tmp.real()); + } } - } - Matp[LLs*s2+s1] = Vp; - Matm[LLs*s2+s1] = Vm; - }} + Matp[LLs*s2+s1] = Vp; + Matm[LLs*s2+s1] = Vm; + }} } - - - FermOpTemplateInstantiate(CayleyFermion5D); - GparityFermOpTemplateInstantiate(CayleyFermion5D); - -}} +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h new file mode 100644 index 00000000..8af3e7c0 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -0,0 +1,235 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + + +NAMESPACE_BEGIN(Grid); + +// Pminus fowards +// Pplus backwards.. +template +void +CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + assert(phi.Checkerboard() == psi.Checkerboard()); + + int Ls =this->Ls; + + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) + M5Dcalls++; + M5Dtime-=usecond(); + + uint64_t nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss= sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1, tmp2; + for(int s=0;s +void +CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + assert(phi.Checkerboard() == psi.Checkerboard()); + + int Ls=this->Ls; + + // Flops = 6.0*(Nc*Ns) *Ls*vol + M5Dcalls++; + M5Dtime-=usecond(); + + uint64_t nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss=sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1,tmp2; + for(int s=0;s +void +CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi_i) +{ + 
chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + int Ls=this->Ls; + + auto plee = & lee [0]; + auto pdee = & dee [0]; + auto puee = & uee [0]; + auto pleem = & leem[0]; + auto pueem = & ueem[0]; + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + uint64_t nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss=sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp; + + // flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops + // Apply (L^{\prime})^{-1} + coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0] + for(int s=1;s=0;s--){ + spProj5m(tmp,chi(ss+s+1)); + coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp); + } + }); + + MooeeInvTime+=usecond(); + +} + +template +void +CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + int Ls=this->Ls; + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + auto plee = & lee [0]; + auto pdee = & dee [0]; + auto puee = & uee [0]; + auto pleem = & leem[0]; + auto pueem = & ueem[0]; + + assert(psi.Checkerboard() == psi.Checkerboard()); + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + + uint64_t nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss=sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp; + + // Apply (U^{\prime})^{-dagger} + coalescedWrite(chi[ss],psi(ss)); + for (int s=1;s=0;s--){ + spProj5p(tmp,chi(ss+s+1)); + coalescedWrite(chi[ss+s], chi(ss+s) - conjugate(plee[s])*tmp); + } + }); + MooeeInvTime+=usecond(); + +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h new file mode 100644 index 00000000..034ce642 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dvec.h @@ -0,0 +1,831 @@ 
+/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + + +#include +#include + +NAMESPACE_BEGIN(Grid); + +/* + * Dense matrix versions of routines + */ +template +void +CayleyFermion5D::MooeeInvDag(const FermionField &psi, FermionField &chi) +{ + EnableIf sfinae=0; + this->MooeeInternal(psi,chi,DaggerYes,InverseYes); +} + +template +void +CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) +{ + EnableIf sfinae=0; + this->MooeeInternal(psi,chi,DaggerNo,InverseYes); +} +template +void +CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + EnableIf sfinae=0; + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + const int nsimd= Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs==nsimd); + assert(phi.Checkerboard() == psi.Checkerboard()); + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type * u_p = (scalar_type *)&u[0]; + scalar_type * l_p = (scalar_type *)&l[0]; + scalar_type * d_p = (scalar_type *)&d[0]; + + for(int o=0;ooSites();ss+=LLs),{ // adds LLs +#if 0 + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + + hp=0.5*hp; + hm=0.5*hm; + + spRecon5m(fp,hp); + spRecon5p(fm,hm); + + chi[ss+v] = d[v]*phi[ss+v]; + chi[ss+v] = chi[ss+v] +u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } +#else + for(int v=0;v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = 
Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + // Can force these to real arithmetic and save 2x. + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); + + vstream(chi[ss+v]()(0)(0),p_00); + vstream(chi[ss+v]()(0)(1),p_01); + vstream(chi[ss+v]()(0)(2),p_02); + 
vstream(chi[ss+v]()(1)(0),p_10); + vstream(chi[ss+v]()(1)(1),p_11); + vstream(chi[ss+v]()(1)(2),p_12); + vstream(chi[ss+v]()(2)(0),p_20); + vstream(chi[ss+v]()(2)(1),p_21); + vstream(chi[ss+v]()(2)(2),p_22); + vstream(chi[ss+v]()(3)(0),p_30); + vstream(chi[ss+v]()(3)(1),p_31); + vstream(chi[ss+v]()(3)(2),p_32); + + } +#endif + }); + M5Dtime+=usecond(); +} + +template +void +CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + EnableIf sfinae=0; + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi=psi_i.View(); + auto phi=phi_i.View(); + auto chi=chi_i.View(); + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd= Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs==nsimd); + assert(phi.Checkerboard() == psi.Checkerboard()); + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type * u_p = (scalar_type *)&u[0]; + scalar_type * l_p = (scalar_type *)&l[0]; + scalar_type * d_p = (scalar_type *)&d[0]; + + for(int o=0;ooSites();ss+=LLs),{ // adds LLs +#if 0 + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + + hp=hp*0.5; + hm=hm*0.5; + spRecon5p(fp,hp); + spRecon5m(fm,hm); + + chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } +#else + for(int v=0;v(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + 
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); + + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); + + vstream(chi[ss+v]()(0)(0),p_00); + vstream(chi[ss+v]()(0)(1),p_01); + vstream(chi[ss+v]()(0)(2),p_02); + vstream(chi[ss+v]()(1)(0),p_10); + vstream(chi[ss+v]()(1)(1),p_11); + vstream(chi[ss+v]()(1)(2),p_12); + vstream(chi[ss+v]()(2)(0),p_20); + vstream(chi[ss+v]()(2)(1),p_21); + vstream(chi[ss+v]()(2)(2),p_22); + vstream(chi[ss+v]()(3)(0),p_30); + vstream(chi[ss+v]()(3)(1),p_31); + vstream(chi[ss+v]()(3)(2),p_32); + } +#endif + }); + M5Dtime+=usecond(); +} + + +#ifdef AVX512 +#include +#include 
+#include +#endif + +template +void +CayleyFermion5D::MooeeInternalAsm(const FermionField &psi_i, FermionField &chi_i, + int LLs, int site, + Vector > &Matp, + Vector > &Matm) +{ + EnableIf sfinae=0; + auto psi = psi_i.View(); + auto chi = chi_i.View(); +#ifndef AVX512 + { + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0;s1); + for(int s1=0;s1 +void +CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi_i, FermionField &chi_i, + int LLs, int site, Vector > &Matp, Vector > &Matm) +{ + EnableIf sfinae=0; +#ifndef AVX512 + { + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; + + // Ls*Ls * 2 * 12 * vol flops + for(int s1=0;s1); + for(int s1=0;s1 +void +CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) +{ + EnableIf sfinae=0; + chi.Checkerboard()=psi.Checkerboard(); + + int Ls=this->Ls; + int LLs = psi.Grid()->_rdimensions[0]; + int vol = psi.Grid()->oSites()/LLs; + + + Vector > Matp; + Vector > Matm; + Vector > *_Matp; + Vector > *_Matm; + + // MooeeInternalCompute(dag,inv,Matp,Matm); + if ( inv && dag ) { + _Matp = &MatpInvDag; + _Matm = &MatmInvDag; + } + if ( inv && (!dag) ) { + _Matp = &MatpInv; + _Matm = &MatmInv; + } + if ( !inv ) { + MooeeInternalCompute(dag,inv,Matp,Matm); + _Matp = &Matp; + _Matm = &Matm; + } + assert(_Matp->size()==Ls*LLs); + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + if ( switcheroo::iscomplex() ) { + thread_loop( (auto site=0;site +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +#pragma once + +NAMESPACE_BEGIN(Grid); + +template +void ContinuedFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale) +{ + SetCoefficientsZolotarev(1.0/scale,zdata); +} +template +void ContinuedFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata) +{ + // How to check Ls matches?? + // std::cout<n << " - n"<da << " -da "<db << " -db"<dn << " -dn"<dd << " -dd"<Ls; + assert(zdata->db==Ls);// Beta has Ls coeffs + + R=(1+this->mass)/(1-this->mass); + + Beta.resize(Ls); + cc.resize(Ls); + cc_d.resize(Ls); + sqrt_cc.resize(Ls); + for(int i=0; i < Ls ; i++){ + Beta[i] = zdata -> beta[i]; + cc[i] = 1.0/Beta[i]; + cc_d[i]=std::sqrt(cc[i]); + } + + cc_d[Ls-1]=1.0; + for(int i=0; i < Ls-1 ; i++){ + sqrt_cc[i]= std::sqrt(cc[i]*cc[i+1]); + } + sqrt_cc[Ls-2]=std::sqrt(cc[Ls-2]); + + + ZoloHiInv =1.0/zolo_hi; + dw_diag = (4.0-this->M5)*ZoloHiInv; + + See.resize(Ls); + Aee.resize(Ls); + int sign=1; + for(int s=0;s +RealD ContinuedFractionFermion5D::M (const FermionField &psi, FermionField &chi) +{ + int Ls = this->Ls; + + FermionField D(psi.Grid()); + + this->DW(psi,D,DaggerNo); + + int sign=1; + for(int s=0;s +RealD ContinuedFractionFermion5D::Mdag (const FermionField &psi, FermionField &chi) +{ + // This matrix is already hermitian. 
(g5 Dw) = Dw dag g5 = (g5 Dw)dag + // The rest of matrix is symmetric. + // Can ignore "dag" + return M(psi,chi); +} +template +void ContinuedFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ + int Ls = this->Ls; + + this->DhopDir(psi,chi,dir,disp); // Dslash on diagonal. g5 Dslash is hermitian + + int sign=1; + for(int s=0;s +void ContinuedFractionFermion5D::Meooe (const FermionField &psi, FermionField &chi) +{ + int Ls = this->Ls; + + // Apply 4d dslash + if ( psi.Checkerboard() == Odd ) { + this->DhopEO(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian + } else { + this->DhopOE(psi,chi,DaggerNo); // Dslash on diagonal. g5 Dslash is hermitian + } + + int sign=1; + for(int s=0;s +void ContinuedFractionFermion5D::MeooeDag (const FermionField &psi, FermionField &chi) +{ + this->Meooe(psi,chi); +} +template +void ContinuedFractionFermion5D::Mooee (const FermionField &psi, FermionField &chi) +{ + int Ls = this->Ls; + + int sign=1; + for(int s=0;s +void ContinuedFractionFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) +{ + this->Mooee(psi,chi); +} +template +void ContinuedFractionFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) +{ + int Ls = this->Ls; + + // Apply Linv + axpby_ssp(chi,1.0/cc_d[0],psi,0.0,psi,0,0); + for(int s=1;s=0;s--){ + axpbg5y_ssp(chi,1.0/cc_d[s],chi,-1.0*cc_d[s+1]/See[s]/cc_d[s],chi,s,s+1); + } +} +template +void ContinuedFractionFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) +{ + this->MooeeInv(psi,chi); +} + +// force terms; five routines; default to Dhop on diagonal +template +void ContinuedFractionFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = this->Ls; + + FermionField D(V.Grid()); + + int sign=1; + for(int s=0;sDhopDeriv(mat,D,V,DaggerNo); +}; +template +void ContinuedFractionFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = 
this->Ls; + + FermionField D(V.Grid()); + + int sign=1; + for(int s=0;sDhopDerivOE(mat,D,V,DaggerNo); +}; +template +void ContinuedFractionFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = this->Ls; + + FermionField D(V.Grid()); + + int sign=1; + for(int s=0;sDhopDerivEO(mat,D,V,DaggerNo); +}; + +// Constructors +template +ContinuedFractionFermion5D::ContinuedFractionFermion5D( + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5,const ImplParams &p) : + WilsonFermion5D(_Umu, + FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid,M5,p), + mass(_mass) +{ + int Ls = this->Ls; + assert((Ls&0x1)==1); // Odd Ls required +} + + template + void ContinuedFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d.Grid(),this->FermionGrid()); + conformable(exported4d.Grid(),this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void ContinuedFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d.Grid(),this->FermionGrid()); + conformable(input4d.Grid() ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=Zero(); + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h new file mode 100644 index 00000000..a3eca650 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -0,0 +1,227 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermioncache.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include + +NAMESPACE_BEGIN(Grid); + +// FIXME -- make a version of these routines with site loop outermost for cache reuse. +// Pminus fowards +// Pplus backwards.. 
+template +void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, + Vector& lower, Vector& diag, Vector& upper) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + int Ls = this->Ls; + GridBase* grid = psi_i.Grid(); + auto phi = phi_i.View(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); + assert(phi.Checkerboard() == psi.Checkerboard()); + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + auto nloop=grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + auto ss=sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + for(int s=0; sM5Dtime += usecond(); +} + +template +void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, + Vector& lower, Vector& diag, Vector& upper) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + int Ls = this->Ls; + + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + auto nloop=grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + typedef decltype(coalescedRead(psi[0])) spinor; + auto ss=sss*Ls; + for(int s=0; sM5Dtime += usecond(); +} + +template +void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionField& chi_i) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi=psi_i.View(); + auto chi=chi_i.View(); + int Ls = this->Ls; + + auto plee = & this->lee[0]; + auto pdee = & this->dee[0]; + auto puee = & this->uee[0]; + + auto pleem = & this->leem[0]; + auto pueem = & this->ueem[0]; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + uint64_t nloop=grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + auto ss=sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1,tmp2; + + // 
flops = 12*2*Ls + 12*2*Ls + 3*12*Ls + 12*2*Ls = 12*Ls * (9) = 108*Ls flops + // Apply (L^{\prime})^{-1} + coalescedWrite(chi[ss],psi(ss)); // chi[0]=psi[0] + for(int s=1; s=0; s--){ + spProj5m(tmp1, chi(ss+s+1)); + coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp1); + } + }); + this->MooeeInvTime += usecond(); +} + +template +void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, FermionField& chi_i) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase* grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); + int Ls = this->Ls; + + assert(psi.Checkerboard() == psi.Checkerboard()); + + Vector ueec(Ls); + Vector deec(Ls+1); + Vector leec(Ls); + Vector ueemc(Ls); + Vector leemc(Ls); + + for(int s=0; suee[s]); + deec[s] = conjugate(this->dee[s]); + leec[s] = conjugate(this->lee[s]); + ueemc[s] = conjugate(this->ueem[s]); + leemc[s] = conjugate(this->leem[s]); + } + deec[Ls] = conjugate(this->dee[Ls]); + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + auto nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1,tmp2; + auto ss=sss*Ls; + + // Apply (U^{\prime})^{-dagger} + coalescedWrite(chi[ss], psi(ss)); + for(int s=1; s=0; s--){ + spProj5p(tmp1, chi(ss+s+1)); + coalescedWrite(chi[ss+s],chi(ss+s) - leec[s]*tmp1); + } + }); + + this->MooeeInvTime += usecond(); +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h new file mode 100644 index 00000000..3684fd6c --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionImplementation.h @@ -0,0 +1,321 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + 
+Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#pragma once + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +template +DomainWallEOFAFermion::DomainWallEOFAFermion( + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, + RealD _shift, int _pm, RealD _M5, const ImplParams &p) : + AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, + _shift, _pm, _M5, 1.0, 0.0, p) +{ + RealD eps = 1.0; + Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls); + assert(zdata->n == this->Ls); + + std::cout << GridLogMessage << "DomainWallEOFAFermion with Ls=" << this->Ls << std::endl; + this->SetCoefficientsTanh(zdata, 1.0, 0.0); + + Approx::zolotarev_free(zdata); +} + +/*************************************************************** + * Additional EOFA operators only called outside the inverter. 
+ * Since speed is not essential, simple axpby-style + * implementations should be fine. + ***************************************************************/ +template +void DomainWallEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) +{ + int Ls = this->Ls; + + Din = Zero(); + if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); } + else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } + else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); } + else if((sign == -1) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); } +} + +// This is just the identity for DWF +template +void DomainWallEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi){ chi = psi; } + +// This is just the identity for DWF +template +void DomainWallEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi){ chi = psi; } + +/*****************************************************************************************************/ + +template +RealD DomainWallEOFAFermion::M(const FermionField& psi, FermionField& chi) +{ + FermionField Din(psi.Grid()); + + this->Meooe5D(psi, Din); + this->DW(Din, chi, DaggerNo); + axpby(chi, 1.0, 1.0, chi, psi); + this->M5D(psi, chi); + return(norm2(chi)); +} + +template +RealD DomainWallEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +{ + FermionField Din(psi.Grid()); + + this->DW(psi, Din, DaggerYes); + this->MeooeDag5D(Din, chi); + this->M5Ddag(psi, chi); + axpby(chi, 1.0, 1.0, chi, psi); + return(norm2(chi)); +} + +/******************************************************************** + * Performance critical fermion operators called inside the inverter + ********************************************************************/ + +template +void DomainWallEOFAFermion::M5D(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + int pm = this->pm; + RealD shift = this->shift; + RealD mq1 = this->mq1; + RealD mq2 = 
this->mq2; + RealD mq3 = this->mq3; + + // coefficients for shift operator ( = shift*\gamma_{5}*R_{5}*\Delta_{\pm}(mq2,mq3)*P_{\pm} ) + Coeff_t shiftp(0.0), shiftm(0.0); + if(shift != 0.0){ + if(pm == 1){ shiftp = shift*(mq3-mq2); } + else{ shiftm = -shift*(mq3-mq2); } + } + + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm; + Vector lower(Ls,-1.0); lower[0] = mq1 + shiftp; + +#if(0) + std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl; + for(int i=0; i #include #include -namespace Grid { -namespace QCD { - -// S-direction is INNERMOST and takes no part in the parity. -const std::vector -ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4}); -const std::vector -ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); +#pragma once - // 5d lattice for DWF. +NAMESPACE_BEGIN(Grid); + +// 5d lattice for DWF. template ImprovedStaggeredFermion5D::ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid, GridRedBlackCartesian &FiveDimRedBlackGrid, @@ -53,9 +48,9 @@ ImprovedStaggeredFermion5D::ImprovedStaggeredFermion5D(GridCartesian _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), _FourDimGrid (&FourDimGrid), _FourDimRedBlackGrid(&FourDimRedBlackGrid), - Stencil (&FiveDimGrid,npoint,Even,directions,displacements), - StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even - StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd + Stencil (&FiveDimGrid,npoint,Even,directions,displacements,p), + StencilEven(&FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even + StencilOdd (&FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd mass(_mass), c1(_c1), c2(_c2), @@ -108,8 +103,8 @@ ImprovedStaggeredFermion5D::ImprovedStaggeredFermion5D(GridCartesian assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); for(int d=0;d<4;d++){ - 
assert(FourDimGrid._simd_layout[d]=1); - assert(FourDimRedBlackGrid._simd_layout[d]=1); + assert(FourDimGrid._simd_layout[d]==1); + assert(FourDimRedBlackGrid._simd_layout[d]==1); assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); } @@ -226,24 +221,27 @@ void ImprovedStaggeredFermion5D::DhopDir(const FermionField &in, FermionFi Compressor compressor; Stencil.HaloExchange(in,compressor); - - parallel_for(int ss=0;ssoSites();ss++){ + auto Umu_v = Umu.View(); + auto UUUmu_v = UUUmu.View(); + auto in_v = in.View(); + auto out_v = out.View(); + thread_for( ss,Umu.Grid()->oSites(),{ for(int s=0;s void ImprovedStaggeredFermion5D::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - DoubledGaugeField & UUU, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + DoubledGaugeField & U, + DoubledGaugeField & UUU, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { // No force terms in multi-rhs solver staggered assert(0); @@ -251,18 +249,18 @@ void ImprovedStaggeredFermion5D::DerivInternal(StencilImpl & st, template void ImprovedStaggeredFermion5D::DhopDeriv(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { assert(0); } template void ImprovedStaggeredFermion5D::DhopDerivEO(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { assert(0); } @@ -270,9 +268,9 @@ void ImprovedStaggeredFermion5D::DhopDerivEO(GaugeField &mat, template void ImprovedStaggeredFermion5D::DhopDerivOE(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { assert(0); } @@ -301,8 +299,8 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & Compressor compressor; - int LLs = in._grid->_rdimensions[0]; - int len = U._grid->oSites(); + int LLs = 
in.Grid()->_rdimensions[0]; + int len = U.Grid()->oSites(); DhopFaceTime-=usecond(); st.Prepare(); @@ -328,7 +326,7 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & double start = usecond(); nthreads -= ncomms; int ttid = tid - ncomms; - int n = U._grid->oSites(); // 4d vol + int n = U.Grid()->oSites(); // 4d vol int chunk = n / nthreads; int rem = n % nthreads; int myblock, myn; @@ -341,17 +339,22 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & } // do the compute + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); + if (dag == DaggerYes) { for (int ss = myblock; ss < myblock+myn; ++ss) { int sU = ss; // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<--------- + Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<--------- } } else { for (int ss = myblock; ss < myblock+myn; ++ss) { // Interior = 1; Exterior = 0; int sU = ss; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<------------ + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,1,0); //<------------ } } ptime = usecond() - start; @@ -372,18 +375,23 @@ void ImprovedStaggeredFermion5D::DhopInternalOverlappedComms(StencilImpl & DhopFaceTime+=usecond(); DhopComputeTime2-=usecond(); + + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); if (dag == DaggerYes) { int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { + thread_for( ss,sz,{ int sU = st.surface_list[ss]; - Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1); //<---------- - } + Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1); //<---------- + }); } else { int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { + thread_for( ss,sz,{ int sU = st.surface_list[ss]; - 
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1);//<---------- - } + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v,0,1);//<---------- + }); } DhopComputeTime2+=usecond(); #else @@ -398,7 +406,7 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, const FermionField &in, FermionField &out,int dag) { Compressor compressor; - int LLs = in._grid->_rdimensions[0]; + int LLs = in.Grid()->_rdimensions[0]; @@ -410,16 +418,20 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, DhopComputeTime -= usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); if (dag == DaggerYes) { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { + thread_for( ss,U.Grid()->oSites(),{ int sU=ss; - Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out); - } + Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), LLs, sU,in_v, out_v); + }); } else { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { + thread_for( ss,U.Grid()->oSites(),{ int sU=ss; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); - } + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),LLs,sU,in_v,out_v); + }); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); @@ -432,50 +444,17 @@ void ImprovedStaggeredFermion5D::DhopInternalSerialComms(StencilImpl & st, } /*CHANGE END*/ -/* ORG -template -void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, - DoubledGaugeField & U,DoubledGaugeField & UUU, - const FermionField &in, FermionField &out,int dag) -{ - Compressor compressor; - int LLs = in._grid->_rdimensions[0]; - - - - DhopTotalTime -= usecond(); - DhopCommTime -= usecond(); - st.HaloExchange(in,compressor); - DhopCommTime += usecond(); - - DhopComputeTime -= usecond(); - // Dhop takes the 4d grid from U, and makes a 5d index for fermion - if (dag == 
DaggerYes) { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU=ss; - Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out); - } - } else { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU=ss; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out); - } - } - DhopComputeTime += usecond(); - DhopTotalTime += usecond(); -} -*/ template void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { DhopCalls+=1; - conformable(in._grid,FermionRedBlackGrid()); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check - assert(in.checkerboard==Even); - out.checkerboard = Odd; + assert(in.Checkerboard()==Even); + out.Checkerboard() = Odd; DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,UUUmuOdd,in,out,dag); } @@ -483,11 +462,11 @@ template void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) { DhopCalls+=1; - conformable(in._grid,FermionRedBlackGrid()); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check - assert(in.checkerboard==Odd); - out.checkerboard = Even; + assert(in.Checkerboard()==Odd); + out.Checkerboard() = Even; DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,UUUmuEven,in,out,dag); } @@ -495,10 +474,10 @@ template void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) { DhopCalls+=2; - conformable(in._grid,FermionGrid()); // verifies full grid - conformable(in._grid,out._grid); + conformable(in.Grid(),FermionGrid()); // verifies full grid + conformable(in.Grid(),out.Grid()); - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag); 
} @@ -506,7 +485,7 @@ void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField template void ImprovedStaggeredFermion5D::Report(void) { - std::vector latt = GridDefaultLatt(); + Coordinate latt = GridDefaultLatt(); RealD volume = Ls; for(int mu=0;mu_Nprocessors; RealD NN = _FourDimGrid->NodeCount(); @@ -564,21 +543,21 @@ void ImprovedStaggeredFermion5D::Mdir(const FermionField &in, FermionField } template RealD ImprovedStaggeredFermion5D::M(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); return axpy_norm(out, mass, in, out); } template RealD ImprovedStaggeredFermion5D::Mdag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); return axpy_norm(out, mass, in, out); } template void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { DhopOE(in, out, DaggerNo); @@ -586,7 +565,7 @@ void ImprovedStaggeredFermion5D::Meooe(const FermionField &in, FermionFiel } template void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { DhopOE(in, out, DaggerYes); @@ -595,27 +574,27 @@ void ImprovedStaggeredFermion5D::MeooeDag(const FermionField &in, FermionF template void ImprovedStaggeredFermion5D::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template void ImprovedStaggeredFermion5D::MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template void 
ImprovedStaggeredFermion5D::MooeeInv(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, - FermionField &out) { - out.checkerboard = in.checkerboard; + FermionField &out) { + out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } @@ -624,31 +603,28 @@ void ImprovedStaggeredFermion5D::MooeeInvDag(const FermionField &in, //////////////////////////////////////////////////////// template void ImprovedStaggeredFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu) + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) { - assert(0); + assert(0); } template -void ImprovedStaggeredFermion5D::SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, +void ImprovedStaggeredFermion5D::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx) { - assert(0); + assert(0); } - -FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion5D); -FermOpStaggeredVec5dTemplateInstantiate(ImprovedStaggeredFermion5D); -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h similarity index 75% rename from Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc rename to Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h index 883db902..e2605d81 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc +++ b/Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h @@ -28,40 +28,35 @@ directory /* END LEGAL */ 
#include -namespace Grid { -namespace QCD { +#pragma once -const std::vector -ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); -const std::vector -ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); +NAMESPACE_BEGIN(Grid); ///////////////////////////////// // Constructor and gauge import ///////////////////////////////// - template ImprovedStaggeredFermion::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, RealD _c1, RealD _c2,RealD _u0, const ImplParams &p) - : Kernels(p), - _grid(&Fgrid), - _cbgrid(&Hgrid), - Stencil(&Fgrid, npoint, Even, directions, displacements), - StencilEven(&Hgrid, npoint, Even, directions, displacements), // source is Even - StencilOdd(&Hgrid, npoint, Odd, directions, displacements), // source is Odd - mass(_mass), - Lebesgue(_grid), - LebesgueEvenOdd(_cbgrid), - Umu(&Fgrid), - UmuEven(&Hgrid), - UmuOdd(&Hgrid), - UUUmu(&Fgrid), - UUUmuEven(&Hgrid), - UUUmuOdd(&Hgrid) , - _tmp(&Hgrid) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements,p), + StencilEven(&Hgrid, npoint, Even, directions, displacements,p), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions, displacements,p), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid), + UUUmu(&Fgrid), + UUUmuEven(&Hgrid), + UUUmuOdd(&Hgrid) , + _tmp(&Hgrid) { int vol4; int LLs=1; @@ -85,17 +80,17 @@ ImprovedStaggeredFermion::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau ImportGauge(_Uthin,_Ufat); } - //////////////////////////////////////////////////////////// - // Momentum space propagator should be - // https://arxiv.org/pdf/hep-lat/9712010.pdf - // - // mom space action. 
- // gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m - // - // must track through staggered flavour/spin reduction in literature to - // turn to free propagator for the one component chi field, a la page 4/5 - // of above link to implmement fourier based solver. - //////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////// +// Momentum space propagator should be +// https://arxiv.org/pdf/hep-lat/9712010.pdf +// +// mom space action. +// gamma_mu i ( c1 sin pmu + c2 sin 3 pmu ) + m +// +// must track through staggered flavour/spin reduction in literature to +// turn to free propagator for the one component chi field, a la page 4/5 +// of above link to implmement fourier based solver. +//////////////////////////////////////////////////////////// template void ImprovedStaggeredFermion::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat) { @@ -177,21 +172,21 @@ void ImprovedStaggeredFermion::ImportGauge(const GaugeField &_Uthin,const template RealD ImprovedStaggeredFermion::M(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); return axpy_norm(out, mass, in, out); } template RealD ImprovedStaggeredFermion::Mdag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); return axpy_norm(out, mass, in, out); } template void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { DhopOE(in, out, DaggerNo); @@ -199,7 +194,7 @@ void ImprovedStaggeredFermion::Meooe(const FermionField &in, FermionField } template void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } 
else { DhopOE(in, out, DaggerYes); @@ -208,27 +203,27 @@ void ImprovedStaggeredFermion::MeooeDag(const FermionField &in, FermionFie template void ImprovedStaggeredFermion::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(mass); out = scal * in; } template void ImprovedStaggeredFermion::MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template void ImprovedStaggeredFermion::MooeeInv(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); out = (1.0 / (mass)) * in; } template void ImprovedStaggeredFermion::MooeeInvDag(const FermionField &in, - FermionField &out) { - out.checkerboard = in.checkerboard; + FermionField &out) { + out.Checkerboard() = in.Checkerboard(); MooeeInv(in, out); } @@ -244,8 +239,8 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge Compressor compressor; - FermionField Btilde(B._grid); - FermionField Atilde(B._grid); + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); Atilde = A; st.HaloExchange(B, compressor); @@ -255,10 +250,13 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge //////////////////////// // Call the single hop //////////////////////// - PARALLEL_FOR_LOOP - for (int sss = 0; sss < B._grid->oSites(); sss++) { - Kernels::DhopDir(st, U, UUU, st.CommBuf(), sss, sss, B, Btilde, mu,1); - } + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto B_v = B.View(); + auto Btilde_v = Btilde.View(); + thread_for(sss,B.Grid()->oSites(),{ + Kernels::DhopDirKernel(st, U_v, UUU_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu,1); + }); // Force in three link terms // @@ -288,11 +286,11 @@ void ImprovedStaggeredFermion::DerivInternal(StencilImpl &st, DoubledGauge template void 
ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _grid); - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); + conformable(U.Grid(), _grid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); - mat.checkerboard = U.checkerboard; + mat.Checkerboard() = U.Checkerboard(); DerivInternal(Stencil, Umu, UUUmu, mat, U, V, dag); } @@ -300,13 +298,13 @@ void ImprovedStaggeredFermion::DhopDeriv(GaugeField &mat, const FermionFie template void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _cbgrid); - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); - assert(V.checkerboard == Even); - assert(U.checkerboard == Odd); - mat.checkerboard = Odd; + assert(V.Checkerboard() == Even); + assert(U.Checkerboard() == Odd); + mat.Checkerboard() = Odd; DerivInternal(StencilEven, UmuOdd, UUUmuOdd, mat, U, V, dag); } @@ -314,48 +312,51 @@ void ImprovedStaggeredFermion::DhopDerivOE(GaugeField &mat, const FermionF template void ImprovedStaggeredFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _cbgrid); - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); - assert(V.checkerboard == Odd); - assert(U.checkerboard == Even); - mat.checkerboard = Even; + assert(V.Checkerboard() == Odd); + assert(U.Checkerboard() == Even); + mat.Checkerboard() = Even; DerivInternal(StencilOdd, UmuEven, UUUmuEven, mat, U, V, dag); } template -void ImprovedStaggeredFermion::Dhop(const FermionField &in, FermionField &out, int dag) { +void ImprovedStaggeredFermion::Dhop(const FermionField &in, FermionField &out, 
int dag) +{ DhopCalls+=2; - conformable(in._grid, _grid); // verifies full grid - conformable(in._grid, out._grid); + conformable(in.Grid(), _grid); // verifies full grid + conformable(in.Grid(), out.Grid()); - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); DhopInternal(Stencil, Lebesgue, Umu, UUUmu, in, out, dag); } template -void ImprovedStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { +void ImprovedStaggeredFermion::DhopOE(const FermionField &in, FermionField &out, int dag) +{ DhopCalls+=1; - conformable(in._grid, _cbgrid); // verifies half grid - conformable(in._grid, out._grid); // drops the cb check + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check - assert(in.checkerboard == Even); - out.checkerboard = Odd; + assert(in.Checkerboard() == Even); + out.Checkerboard() = Odd; DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, UUUmuOdd, in, out, dag); } template -void ImprovedStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) { +void ImprovedStaggeredFermion::DhopEO(const FermionField &in, FermionField &out, int dag) +{ DhopCalls+=1; - conformable(in._grid, _cbgrid); // verifies half grid - conformable(in._grid, out._grid); // drops the cb check + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check - assert(in.checkerboard == Odd); - out.checkerboard = Even; + assert(in.Checkerboard() == Odd); + out.Checkerboard() = Even; DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, UUUmuEven, in, out, dag); } @@ -370,11 +371,13 @@ void ImprovedStaggeredFermion::DhopDir(const FermionField &in, FermionFiel Compressor compressor; Stencil.HaloExchange(in, compressor); - - PARALLEL_FOR_LOOP - for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopDir(Stencil, Umu, UUUmu, Stencil.CommBuf(), sss, sss, in, out, dir, disp); - } + auto Umu_v = Umu.View(); + 
auto UUUmu_v = UUUmu.View(); + auto in_v = in.View(); + auto out_v = out.View(); + thread_for( sss, in.Grid()->oSites(),{ + Kernels::DhopDirKernel(Stencil, Umu_v, UUUmu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dir, disp); + }); }; template @@ -400,7 +403,7 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st { #ifdef GRID_OMP Compressor compressor; - int len = U._grid->oSites(); + int len = U.Grid()->oSites(); const int LLs = 1; DhopTotalTime -= usecond(); @@ -439,17 +442,21 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st } // do the compute + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); if (dag == DaggerYes) { for (int ss = myblock; ss < myblock+myn; ++ss) { int sU = ss; // Interior = 1; Exterior = 0; must implement for staggered - Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0); + Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); } } else { for (int ss = myblock; ss < myblock+myn; ++ss) { // Interior = 1; Exterior = 0; int sU = ss; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0); + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,1,0); } } } else { @@ -464,17 +471,23 @@ void ImprovedStaggeredFermion::DhopInternalOverlappedComms(StencilImpl &st DhopFaceTime -= usecond(); DhopComputeTime2 -= usecond(); - if (dag == DaggerYes) { - int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { - int sU = st.surface_list[ss]; - Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1); - } - } else { - int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { - int sU = st.surface_list[ss]; - Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1); + { + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); + if (dag == DaggerYes) { + int sz=st.surface_list.size(); + thread_for(ss,sz,{ + int sU = 
st.surface_list[ss]; + Kernels::DhopSiteDag(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); + }); + } else { + int sz=st.surface_list.size(); + thread_for(ss,sz,{ + int sU = st.surface_list[ss]; + Kernels::DhopSite(st,lo,U_v,UUU_v,st.CommBuf(),1,sU,in_v,out_v,0,1); + }); } } DhopComputeTime2 += usecond(); @@ -500,15 +513,19 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le st.HaloExchange(in, compressor); DhopCommTime += usecond(); + auto U_v = U.View(); + auto UUU_v = UUU.View(); + auto in_v = in.View(); + auto out_v = out.View(); DhopComputeTime -= usecond(); if (dag == DaggerYes) { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); - } + thread_for(sss, in.Grid()->oSites(),{ + Kernels::DhopSiteDag(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); + }); } else { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out); - } + thread_for(sss, in.Grid()->oSites(),{ + Kernels::DhopSite(st, lo, U_v, UUU_v, st.CommBuf(), 1, sss, in_v, out_v); + }); } DhopComputeTime += usecond(); DhopTotalTime += usecond(); @@ -520,7 +537,7 @@ void ImprovedStaggeredFermion::DhopInternalSerialComms(StencilImpl &st, Le template void ImprovedStaggeredFermion::Report(void) { - std::vector latt = GridDefaultLatt(); + Coordinate latt = _grid->GlobalDimensions(); RealD volume = 1; for(int mu=0;mu_Nprocessors; RealD NN = _grid->NodeCount(); @@ -574,31 +591,25 @@ void ImprovedStaggeredFermion::ZeroCounters(void) //////////////////////////////////////////////////////// template void ImprovedStaggeredFermion::ContractConservedCurrent(PropagatorField &q_in_1, - PropagatorField &q_in_2, - PropagatorField &q_out, - Current curr_type, - unsigned int mu) + PropagatorField &q_in_2, + PropagatorField &q_out, + Current curr_type, + unsigned int mu) { - assert(0); + assert(0); } template -void 
ImprovedStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, - PropagatorField &q_out, - Current curr_type, - unsigned int mu, - unsigned int tmin, +void ImprovedStaggeredFermion::SeqConservedCurrent(PropagatorField &q_in, + PropagatorField &q_out, + Current curr_type, + unsigned int mu, + unsigned int tmin, unsigned int tmax, ComplexField &lattice_cmplx) { - assert(0); + assert(0); } - -FermOpStaggeredTemplateInstantiate(ImprovedStaggeredFermion); - - //AdjointFermOpTemplateInstantiate(ImprovedStaggeredFermion); - //TwoIndexFermOpTemplateInstantiate(ImprovedStaggeredFermion); - -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h new file mode 100644 index 00000000..650435fc --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -0,0 +1,453 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermioncache.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include + +NAMESPACE_BEGIN(Grid); + + +template +void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, + Vector &lower, Vector &diag, Vector &upper) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss = sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1; + spinor tmp2; + for(int s=0; sM5Dtime += usecond(); +} + +template +void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, + Vector &lower, Vector &diag, Vector &upper, + Vector &shift_coeffs) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + + auto pm = this->pm; + int shift_s = (pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator + + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss = sss*Ls; + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1; + spinor tmp2; + spinor tmp; + for(int s=0; sM5Dtime += usecond(); +} + +template +void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, + Vector &lower, Vector &diag, Vector &upper) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(), { + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1, tmp2; + + for(int s=0; sM5Dtime += usecond(); +} + +template +void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, + Vector &lower, Vector &diag, Vector &upper, + Vector &shift_coeffs) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + int shift_s = (this->pm == 1) ? 
(Ls-1) : 0; // s-component modified by shift operator + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + + assert(phi.Checkerboard() == psi.Checkerboard()); + + // Flops = 6.0*(Nc*Ns) *Ls*vol + this->M5Dcalls++; + this->M5Dtime -= usecond(); + + auto pm = this->pm; + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1, tmp2, tmp; + tmp1=Zero(); + coalescedWrite(chi[ss+Ls-1],tmp1); + + for(int s=0; sM5Dtime += usecond(); +} + +template +void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + auto plee = & this->lee [0]; + auto pdee = & this->dee [0]; + auto puee = & this->uee [0]; + auto pleem= & this->leem[0]; + auto pueem= & this->ueem[0]; + + if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp; + + // Apply (L^{\prime})^{-1} + coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] + for(int s=1; s=0; s--){ + spProj5m(tmp, chi(ss+s+1)); + coalescedWrite(chi[ss+s], chi(ss+s) - puee[s]*tmp); + } + }); + + this->MooeeInvTime += usecond(); +} + +template +void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + auto pm = this->pm; + auto plee = & this->lee [0]; + auto pdee = & this->dee [0]; + auto puee = & this->uee [0]; + auto pleem= & this->leem[0]; + auto pueem= & this->ueem[0]; + auto pMooeeInv_shift_lc = 
&MooeeInv_shift_lc[0]; + auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0]; + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1,tmp2,tmp2_spProj; + + // Apply (L^{\prime})^{-1} and accumulate MooeeInv_shift_lc[j]*psi[j] in tmp2 + coalescedWrite(chi[ss], psi(ss)); // chi[0]=psi[0] + tmp2 = pMooeeInv_shift_lc[0]*psi(ss); + for(int s=1; s=0; s--){ + coalescedWrite(chi[ss+s] , chi(ss+s) - puee[s]*tmp1); + spProj5m(tmp1, chi(ss+s)); + coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInv_shift_norm[s]*tmp2_spProj); + } + }); + + this->MooeeInvTime += usecond(); +} + +template +void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionField &chi_i) +{ + if(this->shift != 0.0){ MooeeInvDag_shift(psi_i,chi_i); return; } + + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + int Ls = this->Ls; + auto psi = psi_i.View(); + auto chi = chi_i.View(); + + auto plee = & this->lee [0]; + auto pdee = & this->dee [0]; + auto puee = & this->uee [0]; + auto pleem= & this->leem[0]; + auto pueem= & this->ueem[0]; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp; + + // Apply (U^{\prime})^{-dag} + coalescedWrite(chi[ss], psi(ss)); + for(int s=1; s=0; s--){ + spProj5p(tmp, chi(ss+s+1)); + coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp); + } + }); + + this->MooeeInvTime += usecond(); +} + +template +void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard() = psi_i.Checkerboard(); + GridBase *grid = psi_i.Grid(); + auto psi = psi_i.View(); + auto chi = chi_i.View(); + int Ls = this->Ls; + + auto pm = this->pm; + auto plee = & this->lee 
[0]; + auto pdee = & this->dee [0]; + auto puee = & this->uee [0]; + auto pleem= & this->leem[0]; + auto pueem= & this->ueem[0]; + auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0]; + auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0]; + + this->MooeeInvCalls++; + this->MooeeInvTime -= usecond(); + + int nloop = grid->oSites()/Ls; + accelerator_for(sss,nloop,Simd::Nsimd(),{ + + uint64_t ss = sss*Ls; + + typedef decltype(coalescedRead(psi[0])) spinor; + spinor tmp1,tmp2,tmp2_spProj; + + // Apply (U^{\prime})^{-dag} and accumulate MooeeInvDag_shift_lc[j]*psi[j] in tmp2 + coalescedWrite(chi[ss], psi(ss)); + tmp2 = pMooeeInvDag_shift_lc[0]*psi(ss); + for(int s=1; s=0; s--){ + coalescedWrite(chi[ss+s], chi(ss+s) - plee[s]*tmp1); + spProj5p(tmp1, chi(ss+s)); + coalescedWrite(chi[ss+s], chi(ss+s) + pMooeeInvDag_shift_norm[s]*tmp2_spProj); + } + }); + + this->MooeeInvTime += usecond(); +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h new file mode 100644 index 00000000..256423e6 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionImplementation.h @@ -0,0 +1,407 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#pragma once + +#include +#include + +NAMESPACE_BEGIN(Grid); + +template +MobiusEOFAFermion::MobiusEOFAFermion( + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mq1, RealD _mq2, RealD _mq3, + RealD _shift, int _pm, RealD _M5, + RealD _b, RealD _c, const ImplParams &p) : + AbstractEOFAFermion(_Umu, FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid, _mq1, _mq2, _mq3, + _shift, _pm, _M5, _b, _c, p) +{ + int Ls = this->Ls; + + RealD eps = 1.0; + Approx::zolotarev_data *zdata = Approx::higham(eps, this->Ls); + assert(zdata->n == this->Ls); + + std::cout << GridLogMessage << "MobiusEOFAFermion (b=" << _b << + ",c=" << _c << ") with Ls=" << Ls << std::endl; + this->SetCoefficientsTanh(zdata, _b, _c); + std::cout << GridLogMessage << "EOFA parameters: (mq1=" << _mq1 << + ",mq2=" << _mq2 << ",mq3=" << _mq3 << ",shift=" << _shift << + ",pm=" << _pm << ")" << std::endl; + + Approx::zolotarev_free(zdata); + + if(_shift != 0.0){ + SetCoefficientsPrecondShiftOps(); + } else { + Mooee_shift.resize(Ls, 0.0); + MooeeInv_shift_lc.resize(Ls, 0.0); + MooeeInv_shift_norm.resize(Ls, 0.0); + MooeeInvDag_shift_lc.resize(Ls, 0.0); + MooeeInvDag_shift_norm.resize(Ls, 0.0); + } +} + 
+/**************************************************************** + * Additional EOFA operators only called outside the inverter. + * Since speed is not essential, simple axpby-style + * implementations should be fine. + ***************************************************************/ +template +void MobiusEOFAFermion::Omega(const FermionField& psi, FermionField& Din, int sign, int dag) +{ + int Ls = this->Ls; + RealD alpha = this->alpha; + + Din = Zero(); + if((sign == 1) && (dag == 0)) { // \Omega_{+} + for(int s=0; s +void MobiusEOFAFermion::Dtilde(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + RealD b = 0.5 * ( 1.0 + this->alpha ); + RealD c = 0.5 * ( 1.0 - this->alpha ); + RealD mq1 = this->mq1; + + for(int s=0; s +void MobiusEOFAFermion::DtildeInv(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + RealD m = this->mq1; + RealD c = 0.5 * this->alpha; + RealD d = 0.5; + + RealD DtInv_p(0.0), DtInv_m(0.0); + RealD N = std::pow(c+d,Ls) + m*std::pow(c-d,Ls); + FermionField tmp(this->FermionGrid()); + + for(int s=0; s sp) ? 
0.0 : std::pow(-1.0,sp-s) * std::pow(c-d,sp-s) / std::pow(c+d,sp-s+1); + + if(sp == 0){ + axpby_ssp_pplus (tmp, 0.0, tmp, DtInv_p, psi, s, sp); + axpby_ssp_pminus(tmp, 0.0, tmp, DtInv_m, psi, s, sp); + } else { + axpby_ssp_pplus (tmp, 1.0, tmp, DtInv_p, psi, s, sp); + axpby_ssp_pminus(tmp, 1.0, tmp, DtInv_m, psi, s, sp); + } + + }} +} + +/*****************************************************************************************************/ + +template +RealD MobiusEOFAFermion::M(const FermionField& psi, FermionField& chi) +{ + FermionField Din(psi.Grid()); + + this->Meooe5D(psi, Din); + this->DW(Din, chi, DaggerNo); + axpby(chi, 1.0, 1.0, chi, psi); + this->M5D(psi, chi); + return(norm2(chi)); +} + +template +RealD MobiusEOFAFermion::Mdag(const FermionField& psi, FermionField& chi) +{ + FermionField Din(psi.Grid()); + + this->DW(psi, Din, DaggerYes); + this->MeooeDag5D(Din, chi); + this->M5Ddag(psi, chi); + axpby(chi, 1.0, 1.0, chi, psi); + return(norm2(chi)); +} + +/******************************************************************** + * Performance critical fermion operators called inside the inverter + ********************************************************************/ + +template +void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + Vector lower(Ls,-1.0); lower[0] = this->mq1; + + // no shift term + if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } + + // fused M + shift operation + else{ this->M5D_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } +} + +template +void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + Vector lower(Ls,-1.0); lower[0] = this->mq1; + + // no shift term + if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); } + + // fused M + shift operation + 
else{ this->M5Ddag_shift(psi, chi, chi, lower, diag, upper, Mooee_shift); } +} + +// half checkerboard operations +template +void MobiusEOFAFermion::Mooee(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + + // coefficients of Mooee + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); + for(int s=0; scee[s]; + lower[s] = -this->cee[s]; + } + upper[Ls-1] *= -this->mq1; + lower[0] *= -this->mq1; + + // no shift term + if(this->shift == 0.0){ this->M5D(psi, psi, chi, lower, diag, upper); } + + // fused M + shift operation + else { this->M5D_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); } +} + +template +void MobiusEOFAFermion::MooeeDag(const FermionField& psi, FermionField& chi) +{ + int Ls = this->Ls; + + // coefficients of MooeeDag + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); + for(int s=0; scee[s+1]; + lower[s] = this->mq1*this->cee[Ls-1]; + } else if(s==(Ls-1)) { + upper[s] = this->mq1*this->cee[0]; + lower[s] = -this->cee[s-1]; + } else { + upper[s] = -this->cee[s+1]; + lower[s] = -this->cee[s-1]; + } + } + + // no shift term + if(this->shift == 0.0){ this->M5Ddag(psi, psi, chi, lower, diag, upper); } + + // fused M + shift operation + else{ this->M5Ddag_shift(psi, psi, chi, lower, diag, upper, Mooee_shift); } +} + +/****************************************************************************************/ + +// Computes coefficients for applying Cayley preconditioned shift operators +// (Mooee + \Delta) --> Mooee_shift +// (Mooee + \Delta)^{-1} --> MooeeInv_shift_lc, MooeeInv_shift_norm +// (Mooee + \Delta)^{-dag} --> MooeeInvDag_shift_lc, MooeeInvDag_shift_norm +// For the latter two cases, the operation takes the form +// [ (Mooee + \Delta)^{-1} \psi ]_{i} = Mooee_{ij} \psi_{j} + +// ( MooeeInv_shift_norm )_{i} ( \sum_{j} [ MooeeInv_shift_lc ]_{j} P_{pm} \psi_{j} ) +template +void MobiusEOFAFermion::SetCoefficientsPrecondShiftOps() +{ + int Ls = this->Ls; + int pm = this->pm; + RealD alpha = 
this->alpha; + RealD k = this->k; + RealD mq1 = this->mq1; + RealD shift = this->shift; + + // Initialize + Mooee_shift.resize(Ls); + MooeeInv_shift_lc.resize(Ls); + MooeeInv_shift_norm.resize(Ls); + MooeeInvDag_shift_lc.resize(Ls); + MooeeInvDag_shift_norm.resize(Ls); + + // Construct Mooee_shift + int idx(0); + Coeff_t N = ( (pm == 1) ? 1.0 : -1.0 ) * (2.0*shift*k) * + ( std::pow(alpha+1.0,Ls) + mq1*std::pow(alpha-1.0,Ls) ); + for(int s=0; s d = Mooee_shift; + Vector u(Ls,0.0); + Vector y(Ls,0.0); + Vector q(Ls,0.0); + if(pm == 1){ u[0] = 1.0; } + else{ u[Ls-1] = 1.0; } + + // Tridiagonal matrix algorithm + Sherman-Morrison formula + // + // We solve + // ( Mooee' + u \otimes v ) MooeeInvDag_shift_lc = Mooee_shift + // where Mooee' is the tridiagonal part of Mooee_{+}, and + // u = (1,0,...,0) and v = (0,...,0,mq1*cee[0]) are chosen + // so that the outer-product u \otimes v gives the (0,Ls-1) + // entry of Mooee_{+}. + // + // We do this as two solves: Mooee'*y = d and Mooee'*q = u, + // and then construct the solution to the original system + // MooeeInvDag_shift_lc = y - / ( 1 + ) q + if(pm == 1){ + for(int s=1; scee[s] / this->bee[s-1]; + d[s] -= m*d[s-1]; + u[s] -= m*u[s-1]; + } + } + y[Ls-1] = d[Ls-1] / this->bee[Ls-1]; + q[Ls-1] = u[Ls-1] / this->bee[Ls-1]; + for(int s=Ls-2; s>=0; --s){ + if(pm == 1){ + y[s] = d[s] / this->bee[s]; + q[s] = u[s] / this->bee[s]; + } else { + y[s] = ( d[s] + this->cee[s]*y[s+1] ) / this->bee[s]; + q[s] = ( u[s] + this->cee[s]*q[s+1] ) / this->bee[s]; + } + } + + // Construct MooeeInvDag_shift_lc + for(int s=0; scee[0]*y[Ls-1] / + (1.0+mq1*this->cee[0]*q[Ls-1]) * q[s]; + } else { + MooeeInvDag_shift_lc[s] = y[s] - mq1*this->cee[Ls-1]*y[0] / + (1.0+mq1*this->cee[Ls-1]*q[0]) * q[s]; + } + } + + // Compute remaining coefficients + N = (pm == 1) ? 
(1.0 + MooeeInvDag_shift_lc[Ls-1]) : (1.0 + MooeeInvDag_shift_lc[0]); + for(int s=0; sbee[s],s) * pow(this->cee[s],Ls-1-s); } + else { MooeeInv_shift_lc[s] = pow(this->bee[s],Ls-1-s) * pow(this->cee[s],s); } + + // MooeeInv_shift_norm + MooeeInv_shift_norm[s] = -MooeeInvDag_shift_lc[s] / + ( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; + + // MooeeInvDag_shift_norm + if(pm == 1){ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],s) * pow(this->cee[s],(Ls-1-s)) / + ( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; } + else{ MooeeInvDag_shift_norm[s] = -pow(this->bee[s],(Ls-1-s)) * pow(this->cee[s],s) / + ( pow(this->bee[s],Ls) + mq1*pow(this->cee[s],Ls) ) / N; } + } + } +} + +// Recompute coefficients for a different value of shift constant +template +void MobiusEOFAFermion::RefreshShiftCoefficients(RealD new_shift) +{ + this->shift = new_shift; + if(new_shift != 0.0){ + SetCoefficientsPrecondShiftOps(); + } else { + int Ls = this->Ls; + Mooee_shift.resize(Ls,0.0); + MooeeInv_shift_lc.resize(Ls,0.0); + MooeeInv_shift_norm.resize(Ls,0.0); + MooeeInvDag_shift_lc.resize(Ls,0.0); + MooeeInvDag_shift_norm.resize(Ls,0.0); + } +} + + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h new file mode 100644 index 00000000..9f8f91ad --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h @@ -0,0 +1,450 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the 
License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +template +void PartialFractionFermion5D::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ + // this does both dag and undag but is trivial; make a common helper routing + int Ls = this->Ls; + + this->DhopDir(psi,chi,dir,disp); + + int nblock=(Ls-1)/2; + for(int b=0;b +void PartialFractionFermion5D::Meooe_internal(const FermionField &psi, FermionField &chi,int dag) +{ + int Ls = this->Ls; + if ( psi.Checkerboard() == Odd ) { + this->DhopEO(psi,chi,DaggerNo); + } else { + this->DhopOE(psi,chi,DaggerNo); + } + + int nblock=(Ls-1)/2; + for(int b=0;b +void PartialFractionFermion5D::Mooee_internal(const FermionField &psi, FermionField &chi,int dag) +{ + // again dag and undag are trivially related + int sign = dag ? (-1) : 1; + int Ls = this->Ls; + + int nblock=(Ls-1)/2; + for(int b=0;b +void PartialFractionFermion5D::MooeeInv_internal(const FermionField &psi, FermionField &chi,int dag) +{ + int sign = dag ? 
(-1) : 1; + int Ls = this->Ls; + + FermionField tmp(psi.Grid()); + + /////////////////////////////////////////////////////////////////////////////////////// + //Linv + /////////////////////////////////////////////////////////////////////////////////////// + int nblock=(Ls-1)/2; + + axpy(chi,0.0,psi,psi); // Identity piece + + for(int b=0;b +void PartialFractionFermion5D::M_internal(const FermionField &psi, FermionField &chi,int dag) +{ + FermionField D(psi.Grid()); + + int Ls = this->Ls; + int sign = dag ? (-1) : 1; + + // For partial frac Hw case (b5=c5=1) chroma quirkily computes + // + // Conventions for partfrac appear to be a mess. + // Tony's Nara lectures have + // + // BlockDiag( H/p_i 1 | 1 ) + // ( 1 p_i H / q_i^2 | 0 ) + // --------------------------------- + // ( -1 0 | R +p0 H ) + // + //Chroma ( -2H 2sqrt(q_i) | 0 ) + // (2 sqrt(q_i) 2H | 2 sqrt(p_i) ) + // --------------------------------- + // ( 0 -2 sqrt(p_i) | 2 R gamma_5 + p0 2H + // + // Edwards/Joo/Kennedy/Wenger + // + // Here, the "beta's" selected by chroma to scale the unphysical bulk constraint fields + // incorporate the approx scale factor. 
This is obtained by propagating the + // scale on "H" out to the off diagonal elements as follows: + // + // BlockDiag( H/p_i 1 | 1 ) + // ( 1 p_i H / q_i^2 | 0 ) + // --------------------------------- + // ( -1 0 | R + p_0 H ) + // + // becomes: + // BlockDiag( H/ sp_i 1 | 1 ) + // ( 1 sp_i H / s^2q_i^2 | 0 ) + // --------------------------------- + // ( -1 0 | R + p_0/s H ) + // + // + // This is implemented in Chroma by + // p0' = p0/approxMax + // p_i' = p_i*approxMax + // q_i' = q_i*approxMax*approxMax + // + // After the equivalence transform is applied the matrix becomes + // + //Chroma ( -2H sqrt(q'_i) | 0 ) + // (sqrt(q'_i) 2H | sqrt(p'_i) ) + // --------------------------------- + // ( 0 -sqrt(p'_i) | 2 R gamma_5 + p'0 2H + // + // = ( -2H sqrt(q_i)amax | 0 ) + // (sqrt(q_i)amax 2H | sqrt(p_i*amax) ) + // --------------------------------- + // ( 0 -sqrt(p_i)*amax | 2 R gamma_5 + p0/amax 2H + // + + this->DW(psi,D,DaggerNo); + + int nblock=(Ls-1)/2; + for(int b=0;bmass)/(1-this->mass); + //R g5 psi[Ls] + p[0] H + ag5xpbg5y_ssp(chi,R*scale,psi,p[nblock]*scale/amax,D,Ls-1,Ls-1); + + for(int b=0;b +RealD PartialFractionFermion5D::M (const FermionField &in, FermionField &out) +{ + M_internal(in,out,DaggerNo); + return norm2(out); +} +template +RealD PartialFractionFermion5D::Mdag (const FermionField &in, FermionField &out) +{ + M_internal(in,out,DaggerYes); + return norm2(out); +} + +template +void PartialFractionFermion5D::Meooe (const FermionField &in, FermionField &out) +{ + Meooe_internal(in,out,DaggerNo); +} +template +void PartialFractionFermion5D::MeooeDag (const FermionField &in, FermionField &out) +{ + Meooe_internal(in,out,DaggerYes); +} +template +void PartialFractionFermion5D::Mooee (const FermionField &in, FermionField &out) +{ + Mooee_internal(in,out,DaggerNo); +} +template +void PartialFractionFermion5D::MooeeDag (const FermionField &in, FermionField &out) +{ + Mooee_internal(in,out,DaggerYes); +} + +template +void 
PartialFractionFermion5D::MooeeInv (const FermionField &in, FermionField &out) +{ + MooeeInv_internal(in,out,DaggerNo); +} +template +void PartialFractionFermion5D::MooeeInvDag (const FermionField &in, FermionField &out) +{ + MooeeInv_internal(in,out,DaggerYes); +} + + +// force terms; five routines; default to Dhop on diagonal +template +void PartialFractionFermion5D::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = this->Ls; + + FermionField D(V.Grid()); + + int nblock=(Ls-1)/2; + for(int b=0;bDhopDeriv(mat,D,V,DaggerNo); +}; +template +void PartialFractionFermion5D::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = this->Ls; + + FermionField D(V.Grid()); + + int nblock=(Ls-1)/2; + for(int b=0;bDhopDerivOE(mat,D,V,DaggerNo); +}; +template +void PartialFractionFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) +{ + int Ls = this->Ls; + + FermionField D(V.Grid()); + + int nblock=(Ls-1)/2; + for(int b=0;bDhopDerivEO(mat,D,V,DaggerNo); +}; + +template +void PartialFractionFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD scale){ + SetCoefficientsZolotarev(1.0/scale,zdata); +} +template +void PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata){ + + // check on degree matching + // std::cout<n << " - n"<da << " -da "<db << " -db"<dn << " -dn"<dd << " -dd"<Ls; + + assert(Ls == (2*zdata->da -1) ); + + // Part frac + // RealD R; + R=(1+mass)/(1-mass); + dw_diag = (4.0-this->M5); + + // std::vector p; + // std::vector q; + p.resize(zdata->da); + q.resize(zdata->dd); + + for(int n=0;nda;n++){ + p[n] = zdata -> alpha[n]; + } + for(int n=0;ndd;n++){ + q[n] = -zdata -> ap[n]; + } + + scale= part_frac_chroma_convention ? 
2.0 : 1.0; // Chroma conventions annoy me + + amax=zolo_hi; +} + + template + void PartialFractionFermion5D::ExportPhysicalFermionSolution(const FermionField &solution5d,FermionField &exported4d) + { + int Ls = this->Ls; + conformable(solution5d.Grid(),this->FermionGrid()); + conformable(exported4d.Grid(),this->GaugeGrid()); + ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + } + template + void PartialFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) + { + int Ls = this->Ls; + conformable(imported5d.Grid(),this->FermionGrid()); + conformable(input4d.Grid() ,this->GaugeGrid()); + FermionField tmp(this->FermionGrid()); + tmp=Zero(); + InsertSlice(input4d, tmp, Ls-1, Ls-1); + tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; + this->Dminus(tmp,imported5d); + } + +// Constructors +template +PartialFractionFermion5D::PartialFractionFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD M5, + const ImplParams &p) : + WilsonFermion5D(_Umu, + FiveDimGrid, FiveDimRedBlackGrid, + FourDimGrid, FourDimRedBlackGrid,M5,p), + mass(_mass) + +{ + int Ls = this->Ls; + + assert((Ls&0x1)==1); // Odd Ls required + int nrational=Ls-1; + + + Approx::zolotarev_data *zdata = Approx::higham(1.0,nrational); + + // NB: chroma uses a cast to "float" for the zolotarev range(!?). 
+ // this creates a real difference in the operator which I do not like but we can replicate here + // to demonstrate compatibility + // RealD eps = (zolo_lo / zolo_hi); + // zdata = bfm_zolotarev(eps,nrational,0); + + SetCoefficientsTanh(zdata,1.0); + + Approx::zolotarev_free(zdata); + +} + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h similarity index 93% rename from Grid/qcd/action/fermion/StaggeredKernelsAsm.cc rename to Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h index 9711c487..1a13e73a 100644 --- a/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h @@ -26,6 +26,8 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ +#pragma once + #include #ifdef AVX512 @@ -586,11 +588,11 @@ Author: paboyle VADD(UChi_00,UChi_10,UChi_00) \ VADD(UChi_01,UChi_11,UChi_01) \ VADD(UChi_02,UChi_12,UChi_02) ); \ - asm ( \ - VSTORE(0,%0,pUChi_00) \ - VSTORE(1,%0,pUChi_01) \ - VSTORE(2,%0,pUChi_02) \ - : : "r" (out) : "memory" ); + asm ( \ + VSTORE(0,%0,pUChi_00) \ + VSTORE(1,%0,pUChi_01) \ + VSTORE(2,%0,pUChi_02) \ + : : "r" (out) : "memory" ); // FIXME is sign right in the VSUB ? 
#define nREDUCEa(out) \ @@ -613,20 +615,20 @@ Author: paboyle permute##dir(Chi_1,Chi_1);\ permute##dir(Chi_2,Chi_2); -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { assert(0); }; -//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in._odata[o] ; } else { out =(uint64_t) &buf[o]; } +//#define CONDITIONAL_MOVE(l,o,out) if ( l ) { out = (uint64_t) &in[o] ; } else { out =(uint64_t) &buf[o]; } #define CONDITIONAL_MOVE(l,o,out) { const SiteSpinor *ptr = l? in_p : buf; out = (uint64_t) &ptr[o]; } @@ -673,22 +675,23 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, CONDITIONAL_MOVE(l3,o3,addr3); \ PF_CHI(addr3); \ \ - gauge0 =(uint64_t)&UU._odata[sU]( X ); \ - gauge1 =(uint64_t)&UU._odata[sU]( Y ); \ - gauge2 =(uint64_t)&UU._odata[sU]( Z ); \ - gauge3 =(uint64_t)&UU._odata[sU]( T ); + gauge0 =(uint64_t)&UU[sU]( X ); \ + gauge1 =(uint64_t)&UU[sU]( Y ); \ + gauge2 =(uint64_t)&UU[sU]( Z ); \ + gauge3 =(uint64_t)&UU[sU]( T ); // This is the single precision 5th direction vectorised kernel #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t addr0,addr1,addr2,addr3; - const SiteSpinor *in_p; in_p = &in._odata[0]; + const SiteSpinor *in_p; in_p = &in[0]; int o0,o1,o2,o3; // 
offsets int l0,l1,l2,l3; // local @@ -719,7 +722,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl LOAD_CHI(addr0,addr1,addr2,addr3); MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3); - addr0 = (uint64_t) &out._odata[sF]; + addr0 = (uint64_t) &out[sF]; if ( dag ) { nREDUCE(addr0); } else { @@ -734,14 +737,15 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out, int dag) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t addr0,addr1,addr2,addr3; - const SiteSpinor *in_p; in_p = &in._odata[0]; + const SiteSpinor *in_p; in_p = &in[0]; int o0,o1,o2,o3; // offsets int l0,l1,l2,l3; // local @@ -771,7 +775,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl LOAD_CHI(addr0,addr1,addr2,addr3); MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3); - addr0 = (uint64_t) &out._odata[sF]; + addr0 = (uint64_t) &out[sF]; if ( dag ) { nREDUCE(addr0); } else { @@ -818,14 +822,15 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t addr0,addr1,addr2,addr3; - const SiteSpinor *in_p; in_p = &in._odata[0]; + const SiteSpinor *in_p; in_p = &in[0]; int o0,o1,o2,o3; // offsets int l0,l1,l2,l3; // local @@ -872,7 +877,7 @@ template <> void 
StaggeredKernels::DhopSiteAsm(StencilImpl &st, PERMUTE23; MULT_ADD_XYZT(gauge2,gauge3); - addr0 = (uint64_t) &out._odata[sF]; + addr0 = (uint64_t) &out[sF]; if ( dag ) { nREDUCEa(addr0); } else { @@ -886,14 +891,15 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, - SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeFieldView &U, + DoubledGaugeFieldView &UUU, + SiteSpinor *buf, int LLs, + int sU, const FermionFieldView &in, FermionFieldView &out,int dag) { #ifdef AVX512 uint64_t gauge0,gauge1,gauge2,gauge3; uint64_t addr0,addr1,addr2,addr3; - const SiteSpinor *in_p; in_p = &in._odata[0]; + const SiteSpinor *in_p; in_p = &in[0]; int o0,o1,o2,o3; // offsets int l0,l1,l2,l3; // local @@ -940,7 +946,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, PERMUTE23; MULT_ADD_XYZT(gauge2,gauge3); - addr0 = (uint64_t) &out._odata[sF]; + addr0 = (uint64_t) &out[sF]; if ( dag ) { nREDUCEa(addr0); } else { @@ -952,17 +958,5 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #endif } -#define KERNEL_INSTANTIATE(CLASS,FUNC,IMPL) \ - template void CLASS::FUNC(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U, \ - DoubledGaugeField &UUU, \ - SiteSpinor *buf, int LLs, \ - int sU, const FermionField &in, FermionField &out,int dag); - -KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD); -KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF); -KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplD); -KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredVec5dImplF); - -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/StaggeredKernelsHand.cc b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h similarity index 86% rename from Grid/qcd/action/fermion/StaggeredKernelsHand.cc rename 
to Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h index f304b00f..62370d75 100644 --- a/Grid/qcd/action/fermion/StaggeredKernelsHand.cc +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsHand.h @@ -28,6 +28,9 @@ Author: paboyle /* END LEGAL */ #include +#pragma once + +NAMESPACE_BEGIN(Grid); #define LOAD_CHI(b) \ const SiteSpinor & ref (b[offset]); \ @@ -38,7 +41,7 @@ Author: paboyle // To splat or not to splat depends on the implementation #define MULT(A,UChi) \ - auto & ref(U._odata[sU](A)); \ + auto & ref(U[sU](A)); \ Impl::loadLinkElement(U_00,ref()(0,0)); \ Impl::loadLinkElement(U_10,ref()(1,0)); \ Impl::loadLinkElement(U_20,ref()(2,0)); \ @@ -59,7 +62,7 @@ Author: paboyle UChi ## _2 += U_22*Chi_2; #define MULT_ADD(U,A,UChi) \ - auto & ref(U._odata[sU](A)); \ + auto & ref(U[sU](A)); \ Impl::loadLinkElement(U_00,ref()(0,0)); \ Impl::loadLinkElement(U_10,ref()(1,0)); \ Impl::loadLinkElement(U_20,ref()(2,0)); \ @@ -92,7 +95,7 @@ Author: paboyle local = SE->_is_local; \ perm = SE->_permute; \ if ( local ) { \ - LOAD_CHI(in._odata); \ + LOAD_CHI(in); \ if ( perm) { \ PERMUTE_DIR(Perm); \ } \ @@ -120,14 +123,14 @@ Author: paboyle local = SE->_is_local; \ perm = SE->_permute; \ if ( local ) { \ - LOAD_CHI(in._odata); \ + LOAD_CHI(in); \ if ( perm) { \ PERMUTE_DIR(Perm); \ } \ } else if ( st.same_node[Dir] ) { \ LOAD_CHI(buf); \ } \ - if (SE->_is_local || st.same_node[Dir] ) { \ + if (local || st.same_node[Dir] ) { \ MULT_ADD(U,Dir,even); \ } @@ -135,22 +138,18 @@ Author: paboyle SE=st.GetEntry(ptype,Dir+skew,sF); \ offset = SE->_offset; \ local = SE->_is_local; \ - perm = SE->_permute; \ - if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ + if ((!local) && (!st.same_node[Dir]) ) { \ nmu++; \ { LOAD_CHI(buf); } \ { MULT_ADD(U,Dir,even); } \ } -namespace Grid { -namespace QCD { - template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U,DoubledGaugeField &UUU, + DoubledGaugeFieldView 
&U,DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -213,16 +212,16 @@ void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, result()()(1) = even_1 + odd_1; result()()(2) = even_2 + odd_2; } - vstream(out._odata[sF],result); + vstream(out[sF],result); } } template void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -249,7 +248,7 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, Simd U_22; SiteSpinor result; - int offset,local,perm, ptype; + int offset, ptype, local, perm; StencilEntry *SE; int skew; @@ -257,8 +256,8 @@ void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, for(int s=0;s::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, result()()(1) = even_1 + odd_1; result()()(2) = even_2 + odd_2; } - vstream(out._odata[sF],result); + vstream(out[sF],result); } } template void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) + const FermionFieldView &in, FermionFieldView &out,int dag) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -325,7 +324,7 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, Simd U_22; SiteSpinor result; - int offset,local,perm, ptype; + int offset, ptype, 
local; StencilEntry *SE; int skew; @@ -333,8 +332,8 @@ void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, for(int s=0;s::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, result()()(1) = even_1 + odd_1; result()()(2) = even_2 + odd_2; } - out._odata[sF] = out._odata[sF] + result; + out[sF] = out[sF] + result; } } } - #define DHOP_SITE_HAND_INSTANTIATE(IMPL) \ template void StaggeredKernels::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U,DoubledGaugeField &UUU, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, int sU, \ - const FermionField &in, FermionField &out, int dag); \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ \ template void StaggeredKernels::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U,DoubledGaugeField &UUU, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, int sU, \ - const FermionField &in, FermionField &out, int dag); \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ \ template void StaggeredKernels::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \ - DoubledGaugeField &U,DoubledGaugeField &UUU, \ + DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, \ SiteSpinor *buf, int LLs, int sU, \ - const FermionField &in, FermionField &out, int dag); \ + const FermionFieldView &in, FermionFieldView &out, int dag); \ -DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD); -DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF); -DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD); -DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF); +#undef LOAD_CHI + +NAMESPACE_END(Grid); -} -} - diff --git a/Grid/qcd/action/fermion/StaggeredKernels.cc b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h similarity index 83% rename from Grid/qcd/action/fermion/StaggeredKernels.cc rename to Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h index b7e568c2..d301556c 100644 
--- a/Grid/qcd/action/fermion/StaggeredKernels.cc +++ b/Grid/qcd/action/fermion/implementation/StaggeredKernelsImplementation.h @@ -28,40 +28,38 @@ directory /* END LEGAL */ #include -namespace Grid { -namespace QCD { +#pragma once -int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric; -int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute; +NAMESPACE_BEGIN(Grid); #define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \ SE = st.GetEntry(ptype, Dir+skew, sF); \ if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in._odata[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in._odata[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else { \ chi_p = &buf[SE->_offset]; \ } \ - multLink(Uchi, U._odata[sU], *chi_p, Dir); + multLink(Uchi, U[sU], *chi_p, Dir); #define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \ SE = st.GetEntry(ptype, Dir+skew, sF); \ if (SE->_is_local ) { \ if (SE->_permute) { \ chi_p = χ \ - permute(chi, in._odata[SE->_offset], ptype); \ + permute(chi, in[SE->_offset], ptype); \ } else { \ - chi_p = &in._odata[SE->_offset]; \ + chi_p = &in[SE->_offset]; \ } \ } else if ( st.same_node[Dir] ) { \ chi_p = &buf[SE->_offset]; \ } \ if (SE->_is_local || st.same_node[Dir] ) { \ - multLink(Uchi, U._odata[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } #define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \ @@ -69,7 +67,7 @@ int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute; if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ nmu++; \ chi_p = &buf[SE->_offset]; \ - multLink(Uchi, U._odata[sU], *chi_p, Dir); \ + multLink(Uchi, U[sU], *chi_p, Dir); \ } template @@ -81,9 +79,9 @@ StaggeredKernels::StaggeredKernels(const ImplParams &p) : Base(p){}; //////////////////////////////////////////////////////////////////////////////////// template void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField 
&U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out, int dag) { + const FermionFieldView &in, FermionFieldView &out, int dag) { const SiteSpinor *chi_p; SiteSpinor chi; SiteSpinor Uchi; @@ -114,7 +112,7 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, if ( dag ) { Uchi = - Uchi; } - vstream(out._odata[sF], Uchi); + vstream(out[sF], Uchi); } }; @@ -123,9 +121,9 @@ void StaggeredKernels::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo, /////////////////////////////////////////////////// template void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) { + const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; SiteSpinor chi; SiteSpinor Uchi; @@ -136,7 +134,7 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & for(int s=0;s::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & if ( dag ) { Uchi = - Uchi; } - vstream(out._odata[sF], Uchi); + vstream(out[sF], Uchi); } }; @@ -167,11 +165,11 @@ void StaggeredKernels::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder & /////////////////////////////////////////////////// template void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, DoubledGaugeField &UUU, + DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out,int dag) { + const FermionFieldView &in, FermionFieldView &out,int dag) { const SiteSpinor *chi_p; - SiteSpinor chi; + // SiteSpinor chi; SiteSpinor Uchi; StencilEntry *SE; int ptype; @@ -181,7 +179,7 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & for(int 
s=0;s::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & if ( nmu ) { if ( dag ) { - out._odata[sF] = out._odata[sF] - Uchi; + out[sF] = out[sF] - Uchi; } else { - out._odata[sF] = out._odata[sF] + Uchi; + out[sF] = out[sF] + Uchi; } } } @@ -215,9 +213,9 @@ void StaggeredKernels::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder & //////////////////////////////////////////////////////////////////////////////////// template -void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, +void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out, + const FermionFieldView &in, FermionFieldView &out, int interior,int exterior) { int dag=1; @@ -225,9 +223,9 @@ void StaggeredKernels::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, Dou }; template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, +void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, int sU, - const FermionField &in, FermionField &out, + const FermionFieldView &in, FermionFieldView &out, int interior,int exterior) { int dag=0; @@ -235,9 +233,9 @@ void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double }; template -void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, +void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int LLs, - int sU, const FermionField &in, FermionField &out, + int sU, const FermionFieldView &in, FermionFieldView &out, int dag,int interior,int exterior) { switch(Opt) { @@ -277,8 +275,8 @@ void StaggeredKernels::DhopSite(StencilImpl &st, LebesgueOrder &lo, Double }; 
template -void StaggeredKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int sF, - int sU, const FermionField &in, FermionField &out, int dir, int disp) +void StaggeredKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, SiteSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int disp) { // Disp should be either +1,-1,+3,-3 // What about "dag" ? @@ -287,8 +285,6 @@ void StaggeredKernels::DhopDir( StencilImpl &st, DoubledGaugeField &U, Do assert(0); } -FermOpStaggeredTemplateInstantiate(StaggeredKernels); -FermOpStaggeredVec5dTemplateInstantiate(StaggeredKernels); +NAMESPACE_END(Grid); -}} diff --git a/Grid/qcd/action/fermion/WilsonCloverFermion.cc b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h similarity index 85% rename from Grid/qcd/action/fermion/WilsonCloverFermion.cc rename to Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h index ed06c673..5744d3bb 100644 --- a/Grid/qcd/action/fermion/WilsonCloverFermion.cc +++ b/Grid/qcd/action/fermion/implementation/WilsonCloverFermionImplementation.h @@ -26,23 +26,21 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include -//#include -#include -namespace Grid -{ -namespace QCD -{ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); // *NOT* EO template RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) { - FermionField temp(out._grid); + FermionField temp(out.Grid()); // Wilson term - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); this->Dhop(in, out, DaggerNo); // Clover term @@ -55,10 +53,10 @@ RealD WilsonCloverFermion::M(const FermionField &in, FermionField &out) template RealD WilsonCloverFermion::Mdag(const FermionField &in, FermionField 
&out) { - FermionField temp(out._grid); + FermionField temp(out.Grid()); // Wilson term - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); this->Dhop(in, out, DaggerYes); // Clover term @@ -72,7 +70,7 @@ template void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) { WilsonFermion::ImportGauge(_Umu); - GridBase *grid = _Umu._grid; + GridBase *grid = _Umu.Grid(); typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid); // Compute the field strength terms mu>nu @@ -93,27 +91,29 @@ void WilsonCloverFermion::ImportGauge(const GaugeField &_Umu) CloverTerm += fillCloverZT(Ez) * csw_t; CloverTerm += diag_mass; - int lvol = _Umu._grid->lSites(); + int lvol = _Umu.Grid()->lSites(); int DimRep = Impl::Dimension; Eigen::MatrixXcd EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); Eigen::MatrixXcd EigenInvCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); - std::vector lcoor; - typename SiteCloverType::scalar_object Qx = zero, Qxinv = zero; + Coordinate lcoor; + typename SiteCloverType::scalar_object Qx = Zero(), Qxinv = Zero(); for (int site = 0; site < lvol; site++) { grid->LocalIndexToLocalCoor(site, lcoor); EigenCloverOp = Eigen::MatrixXcd::Zero(Ns * DimRep, Ns * DimRep); peekLocalSite(Qx, CloverTerm, lcoor); - Qxinv = zero; + Qxinv = Zero(); //if (csw!=0){ for (int j = 0; j < Ns; j++) for (int k = 0; k < Ns; k++) for (int a = 0; a < DimRep; a++) - for (int b = 0; b < DimRep; b++) - EigenCloverOp(a + j * DimRep, b + k * DimRep) = Qx()(j, k)(a, b); + for (int b = 0; b < DimRep; b++){ + auto zz = Qx()(j, k)(a, b); + EigenCloverOp(a + j * DimRep, b + k * DimRep) = std::complex(zz); + } // if (site==0) std::cout << "site =" << site << "\n" << EigenCloverOp << std::endl; EigenInvCloverOp = EigenCloverOp.inverse(); @@ -169,15 +169,15 @@ void WilsonCloverFermion::MooeeInvDag(const FermionField &in, FermionField template void WilsonCloverFermion::MooeeInternal(const FermionField &in, 
FermionField &out, int dag, int inv) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); CloverFieldType *Clover; - assert(in.checkerboard == Odd || in.checkerboard == Even); + assert(in.Checkerboard() == Odd || in.Checkerboard() == Even); if (dag) { - if (in._grid->_isCheckerBoarded) + if (in.Grid()->_isCheckerBoarded) { - if (in.checkerboard == Odd) + if (in.Checkerboard() == Odd) { Clover = (inv) ? &CloverTermInvDagOdd : &CloverTermDagOdd; } @@ -195,10 +195,10 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie } else { - if (in._grid->_isCheckerBoarded) + if (in.Grid()->_isCheckerBoarded) { - if (in.checkerboard == Odd) + if (in.Checkerboard() == Odd) { // std::cout << "Calling clover term Odd" << std::endl; Clover = (inv) ? &CloverTermInvOdd : &CloverTermOdd; @@ -209,7 +209,7 @@ void WilsonCloverFermion::MooeeInternal(const FermionField &in, FermionFie Clover = (inv) ? &CloverTermInvEven : &CloverTermEven; } out = *Clover * in; - // std::cout << GridLogMessage << "*Clover.checkerboard " << (*Clover).checkerboard << std::endl; + // std::cout << GridLogMessage << "*Clover.Checkerboard() " << (*Clover).Checkerboard() << std::endl; } else { @@ -235,9 +235,4 @@ void WilsonCloverFermion::MeeDeriv(GaugeField &mat, const FermionField &U, assert(0); // not implemented yet } -FermOpTemplateInstantiate(WilsonCloverFermion); -AdjointFermOpTemplateInstantiate(WilsonCloverFermion); -TwoIndexFermOpTemplateInstantiate(WilsonCloverFermion); -//GparityFermOpTemplateInstantiate(WilsonCloverFermion); -} -} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/WilsonFermion5D.cc b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h similarity index 74% rename from Grid/qcd/action/fermion/WilsonFermion5D.cc rename to Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h index fb3fd861..1bdc9a64 100644 --- a/Grid/qcd/action/fermion/WilsonFermion5D.cc +++ 
b/Grid/qcd/action/fermion/implementation/WilsonFermion5DImplementation.h @@ -36,13 +36,8 @@ Author: Vera Guelpers #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); -// S-direction is INNERMOST and takes no part in the parity. -const std::vector WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4}); -const std::vector WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1}); - // 5d lattice for DWF. template WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, @@ -56,9 +51,9 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), _FourDimGrid (&FourDimGrid), _FourDimRedBlackGrid(&FourDimRedBlackGrid), - Stencil (_FiveDimGrid,npoint,Even,directions,displacements), - StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even - StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd + Stencil (_FiveDimGrid,npoint,Even,directions,displacements,p), + StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even + StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd M5(_M5), Umu(_FourDimGrid), UmuEven(_FourDimRedBlackGrid), @@ -105,8 +100,8 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); for(int d=0;d<4;d++){ - assert(FourDimGrid._simd_layout[d]=1); - assert(FourDimRedBlackGrid._simd_layout[d]=1); + assert(FourDimGrid._simd_layout[d]==1); + assert(FourDimRedBlackGrid._simd_layout[d]==1); assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); } @@ -141,7 +136,7 @@ void WilsonFermion5D::Report(void) RealD NP = _FourDimGrid->_Nprocessors; RealD NN = _FourDimGrid->NodeCount(); RealD volume = Ls; - std::vector latt = _FourDimGrid->GlobalDimensions(); + Coordinate latt = _FourDimGrid->GlobalDimensions(); for(int mu=0;mu 0 ) { @@ -221,7 +216,7 @@ void WilsonFermion5D::ZeroCounters(void) { template void 
WilsonFermion5D::ImportGauge(const GaugeField &_Umu) { - GaugeField HUmu(_Umu._grid); + GaugeField HUmu(_Umu.Grid()); HUmu = _Umu*(-0.5); Impl::DoubleStore(GaugeGrid(),Umu,HUmu); pickCheckerboard(Even,UmuEven,Umu); @@ -235,51 +230,43 @@ void WilsonFermion5D::DhopDir(const FermionField &in, FermionField &out,in // assert( (disp==1)||(disp==-1) ); // assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t; - Compressor compressor(DaggerNo); - Stencil.HaloExchange(in,compressor); - int skip = (disp==1) ? 0 : 1; - int dirdisp = dir+skip*4; int gamma = dir+(1-skip)*4; - assert(dirdisp<=7); - assert(dirdisp>=0); + Compressor compressor(DaggerNo); + Stencil.HaloExchange(in,compressor); + + uint64_t Nsite = Umu.Grid()->oSites(); + Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma); - parallel_for(int ss=0;ssoSites();ss++){ - for(int s=0;s void WilsonFermion5D::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { DerivCalls++; assert((dag==DaggerNo) ||(dag==DaggerYes)); - conformable(st._grid,A._grid); - conformable(st._grid,B._grid); + conformable(st.Grid(),A.Grid()); + conformable(st.Grid(),B.Grid()); Compressor compressor(dag); - FermionField Btilde(B._grid); - FermionField Atilde(B._grid); + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); DerivCommTime-=usecond(); st.HaloExchange(B,compressor); DerivCommTime+=usecond(); Atilde=A; - int LLs = B._grid->_rdimensions[0]; + int LLs = B.Grid()->_rdimensions[0]; DerivComputeTime-=usecond(); @@ -295,21 +282,11 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, //////////////////////// DerivDhopComputeTime -= usecond(); - parallel_for (int sss = 0; sss < U._grid->oSites(); sss++) { - for (int s = 0; s < Ls; s++) { - int sU = sss; - int sF = s + Ls * sU; - assert(sF < B._grid->oSites()); - 
assert(sU < U._grid->oSites()); + int Usites = U.Grid()->oSites(); - Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); + Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma); - //////////////////////////// - // spin trace outer product - //////////////////////////// - } - } //////////////////////////// // spin trace outer product //////////////////////////// @@ -325,12 +302,13 @@ void WilsonFermion5D::DhopDeriv(GaugeField &mat, const FermionField &B, int dag) { - conformable(A._grid,FermionGrid()); - conformable(A._grid,B._grid); + conformable(A.Grid(),FermionGrid()); + conformable(A.Grid(),B.Grid()); - //conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment + //conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment - mat.checkerboard = A.checkerboard; + mat.Checkerboard() = A.Checkerboard(); + // mat.checkerboard = A.checkerboard; DerivInternal(Stencil,Umu,mat,A,B,dag); } @@ -341,12 +319,12 @@ void WilsonFermion5D::DhopDerivEO(GaugeField &mat, const FermionField &B, int dag) { - conformable(A._grid,FermionRedBlackGrid()); - conformable(A._grid,B._grid); + conformable(A.Grid(),FermionRedBlackGrid()); + conformable(A.Grid(),B.Grid()); - assert(B.checkerboard==Odd); - assert(A.checkerboard==Even); - mat.checkerboard = Even; + assert(B.Checkerboard()==Odd); + assert(A.Checkerboard()==Even); + mat.Checkerboard() = Even; DerivInternal(StencilOdd,UmuEven,mat,A,B,dag); } @@ -358,12 +336,12 @@ void WilsonFermion5D::DhopDerivOE(GaugeField &mat, const FermionField &B, int dag) { - conformable(A._grid,FermionRedBlackGrid()); - conformable(A._grid,B._grid); + conformable(A.Grid(),FermionRedBlackGrid()); + conformable(A.Grid(),B.Grid()); - assert(B.checkerboard==Even); - assert(A.checkerboard==Odd); - mat.checkerboard = Odd; + assert(B.Checkerboard()==Even); + assert(A.Checkerboard()==Odd); + mat.Checkerboard() = Odd; DerivInternal(StencilEven,UmuOdd,mat,A,B,dag); } @@ -374,11 +352,9 
@@ void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, const FermionField &in, FermionField &out,int dag) { DhopTotalTime-=usecond(); -#ifdef GRID_OMP if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) DhopInternalOverlappedComms(st,lo,U,in,out,dag); else -#endif DhopInternalSerialComms(st,lo,U,in,out,dag); DhopTotalTime+=usecond(); } @@ -389,131 +365,84 @@ void WilsonFermion5D::DhopInternalOverlappedComms(StencilImpl & st, Lebesg DoubledGaugeField & U, const FermionField &in, FermionField &out,int dag) { -#ifdef GRID_OMP - // assert((dag==DaggerNo) ||(dag==DaggerYes)); - Compressor compressor(dag); - int LLs = in._grid->_rdimensions[0]; - int len = U._grid->oSites(); + int LLs = in.Grid()->_rdimensions[0]; + int len = U.Grid()->oSites(); + ///////////////////////////// + // Start comms // Gather intranode and extra node differentiated?? + ///////////////////////////// DhopFaceTime-=usecond(); st.HaloExchangeOptGather(in,compressor); - st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms DhopFaceTime+=usecond(); - double ctime=0; - double ptime=0; + DhopCommTime -=usecond(); + std::vector > requests; + st.CommunicateBegin(requests); - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // Ugly explicit thread mapping introduced for OPA reasons. 
- ////////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma omp parallel reduction(max:ctime) reduction(max:ptime) - { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - if (tid >= ncomms) { - double start = usecond(); - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = U._grid->oSites(); - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } + ///////////////////////////// + // Overlap with comms + ///////////////////////////// + DhopFaceTime-=usecond(); + st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms + DhopFaceTime+=usecond(); - // do the compute - if (dag == DaggerYes) { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); - } - } else { - for (int ss = myblock; ss < myblock+myn; ++ss) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,1,0); - } - } - ptime = usecond() - start; - } else { - double start = usecond(); - st.CommunicateThreaded(); - ctime = usecond() - start; - } + ///////////////////////////// + // do the compute interior + ///////////////////////////// + int Opt = WilsonKernelsStatic::Opt; // Why pass this. 
Kernels should know + DhopComputeTime-=usecond(); + if (dag == DaggerYes) { + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); + } else { + Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0); } - DhopCommTime += ctime; - DhopComputeTime+=ptime; + DhopComputeTime+=usecond(); - // First to enter, last to leave timing - st.CollateThreads(); + ///////////////////////////// + // Complete comms + ///////////////////////////// + st.CommunicateComplete(requests); + DhopCommTime +=usecond(); + ///////////////////////////// + // do the compute exterior + ///////////////////////////// DhopFaceTime-=usecond(); st.CommsMerge(compressor); DhopFaceTime+=usecond(); DhopComputeTime2-=usecond(); if (dag == DaggerYes) { - int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { - int sU = st.surface_list[ss]; - int sF = LLs * sU; - Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); - } + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); } else { - int sz=st.surface_list.size(); - parallel_for (int ss = 0; ss < sz; ss++) { - int sU = st.surface_list[ss]; - int sF = LLs * sU; - Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out,0,1); - } + Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1); } DhopComputeTime2+=usecond(); -#else - assert(0); -#endif } template void WilsonFermion5D::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo, - DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeField & U, + const FermionField &in, + FermionField &out,int dag) { - // assert((dag==DaggerNo) ||(dag==DaggerYes)); Compressor compressor(dag); - int LLs = in._grid->_rdimensions[0]; + int LLs = in.Grid()->_rdimensions[0]; DhopCommTime-=usecond(); st.HaloExchangeOpt(in,compressor); DhopCommTime+=usecond(); DhopComputeTime-=usecond(); - // Dhop takes the 4d grid from U, and makes a 5d index for fermion - + int Opt = 
WilsonKernelsStatic::Opt; if (dag == DaggerYes) { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSiteDag(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); - } + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); } else { - parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) { - int sU = ss; - int sF = LLs * sU; - Kernels::DhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); - } + Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out); } DhopComputeTime+=usecond(); } @@ -523,11 +452,11 @@ template void WilsonFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { DhopCalls++; - conformable(in._grid,FermionRedBlackGrid()); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check - assert(in.checkerboard==Even); - out.checkerboard = Odd; + assert(in.Checkerboard()==Even); + out.Checkerboard() = Odd; DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag); } @@ -535,11 +464,11 @@ template void WilsonFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag) { DhopCalls++; - conformable(in._grid,FermionRedBlackGrid()); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check + conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid + conformable(in.Grid(),out.Grid()); // drops the cb check - assert(in.checkerboard==Odd); - out.checkerboard = Even; + assert(in.Checkerboard()==Odd); + out.Checkerboard() = Even; DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag); } @@ -547,17 +476,17 @@ template void WilsonFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) { DhopCalls+=2; - conformable(in._grid,FermionGrid()); // verifies full grid - conformable(in._grid,out._grid); + conformable(in.Grid(),FermionGrid()); // verifies full grid + 
conformable(in.Grid(),out.Grid()); - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); DhopInternal(Stencil,Lebesgue,Umu,in,out,dag); } template void WilsonFermion5D::DW(const FermionField &in, FermionField &out,int dag) { - out.checkerboard=in.checkerboard; + out.Checkerboard()=in.Checkerboard(); Dhop(in,out,dag); // -0.5 is included axpy(out,4.0-M5,in,out); } @@ -569,7 +498,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const GridBase *_grid = _FourDimGrid; GridBase *_5dgrid = _FiveDimGrid; - conformable(_5dgrid,out._grid); + conformable(_5dgrid,out.Grid()); FermionField PRsource(_5dgrid); FermionField PLsource(_5dgrid); @@ -580,7 +509,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField bufL_4d(_grid); FermionField bufR_4d(_grid); - unsigned int Ls = in._grid->_rdimensions[0]; + unsigned int Ls = in.Grid()->_rdimensions[0]; typedef typename FermionField::vector_type vector_type; typedef typename FermionField::scalar_type ScalComplex; @@ -596,12 +525,12 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const Gamma g5(Gamma::Algebra::Gamma5); - std::vector latt_size = _grid->_fdimensions; + Coordinate latt_size = _grid->_fdimensions; - LatComplex sk(_grid); sk = zero; - LatComplex sk2(_grid); sk2= zero; - LatComplex W(_grid); W= zero; - LatComplex a(_grid); a= zero; + LatComplex sk(_grid); sk = Zero(); + LatComplex sk2(_grid); sk2= Zero(); + LatComplex W(_grid); W= Zero(); + LatComplex a(_grid); a= Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); LatComplex cosha(_grid); LatComplex kmu(_grid); @@ -643,9 +572,9 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const // FIXME Need a Lattice acosh for(int idx=0;idx<_grid->lSites();idx++){ - std::vector lcoor(Nd); + Coordinate lcoor(Nd); Tcomplex cc; - RealD sgn; + // RealD sgn; _grid->LocalIndexToLocalCoor(idx,lcoor); peekLocalSite(cc,cosha,lcoor); 
assert((double)real(cc)>=1.0); @@ -678,8 +607,8 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const //calculate GR, GL for(unsigned int ss=1;ss<=Ls;ss++) { - bufR_4d = zero; - bufL_4d = zero; + bufR_4d = Zero(); + bufL_4d = Zero(); for(unsigned int tt=1;tt<=Ls;tt++) { //possible sign if W<0 @@ -688,7 +617,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const unsigned int f = (ss > tt) ? ss-tt : tt-ss; //f = abs(ss-tt) //GR - buf1_4d = zero; + buf1_4d = Zero(); ExtractSlice(buf1_4d, PRsource, (tt-1), 0); //G(s,t) bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d; @@ -702,7 +631,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ; //GL - buf2_4d = zero; + buf2_4d = Zero(); ExtractSlice(buf2_4d, PLsource, (tt-1), 0); //G(s,t) bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d; @@ -722,13 +651,13 @@ void WilsonFermion5D::MomentumSpacePropagatorHt_5d(FermionField &out,const //calculate propagator for(unsigned int ss=1;ss<=Ls;ss++) { - bufR_4d = zero; - bufL_4d = zero; + bufR_4d = Zero(); + bufL_4d = Zero(); //(i*gamma_mu*sin(p_mu) - W)*(GL*P- source) - buf1_4d = zero; + buf1_4d = Zero(); ExtractSlice(buf1_4d, GL, (ss-1), 0); - buf2_4d = zero; + buf2_4d = Zero(); for(int mu=0;mu::MomentumSpacePropagatorHt_5d(FermionField &out,const bufL_4d = buf2_4d - W * buf1_4d; //(i*gamma_mu*sin(p_mu) - W)*(GR*P+ source) - buf1_4d = zero; + buf1_4d = Zero(); ExtractSlice(buf1_4d, GR, (ss-1), 0); - buf2_4d = zero; + buf2_4d = Zero(); for(int mu=0;mu::MomentumSpacePropagatorHt(FermionField &out,const Fe { // what type LatticeComplex GridBase *_grid = _FourDimGrid; - conformable(_grid,out._grid); + conformable(_grid,out.Grid()); typedef typename FermionField::vector_type vector_type; typedef typename 
FermionField::scalar_type ScalComplex; @@ -795,17 +724,17 @@ void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const Fe Gamma::Algebra::GammaT }; - std::vector latt_size = _grid->_fdimensions; + Coordinate latt_size = _grid->_fdimensions; - FermionField num (_grid); num = zero; + FermionField num (_grid); num = Zero(); - LatComplex sk(_grid); sk = zero; - LatComplex sk2(_grid); sk2= zero; - LatComplex W(_grid); W= zero; - LatComplex a(_grid); a= zero; + LatComplex sk(_grid); sk = Zero(); + LatComplex sk2(_grid); sk2= Zero(); + LatComplex W(_grid); W= Zero(); + LatComplex a(_grid); a= Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); - LatComplex denom(_grid); denom= zero; + LatComplex denom(_grid); denom= Zero(); LatComplex cosha(_grid); LatComplex kmu(_grid); LatComplex Wea(_grid); @@ -838,9 +767,9 @@ void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const Fe // FIXME Need a Lattice acosh for(int idx=0;idx<_grid->lSites();idx++){ - std::vector lcoor(Nd); + Coordinate lcoor(Nd); Tcomplex cc; - RealD sgn; + // RealD sgn; _grid->LocalIndexToLocalCoor(idx,lcoor); peekLocalSite(cc,cosha,lcoor); assert((double)real(cc)>=1.0); @@ -868,7 +797,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe }; GridBase *_grid = _FourDimGrid; - conformable(_grid,out._grid); + conformable(_grid,out.Grid()); typedef typename FermionField::vector_type vector_type; typedef typename FermionField::scalar_type ScalComplex; @@ -876,18 +805,18 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe typedef Lattice > LatComplex; - std::vector latt_size = _grid->_fdimensions; + Coordinate latt_size = _grid->_fdimensions; - LatComplex sk(_grid); sk = zero; - LatComplex sk2(_grid); sk2= zero; + LatComplex sk(_grid); sk = Zero(); + LatComplex sk2(_grid); sk2= Zero(); - LatComplex w_k(_grid); w_k= zero; - LatComplex b_k(_grid); b_k= zero; + LatComplex w_k(_grid); w_k= Zero(); + LatComplex b_k(_grid); b_k= 
Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); - FermionField num (_grid); num = zero; - LatComplex denom(_grid); denom= zero; + FermionField num (_grid); num = Zero(); + LatComplex denom(_grid); denom= Zero(); LatComplex kmu(_grid); ScalComplex ci(0.0,1.0); @@ -928,7 +857,7 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe // Helper macro to reverse Simd vector. Fixme: slow, generic implementation. #define REVERSE_LS(qSite, qSiteRev, Nsimd) \ { \ - std::vector qSiteVec(Nsimd); \ + ExtractBuffer qSiteVec(Nsimd); \ extract(qSite, qSiteVec); \ for (int i = 0; i < Nsimd / 2; ++i) \ { \ @@ -946,31 +875,35 @@ void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const Fe template Lattice spProj5p(const Lattice & in) { - GridBase *grid=in._grid; + GridBase *grid=in.Grid(); Gamma G5(Gamma::Algebra::Gamma5); Lattice ret(grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = in._odata[ss] + G5*in._odata[ss]; - } + auto ret_v = ret.View(); + auto in_v = in.View(); + thread_for(ss,grid->oSites(),{ + ret_v[ss] = in_v[ss] + G5*in_v[ss]; + }); return ret; } template Lattice spProj5m(const Lattice & in) { Gamma G5(Gamma::Algebra::Gamma5); - GridBase *grid=in._grid; + GridBase *grid=in.Grid(); Lattice ret(grid); - parallel_for(int ss=0;ssoSites();ss++){ - ret._odata[ss] = in._odata[ss] - G5*in._odata[ss]; - } + auto ret_v = ret.View(); + auto in_v = in.View(); + thread_for(ss,grid->oSites(),{ + ret_v[ss] = in_v[ss] - G5*in_v[ss]; + }); return ret; } template void WilsonFermion5D::ContractJ5q(FermionField &q_in,ComplexField &J5q) { - conformable(GaugeGrid(), J5q._grid); - conformable(q_in._grid, FermionGrid()); + conformable(GaugeGrid(), J5q.Grid()); + conformable(q_in.Grid(), FermionGrid()); // 4d field int Ls = this->Ls; @@ -990,8 +923,8 @@ void WilsonFermion5D::ContractJ5q(FermionField &q_in,ComplexField &J5q) template void WilsonFermion5D::ContractJ5q(PropagatorField &q_in,ComplexField &J5q) { - 
conformable(GaugeGrid(), J5q._grid); - conformable(q_in._grid, FermionGrid()); + conformable(GaugeGrid(), J5q.Grid()); + conformable(q_in.Grid(), FermionGrid()); // 4d field int Ls = this->Ls; @@ -1015,20 +948,26 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, Current curr_type, unsigned int mu) { - conformable(q_in_1._grid, FermionGrid()); - conformable(q_in_1._grid, q_in_2._grid); - conformable(_FourDimGrid, q_out._grid); + conformable(q_in_1.Grid(), FermionGrid()); + conformable(q_in_1.Grid(), q_in_2.Grid()); + conformable(_FourDimGrid, q_out.Grid()); PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid()); - unsigned int LLs = q_in_1._grid->_rdimensions[0]; - q_out = zero; + unsigned int LLs = q_in_1.Grid()->_rdimensions[0]; + q_out = Zero(); // Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s), // q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one. tmp1 = Cshift(q_in_1, mu + 1, 1); tmp2 = Cshift(q_in_2, mu + 1, 1); - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { + auto q_in_1_v = q_in_1.View(); + auto q_in_2_v = q_in_2.View(); + auto tmp1_v = tmp1.View(); + auto tmp2_v = tmp2.View(); + auto q_out_v = q_out.View(); + auto Umu_v = Umu.View(); + thread_for(sU, Umu.Grid()->oSites(),{ + unsigned int sF1 = sU * LLs; unsigned int sF2 = (sU + 1) * LLs - 1; @@ -1042,26 +981,26 @@ void WilsonFermion5D::ContractConservedCurrent(PropagatorField &q_in_1, // sites correctly. 
if (Impl::LsVectorised) { - REVERSE_LS(q_in_2._odata[sF2], qSite2, Ls / LLs); - REVERSE_LS(tmp2._odata[sF2], qmuSite2, Ls / LLs); + REVERSE_LS(q_in_2_v[sF2], qSite2, Ls / LLs); + REVERSE_LS(tmp2_v[sF2], qmuSite2, Ls / LLs); } else { - qSite2 = q_in_2._odata[sF2]; - qmuSite2 = tmp2._odata[sF2]; + qSite2 = q_in_2_v[sF2]; + qmuSite2 = tmp2_v[sF2]; } - Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sF1], + Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sF1], qSite2, - q_out._odata[sU], - Umu, sU, mu, axial_sign); - Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sF1], + q_out_v[sU], + Umu_v, sU, mu, axial_sign); + Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sF1], qmuSite2, - q_out._odata[sU], - Umu, sU, mu, axial_sign); + q_out_v[sU], + Umu_v, sU, mu, axial_sign); sF1++; sF2--; } - } + }); } @@ -1074,18 +1013,21 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, unsigned int tmax, ComplexField &lattice_cmplx) { - conformable(q_in._grid, FermionGrid()); - conformable(q_in._grid, q_out._grid); + conformable(q_in.Grid(), FermionGrid()); + conformable(q_in.Grid(), q_out.Grid()); PropagatorField tmp(GaugeGrid()),tmp2(GaugeGrid()); unsigned int tshift = (mu == Tp) ? 
1 : 0; - unsigned int LLs = q_in._grid->_rdimensions[0]; + unsigned int LLs = q_in.Grid()->_rdimensions[0]; unsigned int LLt = GridDefaultLatt()[Tp]; - q_out = zero; + q_out = Zero(); LatticeInteger coords(_FourDimGrid); LatticeCoordinate(coords, Tp); - - + + auto q_out_v = q_out.View(); + auto tmp2_v = tmp2.View(); + auto coords_v= coords.View(); + auto Umu_v = Umu.View(); for (unsigned int s = 0; s < LLs; ++s) { bool axial_sign = ((curr_type == Current::Axial) && (s < (LLs / 2))); @@ -1098,59 +1040,51 @@ void WilsonFermion5D::SeqConservedCurrent(PropagatorField &q_in, tmp = Cshift(tmp2, mu, 1); //q(x+mu,s) tmp2 = tmp*lattice_cmplx; //q(x+mu,s)*A(x) - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { + thread_for(sU, Umu.Grid()->oSites(),{ // Compute the sequential conserved current insertion only if our simd // object contains a timeslice we need. - vInteger t_mask = ((coords._odata[sU] >= tmin) && - (coords._odata[sU] <= tmax)); - Integer timeSlices = Reduce(t_mask); + vPredicate t_mask; + t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask()); if (timeSlices > 0) { unsigned int sF = sU * LLs + s; - Kernels::SeqConservedCurrentSiteFwd(tmp2._odata[sU], - q_out._odata[sF], Umu, sU, - mu, t_mask, switch_sgn); + Kernels::SeqConservedCurrentSiteFwd(tmp2_v[sU], + q_out_v[sF], Umu_v, sU, + mu, t_mask, switch_sgn); } - } + }); //backward direction: Need q(x - mu, s)*A(x-mu) ExtractSlice(tmp2, q_in, s, 0); //q(x,s) tmp = lattice_cmplx*tmp2; //q(x,s)*A(x) tmp2 = Cshift(tmp, mu, -1); //q(x-mu,s)*A(x-mu,s) - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) + thread_for(sU, Umu.Grid()->oSites(), { - vInteger t_mask = ((coords._odata[sU] >= (tmin + tshift)) && - (coords._odata[sU] <= (tmax + tshift))); + vPredicate t_mask; + t_mask()= ((coords_v[sU] >= (tmin + tshift)) && (coords_v[sU] <= (tmax + tshift))); - //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted 
(mu=3) - unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 )); - - Integer timeSlices = Reduce(t_mask); - - if (timeSlices > 0) - { - unsigned int sF = sU * LLs + s; - Kernels::SeqConservedCurrentSiteBwd(tmp2._odata[sU], - q_out._odata[sF], Umu, sU, - mu, t_mask, axial_sign); - } - } + //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) + unsigned int t0 = 0; + if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 )); + + Integer timeSlices = Reduce(t_mask()); + + if (timeSlices > 0) { + unsigned int sF = sU * LLs + s; + Kernels::SeqConservedCurrentSiteBwd(tmp2_v[sU], + q_out_v[sF], Umu_v, sU, + mu, t_mask, axial_sign); + } + }); } } - - - - - -FermOpTemplateInstantiate(WilsonFermion5D); -GparityFermOpTemplateInstantiate(WilsonFermion5D); -}} +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/WilsonFermion.cc b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h similarity index 57% rename from Grid/qcd/action/fermion/WilsonFermion.cc rename to Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h index ffc41396..756bdbf4 100644 --- a/Grid/qcd/action/fermion/WilsonFermion.cc +++ b/Grid/qcd/action/fermion/implementation/WilsonFermionImplementation.h @@ -1,4 +1,3 @@ - /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -29,16 +28,11 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #include #include -namespace Grid { -namespace QCD { - -const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3}); -const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1}); -int 
WilsonFermionStatic::HandOptDslash; +NAMESPACE_BEGIN(Grid); ///////////////////////////////// // Constructor and gauge import @@ -49,18 +43,19 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass, const ImplParams &p, const WilsonAnisotropyCoefficients &anis) - : Kernels(p), - _grid(&Fgrid), - _cbgrid(&Hgrid), - Stencil(&Fgrid, npoint, Even, directions, displacements), - StencilEven(&Hgrid, npoint, Even, directions,displacements), // source is Even - StencilOdd(&Hgrid, npoint, Odd, directions,displacements), // source is Odd - mass(_mass), - Lebesgue(_grid), - LebesgueEvenOdd(_cbgrid), - Umu(&Fgrid), - UmuEven(&Hgrid), - UmuOdd(&Hgrid), + : + Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements,p), + StencilEven(&Hgrid, npoint, Even, directions,displacements,p), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions,displacements,p), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid), _tmp(&Hgrid), anisotropyCoeff(anis) { @@ -76,8 +71,9 @@ WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, } template -void WilsonFermion::ImportGauge(const GaugeField &_Umu) { - GaugeField HUmu(_Umu._grid); +void WilsonFermion::ImportGauge(const GaugeField &_Umu) +{ + GaugeField HUmu(_Umu.Grid()); //Here multiply the anisotropy coefficients if (anisotropyCoeff.isAnisotropic) @@ -107,21 +103,21 @@ void WilsonFermion::ImportGauge(const GaugeField &_Umu) { template RealD WilsonFermion::M(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerNo); return axpy_norm(out, diag_mass, in, out); } template RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Dhop(in, out, DaggerYes); return 
axpy_norm(out, diag_mass, in, out); } template void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerNo); } else { DhopOE(in, out, DaggerNo); @@ -130,7 +126,7 @@ void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { template void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { - if (in.checkerboard == Odd) { + if (in.Checkerboard() == Odd) { DhopEO(in, out, DaggerYes); } else { DhopOE(in, out, DaggerYes); @@ -139,26 +135,26 @@ void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { template void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); typename FermionField::scalar_type scal(diag_mass); out = scal * in; } template void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); Mooee(in, out); } template void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); out = (1.0/(diag_mass))*in; } template void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); MooeeInv(in,out); } template @@ -169,7 +165,7 @@ void WilsonFermion::MomentumSpacePropagator(FermionField &out, const Fermi typedef Lattice > LatComplex; // what type LatticeComplex - conformable(_grid,out._grid); + conformable(_grid,out.Grid()); Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, @@ -178,13 +174,13 @@ void WilsonFermion::MomentumSpacePropagator(FermionField &out, const Fermi Gamma::Algebra::GammaT }; - std::vector latt_size = _grid->_fdimensions; + Coordinate latt_size = _grid->_fdimensions; - FermionField num (_grid); num = zero; - LatComplex wilson(_grid); 
wilson= zero; + FermionField num (_grid); num = Zero(); + LatComplex wilson(_grid); wilson= Zero(); LatComplex one (_grid); one = ScalComplex(1.0,0.0); - LatComplex denom(_grid); denom= zero; + LatComplex denom(_grid); denom= Zero(); LatComplex kmu(_grid); ScalComplex ci(0.0,1.0); // momphase = n * 2pi / L @@ -229,9 +225,9 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, Compressor compressor(dag); - FermionField Btilde(B._grid); - FermionField Atilde(B._grid); - Atilde = A;//redundant + FermionField Btilde(B.Grid()); + FermionField Atilde(B.Grid()); + Atilde = A; st.HaloExchange(B, compressor); @@ -242,12 +238,8 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, int gamma = mu; if (!dag) gamma += Nd; - //////////////////////// - // Call the single hop - //////////////////////// - parallel_for (int sss = 0; sss < B._grid->oSites(); sss++) { - Kernels::DhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, gamma); - } + int Ls=1; + Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma); ////////////////////////////////////////////////// // spin trace outer product @@ -258,70 +250,70 @@ void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, template void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _grid); - conformable(U._grid, V._grid); - conformable(U._grid, mat._grid); + conformable(U.Grid(), _grid); + conformable(U.Grid(), V.Grid()); + conformable(U.Grid(), mat.Grid()); - mat.checkerboard = U.checkerboard; + mat.Checkerboard() = U.Checkerboard(); DerivInternal(Stencil, Umu, mat, U, V, dag); } template void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _cbgrid); - conformable(U._grid, V._grid); - //conformable(U._grid, mat._grid); not general, leaving as a comment (Guido) + conformable(U.Grid(), _cbgrid); + 
conformable(U.Grid(), V.Grid()); + //conformable(U.Grid(), mat.Grid()); not general, leaving as a comment (Guido) // Motivation: look at the SchurDiff operator - assert(V.checkerboard == Even); - assert(U.checkerboard == Odd); - mat.checkerboard = Odd; + assert(V.Checkerboard() == Even); + assert(U.Checkerboard() == Odd); + mat.Checkerboard() = Odd; DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); } template void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { - conformable(U._grid, _cbgrid); - conformable(U._grid, V._grid); - //conformable(U._grid, mat._grid); + conformable(U.Grid(), _cbgrid); + conformable(U.Grid(), V.Grid()); + //conformable(U.Grid(), mat.Grid()); - assert(V.checkerboard == Odd); - assert(U.checkerboard == Even); - mat.checkerboard = Even; + assert(V.Checkerboard() == Odd); + assert(U.Checkerboard() == Even); + mat.Checkerboard() = Even; DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); } template void WilsonFermion::Dhop(const FermionField &in, FermionField &out, int dag) { - conformable(in._grid, _grid); // verifies full grid - conformable(in._grid, out._grid); + conformable(in.Grid(), _grid); // verifies full grid + conformable(in.Grid(), out.Grid()); - out.checkerboard = in.checkerboard; + out.Checkerboard() = in.Checkerboard(); DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); } template void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, int dag) { - conformable(in._grid, _cbgrid); // verifies half grid - conformable(in._grid, out._grid); // drops the cb check + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check - assert(in.checkerboard == Even); - out.checkerboard = Odd; + assert(in.Checkerboard() == Even); + out.Checkerboard() = Odd; DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); } template void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { - 
conformable(in._grid, _cbgrid); // verifies half grid - conformable(in._grid, out._grid); // drops the cb check + conformable(in.Grid(), _cbgrid); // verifies half grid + conformable(in.Grid(), out.Grid()); // drops the cb check - assert(in.checkerboard == Odd); - out.checkerboard = Even; + assert(in.Checkerboard() == Odd); + out.Checkerboard() = Even; DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); } @@ -332,7 +324,8 @@ void WilsonFermion::Mdir(const FermionField &in, FermionField &out, int di } template -void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) { +void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) +{ int skip = (disp == 1) ? 0 : 1; int dirdisp = dir + skip * 4; int gamma = dir + (1 - skip) * 4; @@ -341,16 +334,16 @@ void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, int }; template -void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) { +void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp, int gamma, int dag) +{ Compressor compressor(dag); Stencil.HaloExchange(in, compressor); + int Ls=1; + int Nsite=in.oSites(); + Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma); +}; - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma); - } -} -/*Change starts*/ template void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, @@ -367,71 +360,51 @@ void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, template void WilsonFermion::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, - DoubledGaugeField &U, - const FermionField &in, - FermionField &out, int dag) { + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) { assert((dag == DaggerNo) || 
(dag == DaggerYes)); -#ifdef GRID_OMP - Compressor compressor; - int len = U._grid->oSites(); - const int LLs = 1; - - st.Prepare(); - st.HaloGather(in,compressor); - st.CommsMergeSHM(compressor); -#pragma omp parallel - { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int ncomms = CartesianCommunicator::nCommThreads; - if (ncomms == -1) ncomms = 1; - assert(nthreads > ncomms); - if (tid >= ncomms) { - nthreads -= ncomms; - int ttid = tid - ncomms; - int n = len; - int chunk = n / nthreads; - int rem = n % nthreads; - int myblock, myn; - if (ttid < rem) { - myblock = ttid * chunk + ttid; - myn = chunk+1; - } else { - myblock = ttid*chunk + rem; - myn = chunk; - } - // do the compute - if (dag == DaggerYes) { - - for (int sss = myblock; sss < myblock+myn; ++sss) { - Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } - } else { - for (int sss = myblock; sss < myblock+myn; ++sss) { - Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } - } //else - - } else { - st.CommunicateThreaded(); - } Compressor compressor(dag); + int len = U.Grid()->oSites(); + ///////////////////////////// + // Start comms // Gather intranode and extra node differentiated?? 
+ ///////////////////////////// + std::vector > requests; + st.Prepare(); + st.HaloGather(in,compressor); + st.CommunicateBegin(requests); + + ///////////////////////////// + // Overlap with comms + ///////////////////////////// + st.CommsMergeSHM(compressor); + + ///////////////////////////// + // do the compute interior + ///////////////////////////// + int Opt = WilsonKernelsStatic::Opt; if (dag == DaggerYes) { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); } else { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } - } + Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0); + } - } //pragma -#else - assert(0); -#endif + ///////////////////////////// + // Complete comms + ///////////////////////////// + st.CommunicateComplete(requests); + st.CommsMerge(compressor); + + ///////////////////////////// + // do the compute exterior + ///////////////////////////// + if (dag == DaggerYes) { + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); + } else { + Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1); + } }; @@ -444,14 +417,11 @@ void WilsonFermion::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, Compressor compressor(dag); st.HaloExchange(in, compressor); + int Opt = WilsonKernelsStatic::Opt; if (dag == DaggerYes) { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } + Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } else { - parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) { - Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out); - } + Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out); } }; /*Change ends */ 
@@ -468,28 +438,33 @@ void WilsonFermion::ContractConservedCurrent(PropagatorField &q_in_1, Current curr_type, unsigned int mu) { - Gamma g5(Gamma::Algebra::Gamma5); - conformable(_grid, q_in_1._grid); - conformable(_grid, q_in_2._grid); - conformable(_grid, q_out._grid); - PropagatorField tmp1(_grid), tmp2(_grid); - q_out = zero; + Gamma g5(Gamma::Algebra::Gamma5); + conformable(_grid, q_in_1.Grid()); + conformable(_grid, q_in_2.Grid()); + conformable(_grid, q_out.Grid()); + PropagatorField tmp1(_grid), tmp2(_grid); + q_out = Zero(); - // Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu). - // Inefficient comms method but not performance critical. - tmp1 = Cshift(q_in_1, mu, 1); - tmp2 = Cshift(q_in_2, mu, 1); - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { - Kernels::ContractConservedCurrentSiteFwd(tmp1._odata[sU], - q_in_2._odata[sU], - q_out._odata[sU], - Umu, sU, mu); - Kernels::ContractConservedCurrentSiteBwd(q_in_1._odata[sU], - tmp2._odata[sU], - q_out._odata[sU], - Umu, sU, mu); - } + // Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu). + // Inefficient comms method but not performance critical. + tmp1 = Cshift(q_in_1, mu, 1); + tmp2 = Cshift(q_in_2, mu, 1); + auto tmp1_v = tmp1.View(); + auto tmp2_v = tmp2.View(); + auto q_in_1_v=q_in_1.View(); + auto q_in_2_v=q_in_2.View(); + auto q_out_v = q_out.View(); + auto Umu_v = Umu.View(); + thread_for(sU, Umu.Grid()->oSites(),{ + Kernels::ContractConservedCurrentSiteFwd(tmp1_v[sU], + q_in_2_v[sU], + q_out_v[sU], + Umu_v, sU, mu); + Kernels::ContractConservedCurrentSiteBwd(q_in_1_v[sU], + tmp2_v[sU], + q_out_v[sU], + Umu_v, sU, mu); + }); } @@ -502,61 +477,61 @@ void WilsonFermion::SeqConservedCurrent(PropagatorField &q_in, unsigned int tmax, ComplexField &lattice_cmplx) { - conformable(_grid, q_in._grid); - conformable(_grid, q_out._grid); - PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); - unsigned int tshift = (mu == Tp) ? 
1 : 0; - unsigned int LLt = GridDefaultLatt()[Tp]; + conformable(_grid, q_in.Grid()); + conformable(_grid, q_out.Grid()); - q_out = zero; - LatticeInteger coords(_grid); - LatticeCoordinate(coords, Tp); + // Lattice> ph(_grid), coor(_grid); + Complex i(0.0,1.0); + PropagatorField tmpFwd(_grid), tmpBwd(_grid), tmp(_grid); + unsigned int tshift = (mu == Tp) ? 1 : 0; + unsigned int LLt = GridDefaultLatt()[Tp]; - // Need q(x + mu) and q(x - mu). - tmp = Cshift(q_in, mu, 1); - tmpFwd = tmp*lattice_cmplx; - tmp = lattice_cmplx*q_in; - tmpBwd = Cshift(tmp, mu, -1); + q_out = Zero(); + LatticeInteger coords(_grid); + LatticeCoordinate(coords, Tp); - parallel_for (unsigned int sU = 0; sU < Umu._grid->oSites(); ++sU) - { - // Compute the sequential conserved current insertion only if our simd - // object contains a timeslice we need. - vInteger t_mask = ((coords._odata[sU] >= tmin) && - (coords._odata[sU] <= tmax)); - Integer timeSlices = Reduce(t_mask); + // Need q(x + mu) and q(x - mu). + tmp = Cshift(q_in, mu, 1); + tmpFwd = tmp*lattice_cmplx; + tmp = lattice_cmplx*q_in; + tmpBwd = Cshift(tmp, mu, -1); - if (timeSlices > 0) - { - Kernels::SeqConservedCurrentSiteFwd(tmpFwd._odata[sU], - q_out._odata[sU], - Umu, sU, mu, t_mask); - } + auto coords_v = coords.View(); + auto tmpFwd_v = tmpFwd.View(); + auto tmpBwd_v = tmpBwd.View(); + auto Umu_v = Umu.View(); + auto q_out_v = q_out.View(); - // Repeat for backward direction. - t_mask = ((coords._odata[sU] >= (tmin + tshift)) && - (coords._odata[sU] <= (tmax + tshift))); + thread_for(sU, Umu.Grid()->oSites(), { - //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) - unsigned int t0 = 0; - if((tmax==LLt-1) && (tshift==1)) t_mask = (t_mask || (coords._odata[sU] == t0 )); + // Compute the sequential conserved current insertion only if our simd + // object contains a timeslice we need. 
+ vPredicate t_mask; + t_mask() = ((coords_v[sU] >= tmin) && (coords_v[sU] <= tmax)); + Integer timeSlices = Reduce(t_mask()); - timeSlices = Reduce(t_mask); - - if (timeSlices > 0) - { - Kernels::SeqConservedCurrentSiteBwd(tmpBwd._odata[sU], - q_out._odata[sU], - Umu, sU, mu, t_mask); - } + if (timeSlices > 0) { + Kernels::SeqConservedCurrentSiteFwd(tmpFwd_v[sU], + q_out_v[sU], + Umu_v, sU, mu, t_mask); } + // Repeat for backward direction. + t_mask() = ((coords_v[sU] >= (tmin + tshift)) && + (coords_v[sU] <= (tmax + tshift))); + + //if tmax = LLt-1 (last timeslice) include timeslice 0 if the time is shifted (mu=3) + unsigned int t0 = 0; + if((tmax==LLt-1) && (tshift==1)) t_mask() = (t_mask() || (coords_v[sU] == t0 )); + + timeSlices = Reduce(t_mask()); + if (timeSlices > 0) { + Kernels::SeqConservedCurrentSiteBwd(tmpBwd_v[sU], + q_out_v[sU], + Umu_v, sU, mu, t_mask); + } + }); } -FermOpTemplateInstantiate(WilsonFermion); -AdjointFermOpTemplateInstantiate(WilsonFermion); -TwoIndexFermOpTemplateInstantiate(WilsonFermion); -GparityFermOpTemplateInstantiate(WilsonFermion); -} -} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h new file mode 100644 index 00000000..4aed13bf --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmAvx512.h @@ -0,0 +1,716 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + + + Source file: ./lib/qcd/action/fermion/WilsonKernelsAsmAvx512.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#if defined(AVX512) + /////////////////////////////////////////////////////////// + // If we are AVX512 specialise the single precision routine + /////////////////////////////////////////////////////////// +#include +#include + +/// Switch off the 5d vectorised code optimisations +#undef DWFVEC5D + +static Vector signsF; + + template + int setupSigns(Vector& signs ){ + Vector bother(2); + signs = bother; + vrsign(signs[0]); + visign(signs[1]); + return 1; + } + + static int signInitF = setupSigns(signsF); + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) +#define COMPLEX_SIGNS(isigns) vComplexF *isigns = &signsF[0]; + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) 
+#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + 
+template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void 
+WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// + +#ifdef DWFVEC5D + +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor 
*buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +#undef MULT_2SPIN +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void 
+WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void 
+WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#endif // VEC 5D + +#undef COMPLEX_SIGNS +#undef MAYBEPERM +#undef MULT_2SPIN + + + +/////////////////////////////////////////////////////////// +// If we are AVX512 specialise the double precision routine +/////////////////////////////////////////////////////////// + +#include + +static Vector signsD; +static int signInitD = setupSigns(signsD); + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) +#define COMPLEX_SIGNS(isigns) vComplexD *isigns = &signsD[0]; + + +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, undag Kernel, double +///////////////////////////////////////////////////////////////// +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, 
DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, 
SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +///////////////////////////////////////////////////////////////// +// XYZT vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int 
ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef MAYBEPERM +#undef MULT_2SPIN +#define MAYBEPERM(A,B) +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +///////////////////////////////////////////////////////////////// +// Ls vectorised, undag Kernel, single +///////////////////////////////////////////////////////////////// +#ifdef DWFVEC5D + +#undef KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include 
+template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR +#undef MULT_2SPIN +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LSNOPF(ptr,pf) +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const 
FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +///////////////////////////////////////////////////////////////// +// Ls vectorised, dag Kernel, single +///////////////////////////////////////////////////////////////// +#define KERNEL_DAG +#define INTERIOR_AND_EXTERIOR +#undef INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#define INTERIOR +#undef EXTERIOR +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, 
FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#undef INTERIOR_AND_EXTERIOR +#undef INTERIOR +#define EXTERIOR + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include +template<> void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +#include + +#endif // VEC 5D + +#undef COMPLEX_SIGNS +#undef MAYBEPERM +#undef MULT_2SPIN + +#undef Chi_00 +#undef Chi_01 +#undef Chi_02 +#undef Chi_10 +#undef Chi_11 +#undef Chi_12 +#undef Chi_20 +#undef Chi_21 +#undef Chi_22 +#undef Chi_30 +#undef Chi_31 +#undef Chi_32 + +#undef UChi_00 +#undef UChi_01 +#undef UChi_02 +#undef UChi_10 +#undef UChi_11 +#undef UChi_12 +#undef UChi_20 +#undef UChi_21 +#undef UChi_22 +#undef UChi_30 +#undef UChi_31 +#undef UChi_32 + +#undef Psi_00 +#undef Psi_01 +#undef Psi_02 +#undef Psi_10 +#undef Psi_11 +#undef Psi_12 +#undef Psi_20 +#undef Psi_21 +#undef Psi_22 +#undef Psi_30 +#undef Psi_31 +#undef Psi_32 + +#undef Phi_00 
+#undef Phi_01 +#undef Phi_02 +#undef Phi_10 +#undef Phi_11 +#undef Phi_12 +#undef Phi_20 +#undef Phi_21 +#undef Phi_22 +#undef Phi_30 +#undef Phi_31 +#undef Phi_32 + + +#endif //AVX512 diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsmBody.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmBody.h similarity index 96% rename from Grid/qcd/action/fermion/WilsonKernelsAsmBody.h rename to Grid/qcd/action/fermion/implementation/WilsonKernelsAsmBody.h index db8651ab..9f38bfc8 100644 --- a/Grid/qcd/action/fermion/WilsonKernelsAsmBody.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmBody.h @@ -130,16 +130,18 @@ int local,perm, ptype; uint64_t base; uint64_t basep; - const uint64_t plocal =(uint64_t) & in._odata[0]; + const uint64_t plocal =(uint64_t) & in[0]; COMPLEX_SIGNS(isigns); MASK_REGS; - int nmax=U._grid->oSites(); + int nmax=U.oSites(); for(int site=0;site=nmax) ssn=0; - int sUn=lo.Reorder(ssn); + // int sUn=lo.Reorder(ssn); + int sUn=ssn; LOCK_GAUGE(0); #else int sU =ssU; @@ -166,7 +168,7 @@ if (nmu==0) break; // if (nmu!=0) std::cout << "EXT "< +Author: paboyle +Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#pragma once + +#include + +NAMESPACE_BEGIN(Grid); + +/////////////////////////////////////////////////////////// +// Default to no assembler implementation +// Will specialise to +/////////////////////////////////////////////////////////// +template void +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +template void +WilsonKernels::AsmDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionFieldView &in, FermionFieldView &out) +{ + assert(0); +} + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmQPX.h similarity index 77% rename from Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h rename to 
Grid/qcd/action/fermion/implementation/WilsonKernelsAsmQPX.h index 612234d7..1f4a5f36 100644 --- a/Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsAsmQPX.h @@ -28,7 +28,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ - +#pragma once #if defined(QPX) @@ -52,18 +52,18 @@ Author: paboyle ///////////////////////////////////////////////////////////////// #undef KERNEL_DAG template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include ///////////////////////////////////////////////////////////////// // XYZT vectorised, dag Kernel, single ///////////////////////////////////////////////////////////////// #define KERNEL_DAG template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include #undef MAYBEPERM #undef MULT_2SPIN @@ -75,18 +75,18 @@ WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do ///////////////////////////////////////////////////////////////// #undef KERNEL_DAG template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include ///////////////////////////////////////////////////////////////// // Ls vectorised, dag Kernel, single 
///////////////////////////////////////////////////////////////// #define KERNEL_DAG template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include #undef MAYBEPERM #undef MULT_2SPIN @@ -104,9 +104,9 @@ WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrde ///////////////////////////////////////////////////////////////// #undef KERNEL_DAG template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include ///////////////////////////////////////////////////////////////// @@ -115,9 +115,9 @@ WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,Doubl ///////////////////////////////////////////////////////////////// #define KERNEL_DAG template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include ///////////////////////////////////////////////////////////////// #undef MAYBEPERM @@ -129,9 +129,9 @@ WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,Do ///////////////////////////////////////////////////////////////// #undef KERNEL_DAG template<> void -WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSite(StencilView &st, DoubledGaugeField &U, SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include 
///////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////// @@ -139,9 +139,9 @@ WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & ///////////////////////////////////////////////////////////////// #define KERNEL_DAG template<> void -WilsonKernels::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, +WilsonKernels::AsmDhopSiteDag(StencilView &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -#include +#include ///////////////////////////////////////////////////////////////// #undef MAYBEPERM diff --git a/Grid/qcd/action/fermion/WilsonKernelsHandGparity.cc b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h similarity index 79% rename from Grid/qcd/action/fermion/WilsonKernelsHandGparity.cc rename to Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h index 3bf2f10a..2150938f 100644 --- a/Grid/qcd/action/fermion/WilsonKernelsHandGparity.cc +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandGparityImplementation.h @@ -26,6 +26,9 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + +#pragma once + #include #define REGISTER @@ -45,7 +48,7 @@ Author: paboyle Chimu_32=ref(F)(3)(2) #define LOAD_CHIMU(DIR,F,PERM) \ - { const SiteSpinor & ref (in._odata[offset]); LOAD_CHIMU_BODY(F); } + { const SiteSpinor & ref (in[offset]); LOAD_CHIMU_BODY(F); } #define LOAD_CHI_BODY(F) \ Chi_00 = ref(F)(0)(0);\ @@ -92,9 +95,9 @@ Author: paboyle g = F; \ direction = st._directions[DIR]; \ distance = st._distances[DIR]; \ - sl = st._grid->_simd_layout[direction]; \ + sl = st._simd_layout[direction]; \ inplace_twist = 0; \ - if(SE->_around_the_world && this->Params.twists[DIR % 4]){ \ + 
if(SE->_around_the_world && st.parameters.twists[DIR % 4]){ \ if(sl == 1){ \ g = (F+1) % 2; \ }else{ \ @@ -103,7 +106,7 @@ Author: paboyle } #define LOAD_CHIMU_GPARITY_INPLACE_TWIST(DIR,F,PERM) \ - { const SiteSpinor &ref(in._odata[offset]); \ + { const SiteSpinor &ref(in[offset]); \ LOAD_CHI_SETUP(DIR,F); \ if(!inplace_twist){ \ LOAD_CHIMU_BODY(g); \ @@ -201,10 +204,10 @@ Author: paboyle #define MULT_2SPIN(A,F) \ - {auto & ref(U._odata[sU](A)); MULT_2SPIN_BODY; } + {auto & ref(U[sU](A)); MULT_2SPIN_BODY; } #define MULT_2SPIN_GPARITY(A,F) \ - {auto & ref(U._odata[sU](F)(A)); MULT_2SPIN_BODY; } + {auto & ref(U[sU](F)(A)); MULT_2SPIN_BODY; } #define PERMUTE_DIR(dir) \ @@ -468,8 +471,7 @@ Author: paboyle #define HAND_STENCIL_LEG_EXT(PROJ,PERM,DIR,RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ SE=st.GetEntry(ptype,DIR,ss); \ offset = SE->_offset; \ - local = SE->_is_local; \ - perm = SE->_permute; \ + perm = SE->_permute; \ if((!SE->_is_local)&&(!st.same_node[DIR]) ) { \ LOAD_CHI_IMPL(DIR,F,PERM); \ MULT_2SPIN_IMPL(DIR,F); \ @@ -479,7 +481,7 @@ Author: paboyle #define HAND_RESULT(ss,F) \ { \ - SiteSpinor & ref (out._odata[ss]); \ + SiteSpinor & ref (out[ss]); \ vstream(ref(F)(0)(0),result_00); \ vstream(ref(F)(0)(1),result_01); \ vstream(ref(F)(0)(2),result_02); \ @@ -496,7 +498,7 @@ Author: paboyle #define HAND_RESULT_EXT(ss,F) \ if (nmu){ \ - SiteSpinor & ref (out._odata[ss]); \ + SiteSpinor & ref (out[ss]); \ ref(F)(0)(0)+=result_00; \ ref(F)(0)(1)+=result_01; \ ref(F)(0)(2)+=result_02; \ @@ -545,18 +547,18 @@ Author: paboyle Simd U_21; #define ZERO_RESULT \ - result_00=zero; \ - result_01=zero; \ - result_02=zero; \ - result_10=zero; \ - result_11=zero; \ - result_12=zero; \ - result_20=zero; \ - result_21=zero; \ - result_22=zero; \ - result_30=zero; \ - result_31=zero; \ - result_32=zero; + result_00=Zero(); \ + result_01=Zero(); \ + result_02=Zero(); \ + result_10=Zero(); \ + result_11=Zero(); \ + result_12=Zero(); \ + result_20=Zero(); \ + 
result_21=Zero(); \ + result_22=Zero(); \ + result_30=Zero(); \ + result_31=Zero(); \ + result_32=Zero(); #define Chimu_00 Chi_00 #define Chimu_01 Chi_01 @@ -571,21 +573,7 @@ Author: paboyle #define Chimu_31 UChi_11 #define Chimu_32 UChi_12 -namespace Grid { -namespace QCD { - -template void -WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; +NAMESPACE_BEGIN(Grid); #define HAND_DOP_SITE(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG(XM_PROJ,3,Xp,XM_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ @@ -598,21 +586,6 @@ WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge HAND_STENCIL_LEG(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss,F) - HAND_DOP_SITE(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - -template -void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - #define HAND_DOP_SITE_DAG(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ HAND_STENCIL_LEG(XP_PROJ,3,Xp,XP_RECON,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG(YP_PROJ,2,Yp,YP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ @@ -624,22 +597,6 @@ void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub HAND_STENCIL_LEG(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss,F) - HAND_DOP_SITE_DAG(, 
LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - -template void -WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; - #define HAND_DOP_SITE_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ ZERO_RESULT; \ HAND_STENCIL_LEG_INT(XM_PROJ,3,Xp,XM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ @@ -652,21 +609,6 @@ WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa HAND_STENCIL_LEG_INT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss,F) - HAND_DOP_SITE_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - -template -void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - #define HAND_DOP_SITE_DAG_INT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ ZERO_RESULT; \ HAND_STENCIL_LEG_INT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ @@ -678,23 +620,6 @@ void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D HAND_STENCIL_LEG_INT(ZM_PROJ,1,Zm,ZM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_STENCIL_LEG_INT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT(ss,F) - - HAND_DOP_SITE_DAG_INT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - -template void -WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const 
FermionField &in, FermionField &out) -{ -// T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - int offset,local,perm, ptype; - StencilEntry *SE; - int nmu=0; #define HAND_DOP_SITE_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ ZERO_RESULT; \ @@ -708,22 +633,6 @@ WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa HAND_STENCIL_LEG_EXT(TP_PROJ,0,Tm,TP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT_EXT(ss,F) - HAND_DOP_SITE_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - -template -void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) -{ - typedef typename Simd::scalar_type S; - typedef typename Simd::vector_type V; - - HAND_DECLARATIONS(ignore); - - StencilEntry *SE; - int offset,local,perm, ptype; - int nmu=0; - #define HAND_DOP_SITE_DAG_EXT(F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL) \ ZERO_RESULT; \ HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ @@ -736,13 +645,10 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_STENCIL_LEG_EXT(TM_PROJ,0,Tm,TM_RECON_ACCUM,F,LOAD_CHI_IMPL,LOAD_CHIMU_IMPL,MULT_2SPIN_IMPL); \ HAND_RESULT_EXT(ss,F) - HAND_DOP_SITE_DAG_EXT(, LOAD_CHI,LOAD_CHIMU,MULT_2SPIN); -} - #define HAND_SPECIALISE_GPARITY(IMPL) \ - template<> void \ - WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + template<> void \ + WilsonKernels::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -756,9 +662,9 @@ 
void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } \ \ - template<> \ - void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + template<> void \ + WilsonKernels::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -772,9 +678,9 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } \ \ - template<> void \ - WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + template<> void \ + WilsonKernels::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -788,9 +694,9 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } \ \ - template<> \ - void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + template<> void \ + WilsonKernels::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -805,8 +711,8 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D } \ \ template<> void \ - 
WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + WilsonKernels::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -814,16 +720,16 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D \ HAND_DECLARATIONS(ignore); \ \ - int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \ + int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \ StencilEntry *SE; \ int nmu=0; \ HAND_DOP_SITE_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ nmu = 0; \ HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } \ - template<> \ - void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out) \ + template<> void \ + WilsonKernels::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ { \ typedef IMPL Impl; \ typedef typename Simd::scalar_type S; \ @@ -832,47 +738,11 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DECLARATIONS(ignore); \ \ StencilEntry *SE; \ - int offset,local,perm, ptype, g, direction, distance, sl, inplace_twist; \ + int offset,perm, ptype, g, direction, distance, sl, inplace_twist; \ int nmu=0; \ HAND_DOP_SITE_DAG_EXT(0, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ nmu = 0; \ HAND_DOP_SITE_DAG_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ } - -HAND_SPECIALISE_GPARITY(GparityWilsonImplF); -HAND_SPECIALISE_GPARITY(GparityWilsonImplD); -HAND_SPECIALISE_GPARITY(GparityWilsonImplFH); 
-HAND_SPECIALISE_GPARITY(GparityWilsonImplDF); - - - - - - - - - - - -////////////// Wilson ; uses this implementation ///////////////////// - -#define INSTANTIATE_THEM(A) \ -template void WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out);\ -template void WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); - -INSTANTIATE_THEM(GparityWilsonImplF); -INSTANTIATE_THEM(GparityWilsonImplD); -INSTANTIATE_THEM(GparityWilsonImplFH); -INSTANTIATE_THEM(GparityWilsonImplDF); -}} +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/WilsonKernelsHand.cc b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h similarity index 81% rename from Grid/qcd/action/fermion/WilsonKernelsHand.cc rename to Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h index 50816495..f7b018fa 100644 --- a/Grid/qcd/action/fermion/WilsonKernelsHand.cc +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsHandImplementation.h @@ -26,12 +26,58 @@ Author: paboyle See the full license in the file 
"LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + +#pragma once + #include + +#undef LOAD_CHIMU +#undef LOAD_CHI +#undef MULT_2SPIN +#undef PERMUTE_DIR +#undef XP_PROJ +#undef YP_PROJ +#undef ZP_PROJ +#undef TP_PROJ +#undef XM_PROJ +#undef YM_PROJ +#undef ZM_PROJ +#undef TM_PROJ +#undef XP_RECON +#undef XP_RECON_ACCUM +#undef XM_RECON +#undef XM_RECON_ACCUM +#undef YP_RECON_ACCUM +#undef YM_RECON_ACCUM +#undef ZP_RECON_ACCUM +#undef ZM_RECON_ACCUM +#undef TP_RECON_ACCUM +#undef TM_RECON_ACCUM +#undef ZERO_RESULT +#undef Chimu_00 +#undef Chimu_01 +#undef Chimu_02 +#undef Chimu_10 +#undef Chimu_11 +#undef Chimu_12 +#undef Chimu_20 +#undef Chimu_21 +#undef Chimu_22 +#undef Chimu_30 +#undef Chimu_31 +#undef Chimu_32 +#undef HAND_STENCIL_LEG +#undef HAND_STENCIL_LEG_INT +#undef HAND_STENCIL_LEG_EXT +#undef HAND_RESULT +#undef HAND_RESULT_INT +#undef HAND_RESULT_EXT + #define REGISTER #define LOAD_CHIMU \ - {const SiteSpinor & ref (in._odata[offset]); \ + {const SiteSpinor & ref (in[offset]); \ Chimu_00=ref()(0)(0);\ Chimu_01=ref()(0)(1);\ Chimu_02=ref()(0)(2);\ @@ -56,7 +102,7 @@ Author: paboyle // To splat or not to splat depends on the implementation #define MULT_2SPIN(A)\ - {auto & ref(U._odata[sU](A)); \ + {auto & ref(U[sU](A)); \ Impl::loadLinkElement(U_00,ref()(0,0)); \ Impl::loadLinkElement(U_10,ref()(1,0)); \ Impl::loadLinkElement(U_20,ref()(2,0)); \ @@ -355,7 +401,7 @@ Author: paboyle #define HAND_RESULT(ss) \ { \ - SiteSpinor & ref (out._odata[ss]); \ + SiteSpinor & ref (out[ss]); \ vstream(ref()(0)(0),result_00); \ vstream(ref()(0)(1),result_01); \ vstream(ref()(0)(2),result_02); \ @@ -372,7 +418,7 @@ Author: paboyle #define HAND_RESULT_EXT(ss) \ if (nmu){ \ - SiteSpinor & ref (out._odata[ss]); \ + SiteSpinor & ref (out[ss]); \ ref()(0)(0)+=result_00; \ ref()(0)(1)+=result_01; \ ref()(0)(2)+=result_02; \ @@ -421,18 +467,18 @@ Author: paboyle Simd U_21; 
#define ZERO_RESULT \ - result_00=zero; \ - result_01=zero; \ - result_02=zero; \ - result_10=zero; \ - result_11=zero; \ - result_12=zero; \ - result_20=zero; \ - result_21=zero; \ - result_22=zero; \ - result_30=zero; \ - result_31=zero; \ - result_32=zero; + result_00=Zero(); \ + result_01=Zero(); \ + result_02=Zero(); \ + result_10=Zero(); \ + result_11=Zero(); \ + result_12=Zero(); \ + result_20=Zero(); \ + result_21=Zero(); \ + result_22=Zero(); \ + result_30=Zero(); \ + result_31=Zero(); \ + result_32=Zero(); #define Chimu_00 Chi_00 #define Chimu_01 Chi_01 @@ -447,12 +493,11 @@ Author: paboyle #define Chimu_31 UChi_11 #define Chimu_32 UChi_12 -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); template void -WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
typedef typename Simd::scalar_type S; @@ -475,8 +520,8 @@ WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGauge } template -void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -498,8 +543,8 @@ void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,Doub } template void -WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
typedef typename Simd::scalar_type S; @@ -522,8 +567,8 @@ WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGa } template -void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -545,8 +590,8 @@ void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,D } template void -WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +WilsonKernels::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... 
typedef typename Simd::scalar_type S; @@ -554,7 +599,7 @@ WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa HAND_DECLARATIONS(ignore); - int offset,local,perm, ptype; + int offset, ptype; StencilEntry *SE; int nmu=0; ZERO_RESULT; @@ -570,8 +615,8 @@ WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGa } template -void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, - int ss,int sU,const FermionField &in, FermionField &out) +void WilsonKernels::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionFieldView &in, FermionFieldView &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -579,7 +624,7 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D HAND_DECLARATIONS(ignore); StencilEntry *SE; - int offset,local,perm, ptype; + int offset, ptype; int nmu=0; ZERO_RESULT; HAND_STENCIL_LEG_EXT(XP_PROJ,3,Xp,XP_RECON_ACCUM); @@ -595,37 +640,45 @@ void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,D ////////////// Wilson ; uses this implementation ///////////////////// -#define INSTANTIATE_THEM(A) \ -template void WilsonKernels::HandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out);\ -template void WilsonKernels::HandDhopSiteInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagInt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); \ 
-template void WilsonKernels::HandDhopSiteExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ - int ss,int sU,const FermionField &in, FermionField &out); \ -template void WilsonKernels::HandDhopSiteDagExt(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, \ - int ss,int sU,const FermionField &in, FermionField &out); - -INSTANTIATE_THEM(WilsonImplF); -INSTANTIATE_THEM(WilsonImplD); -INSTANTIATE_THEM(ZWilsonImplF); -INSTANTIATE_THEM(ZWilsonImplD); -INSTANTIATE_THEM(DomainWallVec5dImplF); -INSTANTIATE_THEM(DomainWallVec5dImplD); -INSTANTIATE_THEM(ZDomainWallVec5dImplF); -INSTANTIATE_THEM(ZDomainWallVec5dImplD); -INSTANTIATE_THEM(WilsonImplFH); -INSTANTIATE_THEM(WilsonImplDF); -INSTANTIATE_THEM(ZWilsonImplFH); -INSTANTIATE_THEM(ZWilsonImplDF); -INSTANTIATE_THEM(DomainWallVec5dImplFH); -INSTANTIATE_THEM(DomainWallVec5dImplDF); -INSTANTIATE_THEM(ZDomainWallVec5dImplFH); -INSTANTIATE_THEM(ZDomainWallVec5dImplDF); -INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplF); -INSTANTIATE_THEM(WilsonTwoIndexAntiSymmetricImplD); - -}} +NAMESPACE_END(Grid); +#undef LOAD_CHIMU +#undef LOAD_CHI +#undef MULT_2SPIN +#undef PERMUTE_DIR +#undef XP_PROJ +#undef YP_PROJ +#undef ZP_PROJ +#undef TP_PROJ +#undef XM_PROJ +#undef YM_PROJ +#undef ZM_PROJ +#undef TM_PROJ +#undef XP_RECON +#undef XP_RECON_ACCUM +#undef XM_RECON +#undef XM_RECON_ACCUM +#undef YP_RECON_ACCUM +#undef YM_RECON_ACCUM +#undef ZP_RECON_ACCUM +#undef ZM_RECON_ACCUM +#undef TP_RECON_ACCUM +#undef TM_RECON_ACCUM +#undef ZERO_RESULT +#undef Chimu_00 +#undef Chimu_01 +#undef Chimu_02 +#undef Chimu_10 +#undef Chimu_11 +#undef Chimu_12 +#undef Chimu_20 +#undef Chimu_21 +#undef Chimu_22 +#undef Chimu_30 +#undef Chimu_31 +#undef Chimu_32 +#undef HAND_STENCIL_LEG +#undef HAND_STENCIL_LEG_INT +#undef HAND_STENCIL_LEG_EXT +#undef HAND_RESULT +#undef HAND_RESULT_INT +#undef HAND_RESULT_EXT diff --git a/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h 
b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h new file mode 100644 index 00000000..a787fa79 --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/WilsonKernelsImplementation.h @@ -0,0 +1,551 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#pragma once + +#include + +NAMESPACE_BEGIN(Grid); + + +//////////////////////////////////////////// +// Generic implementation; move to different file? 
+//////////////////////////////////////////// + +accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip) +{ +#ifdef __CUDA_ARCH__ + static_assert(sizeof(StencilEntry)==sizeof(uint4),"Unexpected Stencil Entry Size"); + uint4 * mem_pun = (uint4 *)mem; // force 128 bit loads + uint4 * chip_pun = (uint4 *)&chip; + * chip_pun = * mem_pun; +#else + chip = *mem; +#endif + return; +} + +#define GENERIC_STENCIL_LEG(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if (SE->_is_local) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + } else { \ + chi = coalescedRead(buf[SE->_offset],lane); \ + } \ + synchronise(); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); + +#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if (SE->_is_local) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + } else if ( st.same_node[Dir] ) { \ + chi = coalescedRead(buf[SE->_offset],lane); \ + } \ + synchronise(); \ + if (SE->_is_local || st.same_node[Dir] ) { \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); \ + } \ + synchronise(); + +#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \ + SE = st.GetEntry(ptype, Dir, sF); \ + if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \ + auto chi = coalescedRead(buf[SE->_offset],lane); \ + Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \ + Recon(result, Uchi); \ + nmu++; \ + } \ + synchronise(); + +#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon) \ + if (gamma == Dir) { \ + if (SE->_is_local ) { \ + int perm= SE->_permute; \ + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \ + spProj(chi,tmp); \ + } else { \ + chi = coalescedRead(buf[SE->_offset],lane); \ + } \ + synchronise(); \ + Impl::multLink(Uchi, U[sU], chi, dir, SE, st); \ + Recon(result, Uchi); \ + synchronise(); \ + } 
+ + //////////////////////////////////////////////////////////////////// + // All legs kernels ; comms then compute + //////////////////////////////////////////////////////////////////// +template +void WilsonKernels::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcHalfSpinor chi; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + GENERIC_STENCIL_LEG(Xp,spProjXp,spReconXp); + GENERIC_STENCIL_LEG(Yp,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG(Zp,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG(Tp,spProjTp,accumReconTp); + GENERIC_STENCIL_LEG(Xm,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG(Ym,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG(Zm,spProjZm,accumReconZm); + GENERIC_STENCIL_LEG(Tm,spProjTm,accumReconTm); + coalescedWrite(out[sF],result,lane); +}; + +template +void WilsonKernels::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcHalfSpinor chi; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + GENERIC_STENCIL_LEG(Xm,spProjXp,spReconXp); + GENERIC_STENCIL_LEG(Ym,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG(Zm,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG(Tm,spProjTp,accumReconTp); + GENERIC_STENCIL_LEG(Xp,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG(Yp,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG(Zp,spProjZm,accumReconZm); + 
GENERIC_STENCIL_LEG(Tp,spProjTm,accumReconTm); + coalescedWrite(out[sF], result,lane); +}; + //////////////////////////////////////////////////////////////////// + // Interior kernels + //////////////////////////////////////////////////////////////////// +template +void WilsonKernels::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcHalfSpinor chi; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + + result=Zero(); + GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp); + GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG_INT(Tp,spProjTp,accumReconTp); + GENERIC_STENCIL_LEG_INT(Xm,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG_INT(Ym,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG_INT(Zm,spProjZm,accumReconZm); + GENERIC_STENCIL_LEG_INT(Tm,spProjTm,accumReconTm); + coalescedWrite(out[sF], result,lane); +}; + +template +void WilsonKernels::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + + calcHalfSpinor chi; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + result=Zero(); + GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp); + GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG_INT(Tm,spProjTp,accumReconTp); + 
GENERIC_STENCIL_LEG_INT(Xp,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG_INT(Yp,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG_INT(Zp,spProjZm,accumReconZm); + GENERIC_STENCIL_LEG_INT(Tp,spProjTm,accumReconTm); + coalescedWrite(out[sF], result,lane); +}; +//////////////////////////////////////////////////////////////////// +// Exterior kernels +//////////////////////////////////////////////////////////////////// +template +void WilsonKernels::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + int nmu=0; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + result=Zero(); + GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp); + GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG_EXT(Tp,spProjTp,accumReconTp); + GENERIC_STENCIL_LEG_EXT(Xm,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG_EXT(Ym,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG_EXT(Zm,spProjZm,accumReconZm); + GENERIC_STENCIL_LEG_EXT(Tm,spProjTm,accumReconTm); + if ( nmu ) { + auto out_t = coalescedRead(out[sF],lane); + out_t = out_t + result; + coalescedWrite(out[sF],out_t,lane); + } +}; + +template +void WilsonKernels::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + // calcHalfSpinor *chi_p; + calcHalfSpinor Uchi; + calcSpinor result; + StencilEntry *SE; + int ptype; + int nmu=0; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + 
result=Zero(); + GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp); + GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp); + GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp); + GENERIC_STENCIL_LEG_EXT(Tm,spProjTp,accumReconTp); + GENERIC_STENCIL_LEG_EXT(Xp,spProjXm,accumReconXm); + GENERIC_STENCIL_LEG_EXT(Yp,spProjYm,accumReconYm); + GENERIC_STENCIL_LEG_EXT(Zp,spProjZm,accumReconZm); + GENERIC_STENCIL_LEG_EXT(Tp,spProjTm,accumReconTm); + if ( nmu ) { + auto out_t = coalescedRead(out[sF],lane); + out_t = out_t + result; + coalescedWrite(out[sF],out_t,lane); + } +}; + +template +void WilsonKernels::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, + int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) +{ + typedef decltype(coalescedRead(buf[0])) calcHalfSpinor; + typedef decltype(coalescedRead(in[0])) calcSpinor; + calcHalfSpinor chi; + calcSpinor result; + calcHalfSpinor Uchi; + StencilEntry *SE; + int ptype; + const int Nsimd = SiteHalfSpinor::Nsimd(); + const int lane=SIMTlane(Nsimd); + + SE = st.GetEntry(ptype, dir, sF); + if (gamma == Xp) { + if (SE->_is_local ) { + int perm= SE->_permute; + auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); + spProjXp(chi,tmp); + } else { + chi = coalescedRead(buf[SE->_offset],lane); + } + Impl::multLink(Uchi, U[sU], chi, dir, SE, st); + spReconXp(result, Uchi); + } + + GENERIC_DHOPDIR_LEG(Yp,spProjYp,spReconYp); + GENERIC_DHOPDIR_LEG(Zp,spProjZp,spReconZp); + GENERIC_DHOPDIR_LEG(Tp,spProjTp,spReconTp); + GENERIC_DHOPDIR_LEG(Xm,spProjXm,spReconXm); + GENERIC_DHOPDIR_LEG(Ym,spProjYm,spReconYm); + GENERIC_DHOPDIR_LEG(Zm,spProjZm,spReconZm); + GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm); + coalescedWrite(out[sF], result,lane); +} + +template +void WilsonKernels::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls, + int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma) +{ + assert(dirdisp<=7); + 
assert(dirdisp>=0); + + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); + auto st_v = st.View(); + accelerator_for(ss,Nsite,Simd::Nsimd(),{ + for(int s=0;s::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ + }); + +#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier(); + +#define ASM_CALL(A) \ + thread_for( ss, Nsite, { \ + int sU = ss; \ + int sF = ss*Ls; \ + WilsonKernels::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \ + }); + +template +void WilsonKernels::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField &out, + int interior,int exterior) +{ + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); + auto st_v = st.View(); + + if( interior && exterior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); printf("."); return;} +#endif + } else if( interior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteInt); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteInt); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); printf("-"); return;} +#endif + } else if( exterior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); printf("+"); return;} +#endif + } + assert(0 && " Kernel optimisation case not covered "); + } + template + void WilsonKernels::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf, + int Ls, int Nsite, const FermionField &in, FermionField 
&out, + int interior,int exterior) + { + auto U_v = U.View(); + auto in_v = in.View(); + auto out_v = out.View(); + auto st_v = st.View(); + + if( interior && exterior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDag); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDag); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;} +#endif + } else if( interior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} +#endif + } else if( exterior ) { + if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;} +#ifndef GRID_NVCC + if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;} + if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} +#endif + } + assert(0 && " Kernel optimisation case not covered "); + } + +/******************************************************************************* + * Conserved current utilities for Wilson fermions, for contracting propagators + * to make a conserved current sink or inserting the conserved current + * sequentially. Common to both 4D and 5D. + ******************************************************************************/ +// N.B. Functions below assume a -1/2 factor within U. +#define WilsonCurrentFwd(expr, mu) ((expr - Gamma::gmu[mu]*expr)) +#define WilsonCurrentBwd(expr, mu) ((expr + Gamma::gmu[mu]*expr)) + +/******************************************************************************* + * Name: ContractConservedCurrentSiteFwd + * Operation: (1/2) * q2[x] * U(x) * (g[mu] - 1) * q1[x + mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. 
+ * - Pass in q_in_1 shifted in +ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + SitePropagator result, tmp; + Gamma g5(Gamma::Algebra::Gamma5); + + Impl::multLink(tmp, U[sU], q_in_1, mu); + + result = g5 * adj(q_in_2) * g5 * WilsonCurrentFwd(tmp, mu); + + if (switch_sign) { + q_out -= result; + } else { + q_out += result; + } +} + +/******************************************************************************* + * Name: ContractConservedCurrentSiteBwd + * Operation: (1/2) * q2[x + mu] * U^dag(x) * (g[mu] + 1) * q1[x] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in_2 shifted in +ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::ContractConservedCurrentSiteBwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + SitePropagator result, tmp; + Gamma g5(Gamma::Algebra::Gamma5); + + Impl::multLink(tmp, U[sU], q_in_1, mu + Nd); + + result = g5 * adj(q_in_2) * g5 * WilsonCurrentBwd(tmp, mu); + if (switch_sign) { + q_out += result; + } else { + q_out -= result; + } +} + +/******************************************************************************* + * Name: SeqConservedCurrentSiteFwd + * Operation: (1/2) * U(x) * (g[mu] - 1) * q[x + mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in +ve mu direction. 
+ ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteFwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + vPredicate t_mask, + bool switch_sign) +{ + SitePropagator result; + + Impl::multLink(result, U[sU], q_in, mu); + result = WilsonCurrentFwd(result, mu); + + // Zero any unwanted timeslice entries. + result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) { + q_out -= result; + } else { + q_out += result; + } +} + +/******************************************************************************* + * Name: SeqConservedCurrentSiteBwd + * Operation: (1/2) * U^dag(x) * (g[mu] + 1) * q[x - mu] + * Notes: - DoubledGaugeField U assumed to contain -1/2 factor. + * - Pass in q_in shifted in -ve mu direction. + ******************************************************************************/ +template +void WilsonKernels::SeqConservedCurrentSiteBwd(const SitePropagator &q_in, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + vPredicate t_mask, + bool switch_sign) +{ + SitePropagator result; + Impl::multLink(result, U[sU], q_in, mu + Nd); + result = WilsonCurrentBwd(result, mu); + + // Zero any unwanted timeslice entries. 
+ result = predicatedWhere(t_mask, result, 0.*result); + + if (switch_sign) { + q_out += result; + } else { + q_out -= result; + } +} + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h b/Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h new file mode 100644 index 00000000..9a1a152c --- /dev/null +++ b/Grid/qcd/action/fermion/implementation/WilsonTMFermionImplementation.h @@ -0,0 +1,97 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +#pragma once + +NAMESPACE_BEGIN(Grid); + +/* + * BF sequence + * + void bfmbase::MooeeInv(Fermion_t psi, + Fermion_t chi, + int dag, int cb) + + double m = this->mass; + double tm = this->twistedmass; + double mtil = 4.0+this->mass; + + double sq = mtil*mtil + tm*tm; + + double a = mtil/sq; + double b = -tm /sq; + if(dag) b=-b; + axpibg5x(chi,psi,a,b); + + void bfmbase::Mooee(Fermion_t psi, + Fermion_t chi, + int dag,int cb) + double a = 4.0+this->mass; + double b = this->twistedmass; + if(dag) b=-b; + axpibg5x(chi,psi,a,b); +*/ + +template +void WilsonTMFermion::Mooee(const FermionField &in, FermionField &out) { + RealD a = 4.0+this->mass; + RealD b = this->mu; + out.Checkerboard() = in.Checkerboard(); + axpibg5x(out,in,a,b); +} +template +void WilsonTMFermion::MooeeDag(const FermionField &in, FermionField &out) { + RealD a = 4.0+this->mass; + RealD b = -this->mu; + out.Checkerboard() = in.Checkerboard(); + axpibg5x(out,in,a,b); +} +template +void WilsonTMFermion::MooeeInv(const FermionField &in, FermionField &out) { + RealD m = this->mass; + RealD tm = this->mu; + RealD mtil = 4.0+m; + RealD sq = mtil*mtil+tm*tm; + RealD a = mtil/sq; + RealD b = -tm /sq; + axpibg5x(out,in,a,b); +} +template +void WilsonTMFermion::MooeeInvDag(const FermionField &in, FermionField &out) { + RealD m = this->mass; + RealD tm = this->mu; + RealD mtil = 4.0+m; + RealD sq = mtil*mtil+tm*tm; + RealD a = mtil/sq; + RealD b = tm /sq; + axpibg5x(out,in,a,b); +} + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/CayleyFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/CayleyFermion5DInstantiation.cc.master new file mode 100644 index 00000000..5130db9c --- /dev/null +++ 
b/Grid/qcd/action/fermion/instantiation/CayleyFermion5DInstantiation.cc.master @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ContinuedFractionFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/ContinuedFractionFermion5DInstantiation.cc.master new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ContinuedFractionFermion5DInstantiation.cc.master @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/DomainWallEOFAFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/DomainWallEOFAFermionInstantiation.cc.master new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/DomainWallEOFAFermionInstantiation.cc.master @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/CayleyFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/ContinuedFractionFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/DomainWallEOFAFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/MobiusEOFAFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/PartialFractionFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonCloverFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermion5DInstantiationGparityWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..75f143cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonKernelsInstantiationGparityWilsonImplD.cc @@ -0,0 +1,74 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move these + +#include "impl.h" + +// G-parity requires a more specialised implementation: the two conserved-current contractions below are unimplemented stubs that only assert(0). +template <> +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} +template <> +void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} + +HAND_SPECIALISE_GPARITY(IMPLEMENTATION); + + +template class WilsonKernels; + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/WilsonTMFermionInstantiationGparityWilsonImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/impl.h b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/impl.h new file mode 100644 index 00000000..930380b1 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION GparityWilsonImplD diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/CayleyFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either 
version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/ContinuedFractionFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/DomainWallEOFAFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/MobiusEOFAFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/PartialFractionFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonCloverFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermion5DInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..75f143cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonKernelsInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,74 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move these + +#include "impl.h" + +// G-parity requires more specialised implementation. +template <> +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} +template <> +void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int mu, + unsigned int sU, + bool switch_sign) +{ + assert(0); +} + +HAND_SPECIALISE_GPARITY(IMPLEMENTATION); + + +template class WilsonKernels; + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/WilsonTMFermionInstantiationGparityWilsonImplDF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/impl.h b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/impl.h new file mode 100644 index 00000000..2f13ce8a --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplDF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION GparityWilsonImplDF diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/CayleyFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either 
version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/ContinuedFractionFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/DomainWallEOFAFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/MobiusEOFAFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/PartialFractionFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonCloverFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermion5DInstantiationGparityWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..75f143cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonKernelsInstantiationGparityWilsonImplF.cc @@ -0,0 +1,74 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move these + +#include "impl.h" + +// G-parity requires more specialised implementation. +template <> +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} +template <> +void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int mu, + unsigned int sU, + bool switch_sign) +{ + assert(0); +} + +HAND_SPECIALISE_GPARITY(IMPLEMENTATION); + + +template class WilsonKernels; + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/WilsonTMFermionInstantiationGparityWilsonImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/impl.h b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/impl.h new file mode 100644 index 00000000..43f27bb8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION GparityWilsonImplF diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/CayleyFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either 
version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/ContinuedFractionFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/DomainWallEOFAFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/MobiusEOFAFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/PartialFractionFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonCloverFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermion5DInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..75f143cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonKernelsInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,74 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move these + +#include "impl.h" + +// G-parity requires more specialised implementation. +template <> +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} +template <> +void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int mu, + unsigned int sU, + bool switch_sign) +{ + assert(0); +} + +HAND_SPECIALISE_GPARITY(IMPLEMENTATION); + + +template class WilsonKernels; + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/WilsonTMFermionInstantiationGparityWilsonImplFH.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/impl.h b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/impl.h new file mode 100644 index 00000000..ebcb6e62 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/GparityWilsonImplFH/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION GparityWilsonImplFH diff --git a/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc new file mode 100644 index 00000000..986460b0 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc @@ -0,0 +1,41 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +// S-direction is INNERMOST and takes no part in the parity. +const std::vector ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4}); +const std::vector ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); + +NAMESPACE_END(Grid); + + + diff --git a/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc.master new file mode 100644 index 00000000..572b375c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermion5DInstantiation.cc.master @@ -0,0 +1,44 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion5D; + +NAMESPACE_END(Grid); + + + + diff --git a/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc new file mode 100644 index 00000000..a617f6cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc @@ -0,0 +1,36 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +const std::vector ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3}); + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc.master new file mode 100644 index 00000000..2023adc2 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ImprovedStaggeredFermionInstantiation.cc.master @@ -0,0 +1,37 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/MobiusEOFAFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/MobiusEOFAFermionInstantiation.cc.master new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/MobiusEOFAFermionInstantiation.cc.master @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/PartialFractionFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/PartialFractionFermion5DInstantiation.cc.master new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/PartialFractionFermion5DInstantiation.cc.master @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc new file mode 100644 index 00000000..572b375c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermion5DInstantiationStaggeredImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion5D; + +NAMESPACE_END(Grid); + + + + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc new file mode 100644 index 00000000..2023adc2 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/ImprovedStaggeredFermionInstantiationStaggeredImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc new file mode 100644 index 00000000..c3acf963 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/StaggeredKernelsInstantiationStaggeredImplD.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class StaggeredKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplD/impl.h b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/impl.h new file mode 100644 index 00000000..15644489 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION StaggeredImplD diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc new file mode 100644 index 00000000..572b375c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermion5DInstantiationStaggeredImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion5D; + +NAMESPACE_END(Grid); + + + + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc new file mode 100644 index 00000000..2023adc2 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/ImprovedStaggeredFermionInstantiationStaggeredImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/ImprovedStaggeredFermion.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ + /* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ImprovedStaggeredFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc new file mode 100644 index 00000000..c3acf963 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/StaggeredKernelsInstantiationStaggeredImplF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class StaggeredKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredImplF/impl.h b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/impl.h new file mode 100644 index 00000000..6569f6ad --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION StaggeredImplF diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc new file mode 100644 index 00000000..5e5175e8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric; +int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master new file mode 100644 index 00000000..c3acf963 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/StaggeredKernelsInstantiation.cc.master @@ -0,0 +1,39 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Azusa Yamaguchi, Peter Boyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class StaggeredKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonCloverFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonKernelsInstantiationWilsonAdjImplD.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/WilsonTMFermionInstantiationWilsonAdjImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/impl.h new file mode 100644 index 00000000..1c2fce25 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonAdjImplD diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonCloverFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonKernelsInstantiationWilsonAdjImplF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/WilsonTMFermionInstantiationWilsonAdjImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/impl.h new file mode 100644 index 00000000..93e4d7d3 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonAdjImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonAdjImplF diff --git a/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonCloverFermionInstantiation.cc.master @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc b/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc new file mode 100644 index 00000000..0836849b --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc @@ -0,0 +1,41 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +// S-direction is INNERMOST and takes no part in the parity. 
+const std::vector WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4}); +const std::vector WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1}); + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc.master new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonFermion5DInstantiation.cc.master @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc b/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc new file mode 100644 index 00000000..a35ef74c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc @@ -0,0 +1,41 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3}); +const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1}); +int WilsonFermionStatic::HandOptDslash; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc.master new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonFermionInstantiation.cc.master @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/CayleyFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/ContinuedFractionFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/DomainWallEOFAFermionInstantiationWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/MobiusEOFAFermionInstantiationWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/PartialFractionFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonCloverFermionInstantiationWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermion5DInstantiationWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonFermionInstantiationWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonKernelsInstantiationWilsonImplD.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/WilsonTMFermionInstantiationWilsonImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplD/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonImplD/impl.h new file mode 100644 index 00000000..401d2774 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonImplD diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/CayleyFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/ContinuedFractionFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/DomainWallEOFAFermionInstantiationWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/MobiusEOFAFermionInstantiationWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/PartialFractionFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonCloverFermionInstantiationWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermion5DInstantiationWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonFermionInstantiationWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonKernelsInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonKernelsInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonKernelsInstantiationWilsonImplDF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/WilsonTMFermionInstantiationWilsonImplDF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplDF/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/impl.h new file mode 100644 index 00000000..2adc6136 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplDF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonImplDF diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/CayleyFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/ContinuedFractionFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/DomainWallEOFAFermionInstantiationWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/MobiusEOFAFermionInstantiationWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/PartialFractionFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonCloverFermionInstantiationWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermion5DInstantiationWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonFermionInstantiationWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonKernelsInstantiationWilsonImplF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/WilsonTMFermionInstantiationWilsonImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplF/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonImplF/impl.h new file mode 100644 index 00000000..78966b84 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonImplF diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/CayleyFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/ContinuedFractionFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/DomainWallEOFAFermionInstantiationWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/MobiusEOFAFermionInstantiationWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/PartialFractionFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonCloverFermionInstantiationWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermion5DInstantiationWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonFermionInstantiationWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonKernelsInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonKernelsInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonKernelsInstantiationWilsonImplFH.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/WilsonTMFermionInstantiationWilsonImplFH.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonImplFH/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/impl.h new file mode 100644 index 00000000..e442863d --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonImplFH/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonImplFH diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc new file mode 100644 index 00000000..c02da6c8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include + +NAMESPACE_BEGIN(Grid); + +// Move these +int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric; +int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiation.cc.master @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc new file mode 100644 index 00000000..f6f235c8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationAsm.cc @@ -0,0 +1,43 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +//////////////////////////////////////////////////////////////////////// +// Include the specialisations for ASM kernels +//////////////////////////////////////////////////////////////////////// +NAMESPACE_BEGIN(Grid); +#include +#include +NAMESPACE_END(Grid); + + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master new file mode 100644 index 00000000..75f143cb --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonKernelsInstantiationGparity.cc.master @@ -0,0 +1,74 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +// Move these + +#include "impl.h" + +// G-parity requires more specialised implementation. +template <> +void WilsonKernels::ContractConservedCurrentSiteFwd(const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int sU, + unsigned int mu, + bool switch_sign) +{ + assert(0); +} +template <> +void WilsonKernels::ContractConservedCurrentSiteBwd( const SitePropagator &q_in_1, + const SitePropagator &q_in_2, + SitePropagator &q_out, + DoubledGaugeFieldView &U, + unsigned int mu, + unsigned int sU, + bool switch_sign) +{ + assert(0); +} + +HAND_SPECIALISE_GPARITY(IMPLEMENTATION); + + +template class WilsonKernels; + + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTMFermionInstantiation.cc.master b/Grid/qcd/action/fermion/instantiation/WilsonTMFermionInstantiation.cc.master new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTMFermionInstantiation.cc.master @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/impl.h new file mode 100644 index 00000000..e2341833 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonTwoIndexAntiSymmetricImplD diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexAntiSymmetricImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/impl.h new file mode 100644 index 00000000..86410435 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexAntiSymmetricImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonTwoIndexAntiSymmetricImplF diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplD.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/impl.h new file mode 100644 index 00000000..449c02fc --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonTwoIndexSymmetricImplD diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 100644 index 00000000..af99dfb6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonCloverFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonCloverFermion.cc + + Copyright (C) 2017 + + Author: paboyle + Author: Guido Cossu + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonCloverFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 100644 index 00000000..6fd9c5ca --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonKernelsInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc new file mode 100644 index 00000000..adfa310c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/WilsonTMFermionInstantiationWilsonTwoIndexSymmetricImplF.cc @@ -0,0 +1,37 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/WilsonTMFermion.cc + + Copyright (C) 2015 + +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonTMFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/impl.h b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/impl.h new file mode 100644 index 00000000..9c1b14d1 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/WilsonTwoIndexSymmetricImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION WilsonTwoIndexSymmetricImplF diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/CayleyFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/ContinuedFractionFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/DomainWallEOFAFermionInstantiationZWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/MobiusEOFAFermionInstantiationZWilsonImplD.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/PartialFractionFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonFermion5DInstantiationZWilsonImplD.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/WilsonKernelsInstantiationZWilsonImplD.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/impl.h b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/impl.h new file mode 100644 index 00000000..d8f93091 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplD/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION ZWilsonImplD diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/CayleyFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/ContinuedFractionFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/DomainWallEOFAFermionInstantiationZWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/MobiusEOFAFermionInstantiationZWilsonImplDF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/PartialFractionFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonFermion5DInstantiationZWilsonImplDF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonKernelsInstantiationZWilsonImplDF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonKernelsInstantiationZWilsonImplDF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/WilsonKernelsInstantiationZWilsonImplDF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/impl.h b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/impl.h new file mode 100644 index 00000000..7daf76ef --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplDF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION ZWilsonImplDF diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/CayleyFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/ContinuedFractionFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/DomainWallEOFAFermionInstantiationZWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/MobiusEOFAFermionInstantiationZWilsonImplF.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/PartialFractionFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonFermion5DInstantiationZWilsonImplF.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/WilsonKernelsInstantiationZWilsonImplF.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/impl.h b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/impl.h new file mode 100644 index 00000000..cfb6a73e --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplF/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION ZWilsonImplF diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..5130db9c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/CayleyFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + + //#include + //#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class CayleyFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..ca0d6cea --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/ContinuedFractionFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1,38 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ContinuedFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class ContinuedFractionFermion5D; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..f7198131 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/DomainWallEOFAFermionInstantiationZWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/DomainWallEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class DomainWallEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..ce7eaac9 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/MobiusEOFAFermionInstantiationZWilsonImplFH.cc @@ -0,0 +1,44 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/MobiusEOFAFermion.cc + +Copyright (C) 2017 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle +Author: David Murphy + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ + /* END LEGAL */ + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class MobiusEOFAFermion; + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..757719b6 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/PartialFractionFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1,39 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/PartialFractionFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class PartialFractionFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..0dac989c --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonFermion5DInstantiationZWilsonImplFH.cc @@ -0,0 +1,40 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonFermion5D; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonKernelsInstantiationZWilsonImplFH.cc b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonKernelsInstantiationZWilsonImplFH.cc new file mode 100644 index 00000000..9af5ed85 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/WilsonKernelsInstantiationZWilsonImplFH.cc @@ -0,0 +1,47 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc + +Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#include +#include +#include + +#ifndef AVX512 +#ifndef QPX +#include +#endif +#endif + +NAMESPACE_BEGIN(Grid); + +#include "impl.h" +template class WilsonKernels; + +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/impl.h b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/impl.h new file mode 100644 index 00000000..7eb490db --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/ZWilsonImplFH/impl.h @@ -0,0 +1 @@ +#define IMPLEMENTATION ZWilsonImplFH diff --git a/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh new file mode 100755 index 00000000..330dcfa8 --- /dev/null +++ b/Grid/qcd/action/fermion/instantiation/generate_instantiations.sh @@ -0,0 +1,104 @@ +#!/bin/sh + +STAG_IMPL_LIST=" \ + StaggeredImplF \ + StaggeredImplD " + +STAG5_IMPL_LIST="" + +WILSON_IMPL_LIST=" \ + WilsonImplF \ + WilsonImplD \ + WilsonImplFH \ + WilsonImplDF \ + WilsonAdjImplF \ + WilsonAdjImplD \ + WilsonTwoIndexSymmetricImplF \ + WilsonTwoIndexSymmetricImplD \ + WilsonTwoIndexAntiSymmetricImplF \ + WilsonTwoIndexAntiSymmetricImplD \ + GparityWilsonImplF \ + GparityWilsonImplD \ + GparityWilsonImplFH \ + GparityWilsonImplDF" + +DWF_IMPL_LIST=" \ + WilsonImplF \ + WilsonImplD \ + WilsonImplFH \ + WilsonImplDF \ + ZWilsonImplF \ + ZWilsonImplD \ + ZWilsonImplFH \ + ZWilsonImplDF " + +GDWF_IMPL_LIST=" \ + GparityWilsonImplF \ + GparityWilsonImplD \ + GparityWilsonImplFH \ + GparityWilsonImplDF" + + +IMPL_LIST="$STAG_IMPL_LIST $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST" + +for impl in $IMPL_LIST +do + echo $impl + mkdir -p $impl +cat > $impl/impl.h < #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); typedef WilsonGaugeAction WilsonGaugeActionR; typedef 
WilsonGaugeAction WilsonGaugeActionF; @@ -64,7 +63,6 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction typedef SymanzikGaugeAction ConjugateSymanzikGaugeActionF; typedef SymanzikGaugeAction ConjugateSymanzikGaugeActionD; -}} - +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/gauge/GaugeImplTypes.h b/Grid/qcd/action/gauge/GaugeImplTypes.h index 52e1e100..b9a5296d 100644 --- a/Grid/qcd/action/gauge/GaugeImplTypes.h +++ b/Grid/qcd/action/gauge/GaugeImplTypes.h @@ -29,6 +29,9 @@ directory #ifndef GRID_GAUGE_IMPL_TYPES_H #define GRID_GAUGE_IMPL_TYPES_H + +NAMESPACE_BEGIN(Grid); + #define CPS_MD_TIME #ifdef CPS_MD_TIME @@ -37,9 +40,6 @@ directory #define HMC_MOMENTUM_DENOMINATOR (1.0) #endif -namespace Grid { -namespace QCD { - //////////////////////////////////////////////////////////////////////// // Implementation dependent gauge types //////////////////////////////////////////////////////////////////////// @@ -85,13 +85,12 @@ public: // Move this elsewhere? FIXME - static inline void AddLink(Field &U, LinkField &W, - int mu) { // U[mu] += W - PARALLEL_FOR_LOOP - for (auto ss = 0; ss < U._grid->oSites(); ss++) { - U._odata[ss]._internal[mu] = - U._odata[ss]._internal[mu] + W._odata[ss]._internal; - } + static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W + auto U_v = U.View(); + auto W_v = W.View(); + thread_for( ss, U.Grid()->oSites(), { + U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); + }); } /////////////////////////////////////////////////////////// @@ -111,13 +110,12 @@ public: // // = N \Prod_{x,\mu,a} e^-{1/2 (c_xmua/sqrt{2})^2 } // - // Expect c' = cxmua/sqrt(2) to be a unit variance gaussian. // // Expect cxmua variance sqrt(2). 
// // Must scale the momentum by sqrt(2) to invoke CPS and UKQCD conventions // - LinkField Pmu(P._grid); + LinkField Pmu(P.Grid()); Pmu = Zero(); for (int mu = 0; mu < Nd; mu++) { SU::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); @@ -133,10 +131,13 @@ public: //static std::chrono::duration diff; //auto start = std::chrono::high_resolution_clock::now(); - parallel_for(int ss=0;ssoSites();ss++){ - for (int mu = 0; mu < Nd; mu++) - U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]); - } + auto U_v = U.View(); + auto P_v = P.View(); + thread_for(ss, P.Grid()->oSites(),{ + for (int mu = 0; mu < Nd; mu++) { + U_v[ss](mu) = ProjectOnGroup(Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu)); + } + }); //auto end = std::chrono::high_resolution_clock::now(); // diff += end - start; @@ -144,13 +145,13 @@ public: } static inline RealD FieldSquareNorm(Field& U){ - LatticeComplex Hloc(U._grid); - Hloc = zero; + LatticeComplex Hloc(U.Grid()); + Hloc = Zero(); for (int mu = 0; mu < Nd; mu++) { auto Umu = PeekIndex(U, mu); Hloc += trace(Umu * Umu); } - Complex Hsum = sum(Hloc); + auto Hsum = TensorRemove(sum(Hloc)); return Hsum.real(); } @@ -176,8 +177,6 @@ typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesR; typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesF; typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesD; - -} // QCD -} // Grid +NAMESPACE_END(Grid); #endif // GRID_GAUGE_IMPL_TYPES_H diff --git a/Grid/qcd/action/gauge/GaugeImplementations.h b/Grid/qcd/action/gauge/GaugeImplementations.h index 2d7464a9..a14aec1b 100644 --- a/Grid/qcd/action/gauge/GaugeImplementations.h +++ b/Grid/qcd/action/gauge/GaugeImplementations.h @@ -25,14 +25,13 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + 
/* END LEGAL */ #ifndef GRID_QCD_GAUGE_IMPLEMENTATIONS_H #define GRID_QCD_GAUGE_IMPLEMENTATIONS_H #include "GaugeImplTypes.h" -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); // Composition with smeared link, bc's etc.. probably need multiple inheritance // Variable precision "S" and variable Nc @@ -42,7 +41,7 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////// // Support needed for the assembly of loops including all boundary condition - // effects such as conjugate bcs + // effects such as Conjugate bcs //////////////////////////////////////////////////////////////////////////////////////////////////////////// template @@ -97,7 +96,7 @@ public: static inline GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { - GridBase *grid = Link._grid; + GridBase *grid = Link.Grid(); int Lmu = grid->GlobalDimensions()[mu] - 1; Lattice> coor(grid); @@ -114,7 +113,7 @@ public: } static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { - GridBase *grid = Link._grid; + GridBase *grid = Link.Grid(); int Lmu = grid->GlobalDimensions()[mu] - 1; Lattice> coor(grid); @@ -141,8 +140,6 @@ typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. 
whichever pre typedef ConjugateGaugeImpl ConjugateGimplF; // Float typedef ConjugateGaugeImpl ConjugateGimplD; // Double - -} -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/gauge/Photon.h b/Grid/qcd/action/gauge/Photon.h index 9afafe6c..465aa8bd 100644 --- a/Grid/qcd/action/gauge/Photon.h +++ b/Grid/qcd/action/gauge/Photon.h @@ -27,11 +27,9 @@ Copyright (C) 2015-2018 See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef QCD_PHOTON_ACTION_H -#define QCD_PHOTON_ACTION_H +#pragma once -namespace Grid{ -namespace QCD{ +NAMESPACE_BEGIN(Grid); template class QedGImpl @@ -119,9 +117,9 @@ namespace QCD{ void Photon::makeSpatialNorm(LatticeInteger &spNrm) { LatticeInteger coor(grid_); - std::vector l = grid_->FullDimensions(); + auto l = grid_->FullDimensions(); - spNrm = zero; + spNrm = Zero(); for(int mu = 0; mu < grid_->Nd() - 1; mu++) { LatticeCoordinate(coor, mu); @@ -134,7 +132,7 @@ namespace QCD{ void Photon::makeKHat(std::vector &khat) { const unsigned int nd = grid_->Nd(); - std::vector l = grid_->FullDimensions(); + auto l = grid_->FullDimensions(); Complex ci(0., 1.); khat.resize(nd, grid_); @@ -153,10 +151,10 @@ namespace QCD{ std::vector khat; GaugeLinkField lone(grid_); const unsigned int nd = grid_->Nd(); - std::vector zm(nd, 0); + Coordinate zm(nd, 0); ScalarSite one = ScalarComplex(1., 0.), z = ScalarComplex(0., 0.); - out = zero; + out = Zero(); makeKHat(khat); for(int mu = 0; mu < nd; mu++) { @@ -175,7 +173,7 @@ namespace QCD{ { case ZmScheme::qedTL: { - std::vector zm(grid_->Nd(), 0); + Coordinate zm(grid_->Nd(), 0); ScalarSite z = ScalarComplex(0., 0.); pokeSite(z, out, zm); @@ -208,7 +206,7 @@ namespace QCD{ LatticeInteger spNrm(grid_); std::vector khat, a(nd, grid_), aProj(nd, grid_); - invKHat = zero; + invKHat = Zero(); makeSpatialNorm(spNrm); makeKHat(khat); for (unsigned int mu = 0; mu < nd; 
++mu) @@ -222,9 +220,9 @@ namespace QCD{ cst = ScalarComplex(1., 0.); invKHat = where(spNrm == Integer(0), cst, invKHat); invKHat = cst/invKHat; - cst = zero; + cst = Zero(); invKHat = where(spNrm == Integer(0), cst, invKHat); - spdiv = zero; + spdiv = Zero(); for (unsigned int nu = 0; nu < nd - 1; ++nu) { spdiv += conjugate(khat[nu])*a[nu]; @@ -272,7 +270,7 @@ namespace QCD{ void Photon::StochasticWeight(GaugeLinkField &weight) { const unsigned int nd = grid_->Nd(); - std::vector l = grid_->FullDimensions(); + auto l = grid_->FullDimensions(); Integer vol = 1; for(unsigned int mu = 0; mu < nd; mu++) @@ -327,5 +325,5 @@ namespace QCD{ out = real(out); } -}} -#endif +NAMESPACE_END(Grid); + diff --git a/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h b/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h index bdbc8479..639aca19 100644 --- a/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h +++ b/Grid/qcd/action/gauge/PlaqPlusRectangleAction.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -24,129 +24,127 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PLAQ_PLUS_RECTANGLE_ACTION_H #define QCD_PLAQ_PLUS_RECTANGLE_ACTION_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); - //////////////////////////////////////////////////////////////////////// - // PlaqPlusRectangleActoin - //////////////////////////////////////////////////////////////////////// - template - class PlaqPlusRectangleAction : public Action { - public: +//////////////////////////////////////////////////////////////////////// +// PlaqPlusRectangleActoin +//////////////////////////////////////////////////////////////////////// +template +class PlaqPlusRectangleAction : public Action { +public: - INHERIT_GIMPL_TYPES(Gimpl); + INHERIT_GIMPL_TYPES(Gimpl); - private: - RealD c_plaq; - RealD c_rect; +private: + RealD c_plaq; + RealD c_rect; - public: - PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){}; +public: + PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){}; - virtual std::string action_name(){return "PlaqPlusRectangleAction";} + virtual std::string action_name(){return "PlaqPlusRectangleAction";} - virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms + virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms - virtual std::string LogParameters(){ - std::stringstream sstream; - sstream << GridLogMessage << "["<gSites(); - - RealD plaq = WilsonLoops::avgPlaquette(U); - RealD rect = WilsonLoops::avgRectangle(U); - - RealD action=c_plaq*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5 - +c_rect*(1.0 -rect)*(Nd*(Nd-1.0))*vol; - - return action; - }; - - virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) { - //extend Ta to include Lorentz indexes - 
RealD factor_p = c_plaq/RealD(Nc)*0.5; - RealD factor_r = c_rect/RealD(Nc)*0.5; - - GridBase *grid = Umu._grid; - - std::vector U (Nd,grid); - std::vector U2(Nd,grid); - - for(int mu=0;mu(Umu,mu); - WilsonLoops::RectStapleDouble(U2[mu],U[mu],mu); - } - - GaugeLinkField dSdU_mu(grid); - GaugeLinkField staple(grid); - - for (int mu=0; mu < Nd; mu++){ - - // Staple in direction mu - - WilsonLoops::Staple(staple,Umu,mu); - - dSdU_mu = Ta(U[mu]*staple)*factor_p; - - WilsonLoops::RectStaple(Umu,staple,U2,U,mu); - - dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r; - - PokeIndex(dSdU, dSdU_mu, mu); - } - - }; - - }; - - // Convenience for common physically defined cases. - // - // RBC c1 parameterisation is not really RBC but don't have good - // reference and we are happy to change name if prior use of this plaq coeff - // parameterisation is made known to us. - template - class RBCGaugeAction : public PlaqPlusRectangleAction { - public: - INHERIT_GIMPL_TYPES(Gimpl); - RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction(beta*(1.0-8.0*c1), beta*c1) {}; - virtual std::string action_name(){return "RBCGaugeAction";} - }; - - template - class IwasakiGaugeAction : public RBCGaugeAction { - public: - INHERIT_GIMPL_TYPES(Gimpl); - IwasakiGaugeAction(RealD beta) : RBCGaugeAction(beta,-0.331) {}; - virtual std::string action_name(){return "IwasakiGaugeAction";} - }; - - template - class SymanzikGaugeAction : public RBCGaugeAction { - public: - INHERIT_GIMPL_TYPES(Gimpl); - SymanzikGaugeAction(RealD beta) : RBCGaugeAction(beta,-1.0/12.0) {}; - virtual std::string action_name(){return "SymanzikGaugeAction";} - }; - - template - class DBW2GaugeAction : public RBCGaugeAction { - public: - INHERIT_GIMPL_TYPES(Gimpl); - DBW2GaugeAction(RealD beta) : RBCGaugeAction(beta,-1.4067) {}; - virtual std::string action_name(){return "DBW2GaugeAction";} - }; - + virtual std::string LogParameters(){ + std::stringstream sstream; + sstream << GridLogMessage << "["<gSites(); + + RealD plaq = 
WilsonLoops::avgPlaquette(U); + RealD rect = WilsonLoops::avgRectangle(U); + + RealD action=c_plaq*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5 + +c_rect*(1.0 -rect)*(Nd*(Nd-1.0))*vol; + + return action; + }; + + virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) { + //extend Ta to include Lorentz indexes + RealD factor_p = c_plaq/RealD(Nc)*0.5; + RealD factor_r = c_rect/RealD(Nc)*0.5; + + GridBase *grid = Umu.Grid(); + + std::vector U (Nd,grid); + std::vector U2(Nd,grid); + + for(int mu=0;mu(Umu,mu); + WilsonLoops::RectStapleDouble(U2[mu],U[mu],mu); + } + + GaugeLinkField dSdU_mu(grid); + GaugeLinkField staple(grid); + + for (int mu=0; mu < Nd; mu++){ + + // Staple in direction mu + + WilsonLoops::Staple(staple,Umu,mu); + + dSdU_mu = Ta(U[mu]*staple)*factor_p; + + WilsonLoops::RectStaple(Umu,staple,U2,U,mu); + + dSdU_mu = dSdU_mu + Ta(U[mu]*staple)*factor_r; + + PokeIndex(dSdU, dSdU_mu, mu); + } + + }; + +}; + +// Convenience for common physically defined cases. +// +// RBC c1 parameterisation is not really RBC but don't have good +// reference and we are happy to change name if prior use of this plaq coeff +// parameterisation is made known to us. 
+template +class RBCGaugeAction : public PlaqPlusRectangleAction { +public: + INHERIT_GIMPL_TYPES(Gimpl); + RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction(beta*(1.0-8.0*c1), beta*c1) {}; + virtual std::string action_name(){return "RBCGaugeAction";} +}; + +template +class IwasakiGaugeAction : public RBCGaugeAction { +public: + INHERIT_GIMPL_TYPES(Gimpl); + IwasakiGaugeAction(RealD beta) : RBCGaugeAction(beta,-0.331) {}; + virtual std::string action_name(){return "IwasakiGaugeAction";} +}; + +template +class SymanzikGaugeAction : public RBCGaugeAction { +public: + INHERIT_GIMPL_TYPES(Gimpl); + SymanzikGaugeAction(RealD beta) : RBCGaugeAction(beta,-1.0/12.0) {}; + virtual std::string action_name(){return "SymanzikGaugeAction";} +}; + +template +class DBW2GaugeAction : public RBCGaugeAction { +public: + INHERIT_GIMPL_TYPES(Gimpl); + DBW2GaugeAction(RealD beta) : RBCGaugeAction(beta,-1.4067) {}; + virtual std::string action_name(){return "DBW2GaugeAction";} +}; + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/gauge/WilsonGaugeAction.h b/Grid/qcd/action/gauge/WilsonGaugeAction.h index 77c2424c..40d600d2 100644 --- a/Grid/qcd/action/gauge/WilsonGaugeAction.h +++ b/Grid/qcd/action/gauge/WilsonGaugeAction.h @@ -29,19 +29,18 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef QCD_WILSON_GAUGE_ACTION_H #define QCD_WILSON_GAUGE_ACTION_H -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////// // Wilson Gauge Action .. should I template the Nc etc.. 
//////////////////////////////////////////////////////////////////////// template class WilsonGaugeAction : public Action { - public: +public: INHERIT_GIMPL_TYPES(Gimpl); /////////////////////////// constructors @@ -60,7 +59,7 @@ class WilsonGaugeAction : public Action { virtual RealD S(const GaugeField &U) { RealD plaq = WilsonLoops::avgPlaquette(U); - RealD vol = U._grid->gSites(); + RealD vol = U.Grid()->gSites(); RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5; return action; }; @@ -71,25 +70,22 @@ class WilsonGaugeAction : public Action { RealD factor = 0.5 * beta / RealD(Nc); - GaugeLinkField Umu(U._grid); - GaugeLinkField dSdU_mu(U._grid); + GaugeLinkField Umu(U.Grid()); + GaugeLinkField dSdU_mu(U.Grid()); for (int mu = 0; mu < Nd; mu++) { - Umu = PeekIndex(U, mu); + Umu = PeekIndex(U, mu); + // Staple in direction mu WilsonLoops::Staple(dSdU_mu, U, mu); dSdU_mu = Ta(Umu * dSdU_mu) * factor; - + PokeIndex(dSdU, dSdU_mu, mu); } } private: RealD beta; -}; - - - -} -} + }; +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/Bounds.h b/Grid/qcd/action/pseudofermion/Bounds.h index 081ebba2..535e1a49 100644 --- a/Grid/qcd/action/pseudofermion/Bounds.h +++ b/Grid/qcd/action/pseudofermion/Bounds.h @@ -1,7 +1,6 @@ #pragma once -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); template void HighBoundCheck(LinearOperatorBase &HermOp, @@ -20,7 +19,7 @@ namespace Grid{ Field &GaussNoise, MultiShiftFunction &PowerNegHalf) { - GridBase *FermionGrid = GaussNoise._grid; + GridBase *FermionGrid = GaussNoise.Grid(); Field X(FermionGrid); Field Y(FermionGrid); @@ -49,5 +48,5 @@ namespace Grid{ assert( (std::sqrt(Nd/Nx) 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H #define QCD_EVEN_ODD_SCHUR_DIFFERENTIABLE_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); - // Base even odd HMC on the normal Mee based schur decomposition. - // - // M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo) - // (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 ) - // - // Determinant is det of middle factor - // This assumes Mee is indept of U. - // - template - class SchurDifferentiableOperator : public SchurDiagMooeeOperator,typename Impl::FermionField> - { - public: - INHERIT_IMPL_TYPES(Impl); +// Base even odd HMC on the normal Mee based schur decomposition. +// +// M = (Mee Meo) = (1 0 ) (Mee 0 ) (1 Mee^{-1} Meo) +// (Moe Moo) (Moe Mee^-1 1 ) (0 Moo-Moe Mee^-1 Meo) (0 1 ) +// +// Determinant is det of middle factor +// This assumes Mee is indept of U. 
+// +template +class SchurDifferentiableOperator : public SchurDiagMooeeOperator,typename Impl::FermionField> +{ +public: + INHERIT_IMPL_TYPES(Impl); - typedef FermionOperator Matrix; + typedef FermionOperator Matrix; - SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator(Mat) {}; + SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator(Mat) {}; - void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { + void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { - GridBase *fgrid = this->_Mat.FermionGrid(); - GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); + GridBase *fgrid = this->_Mat.FermionGrid(); + GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); - FermionField tmp1(fcbgrid); - FermionField tmp2(fcbgrid); + FermionField tmp1(fcbgrid); + FermionField tmp2(fcbgrid); - conformable(fcbgrid,U._grid); - conformable(fcbgrid,V._grid); + conformable(fcbgrid,U.Grid()); + conformable(fcbgrid,V.Grid()); - // Assert the checkerboard?? or code for either - assert(U.checkerboard==Odd); - assert(V.checkerboard==U.checkerboard); + // Assert the checkerboard?? 
or code for either + assert(U.Checkerboard()==Odd); + assert(V.Checkerboard()==U.Checkerboard()); - // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE - // it is not conformable with the HMC force field - // Case: Ls vectorised fields - // INHERIT FROM THE Force field instead - GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid); - GaugeField ForceO(forcecb); - GaugeField ForceE(forcecb); + // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE + // it is not conformable with the HMC force field + // Case: Ls vectorised fields + // INHERIT FROM THE Force field instead + GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force.Grid()); + GaugeField ForceO(forcecb); + GaugeField ForceE(forcecb); - // X^dag Der_oe MeeInv Meo Y - // Use Mooee as nontrivial but gauge field indept - this->_Mat.Meooe (V,tmp1); // odd->even -- implicit -0.5 factor to be applied - this->_Mat.MooeeInv(tmp1,tmp2); // even->even - this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo); - // Accumulate X^dag M_oe MeeInv Der_eo Y - this->_Mat.MeooeDag (U,tmp1); // even->odd -- implicit -0.5 factor to be applied - this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even - this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo); + // X^dag Der_oe MeeInv Meo Y + // Use Mooee as nontrivial but gauge field indept + this->_Mat.Meooe (V,tmp1); // odd->even -- implicit -0.5 factor to be applied + this->_Mat.MooeeInv(tmp1,tmp2); // even->even + this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo); + // Accumulate X^dag M_oe MeeInv Der_eo Y + this->_Mat.MeooeDag (U,tmp1); // even->odd -- implicit -0.5 factor to be applied + this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even + this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo); - assert(ForceE.checkerboard==Even); - assert(ForceO.checkerboard==Odd); + assert(ForceE.Checkerboard()==Even); + assert(ForceO.Checkerboard()==Odd); - setCheckerboard(Force,ForceE); - setCheckerboard(Force,ForceO); - Force=-Force; - - delete forcecb; - } - - - void 
MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { - - GridBase *fgrid = this->_Mat.FermionGrid(); - GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); - - FermionField tmp1(fcbgrid); - FermionField tmp2(fcbgrid); - - conformable(fcbgrid,U._grid); - conformable(fcbgrid,V._grid); - - // Assert the checkerboard?? or code for either - assert(V.checkerboard==Odd); - assert(V.checkerboard==V.checkerboard); - - // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE - // it is not conformable with the HMC force field - // INHERIT FROM THE Force field instead - GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid); - GaugeField ForceO(forcecb); - GaugeField ForceE(forcecb); - - // X^dag Der_oe MeeInv Meo Y - // Use Mooee as nontrivial but gauge field indept - this->_Mat.MeooeDag (V,tmp1); // odd->even -- implicit -0.5 factor to be applied - this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even - this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes); - - // Accumulate X^dag M_oe MeeInv Der_eo Y - this->_Mat.Meooe (U,tmp1); // even->odd -- implicit -0.5 factor to be applied - this->_Mat.MooeeInv(tmp1,tmp2); // even->even - this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes); - - assert(ForceE.checkerboard==Even); - assert(ForceO.checkerboard==Odd); - - setCheckerboard(Force,ForceE); - setCheckerboard(Force,ForceO); - Force=-Force; - - delete forcecb; - } - - }; + setCheckerboard(Force,ForceE); + setCheckerboard(Force,ForceO); + Force=-Force; + delete forcecb; } -} + + + void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { + + GridBase *fgrid = this->_Mat.FermionGrid(); + GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); + + FermionField tmp1(fcbgrid); + FermionField tmp2(fcbgrid); + + conformable(fcbgrid,U.Grid()); + conformable(fcbgrid,V.Grid()); + + // Assert the checkerboard?? 
or code for either + assert(V.Checkerboard()==Odd); + assert(V.Checkerboard()==V.Checkerboard()); + + // NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE + // it is not conformable with the HMC force field + // INHERIT FROM THE Force field instead + GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force.Grid()); + GaugeField ForceO(forcecb); + GaugeField ForceE(forcecb); + + // X^dag Der_oe MeeInv Meo Y + // Use Mooee as nontrivial but gauge field indept + this->_Mat.MeooeDag (V,tmp1); // odd->even -- implicit -0.5 factor to be applied + this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even + this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes); + + // Accumulate X^dag M_oe MeeInv Der_eo Y + this->_Mat.Meooe (U,tmp1); // even->odd -- implicit -0.5 factor to be applied + this->_Mat.MooeeInv(tmp1,tmp2); // even->even + this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes); + + assert(ForceE.Checkerboard()==Even); + assert(ForceO.Checkerboard()==Odd); + + setCheckerboard(Force,ForceE); + setCheckerboard(Force,ForceO); + Force=-Force; + + delete forcecb; + } + +}; + +NAMESPACE_END(Grid); + #endif diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index c6746a88..9fc0a3b0 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -38,8 +38,7 @@ See the full license in the file "LICENSE" in the top level distribution directo #ifndef QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H #define QCD_PSEUDOFERMION_EXACT_ONE_FLAVOUR_RATIO_H -namespace Grid{ -namespace QCD{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////////////////////////////// // Exact one flavour implementation of DWF determinant ratio // @@ -67,6 +66,13 @@ namespace QCD{ public: + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, + AbstractEOFAFermion& _Rop, + OperatorFunction& CG, + Params& p, + bool use_fc=false) + : 
ExactOneFlavourRatioPseudoFermionAction(_Lop,_Rop,CG,CG,CG,CG,CG,p,use_fc) {}; + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, OperatorFunction& HeatbathCG, @@ -151,7 +157,7 @@ namespace QCD{ spProj(eta, tmp[0], -1, Lop.Ls); Lop.Omega(tmp[0], tmp[1], -1, 0); G5R5(CG_src, tmp[1]); - tmp[1] = zero; + tmp[1] = Zero(); for(int k=0; k class OneFlavourEvenOddRationalPseudoFermionAction - : public Action { - public: + : public Action { +public: INHERIT_IMPL_TYPES(Impl); typedef OneFlavourRationalParams Params; @@ -57,7 +56,7 @@ class OneFlavourEvenOddRationalPseudoFermionAction MultiShiftFunction PowerQuarter; MultiShiftFunction PowerNegQuarter; - private: +private: FermionOperator &FermOp; // the basic operator // NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us @@ -67,13 +66,13 @@ class OneFlavourEvenOddRationalPseudoFermionAction FermionField PhiEven; // the pseudo fermion field for this trajectory FermionField PhiOdd; // the pseudo fermion field for this trajectory - public: +public: OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator &Op, Params &p) - : FermOp(Op), - PhiEven(Op.FermionRedBlackGrid()), - PhiOdd(Op.FermionRedBlackGrid()), - param(p) { + : FermOp(Op), + PhiEven(Op.FermionRedBlackGrid()), + PhiOdd(Op.FermionRedBlackGrid()), + param(p) { AlgRemez remez(param.lo, param.hi, param.precision); // MdagM^(+- 1/2) @@ -139,7 +138,7 @@ class OneFlavourEvenOddRationalPseudoFermionAction ////////////////////////////////////////////////////// assert(FermOp.ConstEE() == 1); - PhiEven = zero; + PhiEven = Zero(); }; ////////////////////////////////////////////////////// @@ -166,7 +165,7 @@ class OneFlavourEvenOddRationalPseudoFermionAction RealD action = norm2(Y); std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 " - "solve or -1/2 solve faster??? " + "solve or -1/2 solve faster??? 
" << action << std::endl; return action; @@ -213,7 +212,7 @@ class OneFlavourEvenOddRationalPseudoFermionAction msCG(Mpc, PhiOdd, MPhi_k); - dSdU = zero; + dSdU = Zero(); for (int k = 0; k < Npole; k++) { RealD ak = PowerNegHalf.residues[k]; @@ -229,7 +228,7 @@ class OneFlavourEvenOddRationalPseudoFermionAction // dSdU = Ta(dSdU); }; }; -} -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h index 84fe4de0..e5f0b602 100644 --- a/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h +++ b/Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h @@ -28,8 +28,7 @@ Author: Peter Boyle #ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_RATIO_H #define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_RATIO_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////// // One flavour rational @@ -145,7 +144,7 @@ namespace Grid{ assert(NumOp.ConstEE() == 1); assert(DenOp.ConstEE() == 1); - PhiEven = zero; + PhiEven = Zero(); }; @@ -245,7 +244,7 @@ namespace Grid{ RealD ak; - dSdU = zero; + dSdU = Zero(); // With these building blocks // @@ -282,8 +281,7 @@ namespace Grid{ }; }; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/OneFlavourRational.h b/Grid/qcd/action/pseudofermion/OneFlavourRational.h index 3aa6c780..f6c823c9 100644 --- a/Grid/qcd/action/pseudofermion/OneFlavourRational.h +++ b/Grid/qcd/action/pseudofermion/OneFlavourRational.h @@ -28,8 +28,7 @@ Author: Peter Boyle #ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_RATIONAL_H #define QCD_PSEUDOFERMION_ONE_FLAVOUR_RATIONAL_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////// // One flavour rational @@ -196,7 +195,7 @@ namespace Grid{ msCG(MdagMOp,Phi,MPhi_k); - dSdU = zero; + dSdU = Zero(); for(int k=0;k #ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_RATIONAL_RATIO_H #define 
QCD_PSEUDOFERMION_ONE_FLAVOUR_RATIONAL_RATIO_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////// // One flavour rational @@ -231,7 +230,7 @@ namespace Grid{ RealD ak; - dSdU = zero; + dSdU = Zero(); // With these building blocks // @@ -268,8 +267,7 @@ namespace Grid{ }; }; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/TwoFlavour.h b/Grid/qcd/action/pseudofermion/TwoFlavour.h index f189e71c..f905a675 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavour.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavour.h @@ -26,22 +26,21 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_H -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////// // Two flavour pseudofermion action for any dop //////////////////////////////////////////////////////////////////////// template class TwoFlavourPseudoFermionAction : public Action { - public: +public: INHERIT_IMPL_TYPES(Impl); - private: +private: FermionOperator &FermOp; // the basic operator OperatorFunction &DerivativeSolver; @@ -50,17 +49,17 @@ class TwoFlavourPseudoFermionAction : public Action { FermionField Phi; // the pseudo fermion field for this trajectory - public: +public: ///////////////////////////////////////////////// // Pass in required objects. 
///////////////////////////////////////////////// TwoFlavourPseudoFermionAction(FermionOperator &Op, OperatorFunction &DS, OperatorFunction &AS) - : FermOp(Op), - DerivativeSolver(DS), - ActionSolver(AS), - Phi(Op.FermionGrid()){}; + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), + Phi(Op.FermionGrid()){}; virtual std::string action_name(){return "TwoFlavourPseudoFermionAction";} @@ -111,7 +110,7 @@ class TwoFlavourPseudoFermionAction : public Action { FermionField Y(FermOp.FermionGrid()); MdagMLinearOperator, FermionField> MdagMOp(FermOp); - X = zero; + X = Zero(); ActionSolver(MdagMOp, Phi, X); MdagMOp.Op(X, Y); @@ -138,7 +137,7 @@ class TwoFlavourPseudoFermionAction : public Action { MdagMLinearOperator, FermionField> MdagMOp(FermOp); - X = zero; + X = Zero(); DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi @@ -153,7 +152,7 @@ class TwoFlavourPseudoFermionAction : public Action { // not taking here the traceless antihermitian component }; }; -} -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h index 0bbc0ae6..a3cf8f08 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h @@ -26,164 +26,163 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H -namespace Grid { - namespace QCD { - - //////////////////////////////////////////////////////////////////////// - // Two flavour pseudofermion action for any EO prec dop - //////////////////////////////////////////////////////////////////////// - template - class 
TwoFlavourEvenOddPseudoFermionAction - : public Action { - public: - INHERIT_IMPL_TYPES(Impl); - - private: - FermionOperator &FermOp; // the basic operator - - OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; - - FermionField PhiOdd; // the pseudo fermion field for this trajectory - FermionField PhiEven; // the pseudo fermion field for this trajectory - - public: - ///////////////////////////////////////////////// - // Pass in required objects. - ///////////////////////////////////////////////// - TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, - OperatorFunction &DS, - OperatorFunction &AS) - : FermOp(Op), - DerivativeSolver(DS), - ActionSolver(AS), - PhiEven(Op.FermionRedBlackGrid()), - PhiOdd(Op.FermionRedBlackGrid()) - {}; +NAMESPACE_BEGIN(Grid); - virtual std::string action_name(){return "TwoFlavourEvenOddPseudoFermionAction";} + +//////////////////////////////////////////////////////////////////////// +// Two flavour pseudofermion action for any EO prec dop +//////////////////////////////////////////////////////////////////////// +template +class TwoFlavourEvenOddPseudoFermionAction + : public Action { +public: + INHERIT_IMPL_TYPES(Impl); + +private: + FermionOperator &FermOp; // the basic operator + + OperatorFunction &DerivativeSolver; + OperatorFunction &ActionSolver; + + FermionField PhiOdd; // the pseudo fermion field for this trajectory + FermionField PhiEven; // the pseudo fermion field for this trajectory + +public: + ///////////////////////////////////////////////// + // Pass in required objects. 
+ ///////////////////////////////////////////////// + TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, + OperatorFunction &DS, + OperatorFunction &AS) + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), + PhiEven(Op.FermionRedBlackGrid()), + PhiOdd(Op.FermionRedBlackGrid()) + {}; + + virtual std::string action_name(){return "TwoFlavourEvenOddPseudoFermionAction";} - virtual std::string LogParameters(){ - std::stringstream sstream; - sstream << GridLogMessage << "["< sig^2 = 0.5. + // P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi} + // Phi = McpDag eta + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. - RealD scale = std::sqrt(0.5); + RealD scale = std::sqrt(0.5); - FermionField eta (FermOp.FermionGrid()); - FermionField etaOdd (FermOp.FermionRedBlackGrid()); - FermionField etaEven(FermOp.FermionRedBlackGrid()); + FermionField eta (FermOp.FermionGrid()); + FermionField etaOdd (FermOp.FermionRedBlackGrid()); + FermionField etaEven(FermOp.FermionRedBlackGrid()); - gaussian(pRNG,eta); - pickCheckerboard(Even,etaEven,eta); - pickCheckerboard(Odd,etaOdd,eta); + gaussian(pRNG,eta); + pickCheckerboard(Even,etaEven,eta); + pickCheckerboard(Odd,etaOdd,eta); - FermOp.ImportGauge(U); - SchurDifferentiableOperator PCop(FermOp); + FermOp.ImportGauge(U); + SchurDifferentiableOperator PCop(FermOp); - PCop.MpcDag(etaOdd,PhiOdd); + PCop.MpcDag(etaOdd,PhiOdd); - FermOp.MooeeDag(etaEven,PhiEven); + FermOp.MooeeDag(etaEven,PhiEven); - PhiOdd =PhiOdd*scale; - PhiEven=PhiEven*scale; + PhiOdd =PhiOdd*scale; + PhiEven=PhiEven*scale; - }; + }; - ////////////////////////////////////////////////////// - // S = phi^dag (Mdag M)^-1 phi (odd) - // + phi^dag (Mdag M)^-1 phi (even) - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { + ////////////////////////////////////////////////////// + // S = phi^dag (Mdag M)^-1 phi (odd) + // + phi^dag (Mdag M)^-1 phi (even) + 
////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { - FermOp.ImportGauge(U); + FermOp.ImportGauge(U); - FermionField X(FermOp.FermionRedBlackGrid()); - FermionField Y(FermOp.FermionRedBlackGrid()); + FermionField X(FermOp.FermionRedBlackGrid()); + FermionField Y(FermOp.FermionRedBlackGrid()); - SchurDifferentiableOperator PCop(FermOp); + SchurDifferentiableOperator PCop(FermOp); - X=zero; - ActionSolver(PCop,PhiOdd,X); - PCop.Op(X,Y); - RealD action = norm2(Y); + X=Zero(); + ActionSolver(PCop,PhiOdd,X); + PCop.Op(X,Y); + RealD action = norm2(Y); - // The EE factorised block; normally can replace with zero if det is constant (gauge field indept) - // Only really clover term that creates this. - FermOp.MooeeInvDag(PhiEven,Y); - action = action + norm2(Y); + // The EE factorised block; normally can replace with zero if det is constant (gauge field indept) + // Only really clover term that creates this. + FermOp.MooeeInvDag(PhiEven,Y); + action = action + norm2(Y); - std::cout << GridLogMessage << "Pseudofermion EO action "< Mpc(FermOp); + SchurDifferentiableOperator Mpc(FermOp); - // Our conventions really make this UdSdU; We do not differentiate wrt Udag here. - // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. + // Our conventions really make this UdSdU; We do not differentiate wrt Udag here. + // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. - X=zero; - DerivativeSolver(Mpc,PhiOdd,X); - Mpc.Mpc(X,Y); - Mpc.MpcDeriv(tmp , Y, X ); dSdU=tmp; - Mpc.MpcDagDeriv(tmp , X, Y); dSdU=dSdU+tmp; + X=Zero(); + DerivativeSolver(Mpc,PhiOdd,X); + Mpc.Mpc(X,Y); + Mpc.MpcDeriv(tmp , Y, X ); dSdU=tmp; + Mpc.MpcDagDeriv(tmp , X, Y); dSdU=dSdU+tmp; - // Treat the EE case. (MdagM)^-1 = Minv Minvdag - // Deriv defaults to zero. 
- // FermOp.MooeeInvDag(PhiOdd,Y); - // FermOp.MooeeInv(Y,X); - // FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; - // FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; + // Treat the EE case. (MdagM)^-1 = Minv Minvdag + // Deriv defaults to zero. + // FermOp.MooeeInvDag(PhiOdd,Y); + // FermOp.MooeeInv(Y,X); + // FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; + // FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; - assert(FermOp.ConstEE() == 1); + assert(FermOp.ConstEE() == 1); - /* - FermOp.MooeeInvDag(PhiOdd,Y); - FermOp.MooeeInv(Y,X); - FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; - FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; - */ + /* + FermOp.MooeeInvDag(PhiOdd,Y); + FermOp.MooeeInv(Y,X); + FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; + FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; + */ - //dSdU = Ta(dSdU); + //dSdU = Ta(dSdU); - }; + }; - }; +}; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index e9a8853a..d1d6f336 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -29,8 +29,7 @@ Author: paboyle #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_RATIO_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); /////////////////////////////////////// // Two flavour ratio @@ -118,7 +117,7 @@ namespace Grid{ // Odd det factors Mpc.MpcDag(etaOdd,PhiOdd); - tmp=zero; + tmp=Zero(); HeatbathSolver(Vpc,PhiOdd,tmp); Vpc.Mpc(tmp,PhiOdd); @@ -146,7 +145,7 @@ namespace Grid{ FermionField Y(NumOp.FermionRedBlackGrid()); Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi - X=zero; + X=Zero(); ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi //Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi // Multiply by Ydag @@ -182,13 +181,13 @@ namespace Grid{ FermionField Y(NumOp.FermionRedBlackGrid()); // This assignment is 
necessary to be compliant with the HMC grids - GaugeField force(dSdU._grid); + GaugeField force(dSdU.Grid()); //Y=Vdag phi //X = (Mdag M)^-1 V^dag phi //Y = (Mdag)^-1 V^dag phi Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi - X=zero; + X=Zero(); DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi @@ -212,6 +211,5 @@ namespace Grid{ }; }; - } -} +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/action/pseudofermion/TwoFlavourRatio.h b/Grid/qcd/action/pseudofermion/TwoFlavourRatio.h index bcbf9364..4d72faba 100644 --- a/Grid/qcd/action/pseudofermion/TwoFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/TwoFlavourRatio.h @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -25,149 +25,149 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_RATIO_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_RATIO_H -namespace Grid{ - namespace QCD{ +NAMESPACE_BEGIN(Grid); - /////////////////////////////////////// - // Two flavour ratio - /////////////////////////////////////// - template - class TwoFlavourRatioPseudoFermionAction : public Action { - public: - INHERIT_IMPL_TYPES(Impl); +/////////////////////////////////////// +// Two flavour ratio +/////////////////////////////////////// +template +class TwoFlavourRatioPseudoFermionAction : public Action { +public: + INHERIT_IMPL_TYPES(Impl); - private: - FermionOperator & NumOp;// the basic operator - FermionOperator & DenOp;// the basic operator +private: + 
FermionOperator & NumOp;// the basic operator + FermionOperator & DenOp;// the basic operator - OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; + OperatorFunction &DerivativeSolver; + OperatorFunction &ActionSolver; - FermionField Phi; // the pseudo fermion field for this trajectory + FermionField Phi; // the pseudo fermion field for this trajectory - public: - TwoFlavourRatioPseudoFermionAction(FermionOperator &_NumOp, - FermionOperator &_DenOp, - OperatorFunction & DS, - OperatorFunction & AS - ) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {}; +public: + TwoFlavourRatioPseudoFermionAction(FermionOperator &_NumOp, + FermionOperator &_DenOp, + OperatorFunction & DS, + OperatorFunction & AS + ) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {}; - virtual std::string action_name(){return "TwoFlavourRatioPseudoFermionAction";} + virtual std::string action_name(){return "TwoFlavourRatioPseudoFermionAction";} - virtual std::string LogParameters(){ - std::stringstream sstream; - sstream << GridLogMessage << "["< sig^2 = 0.5. - // - // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... - // - RealD scale = std::sqrt(0.5); + // P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi} + // + // NumOp == V + // DenOp == M + // + // Take phi = Vdag^{-1} Mdag eta ; eta = Mdag^{-1} Vdag Phi + // + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2) and must multiply by 0.707.... 
+ // + RealD scale = std::sqrt(0.5); - FermionField eta(NumOp.FermionGrid()); - FermionField tmp(NumOp.FermionGrid()); + FermionField eta(NumOp.FermionGrid()); + FermionField tmp(NumOp.FermionGrid()); - gaussian(pRNG,eta); + gaussian(pRNG,eta); - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); - // Note: this hard codes normal equations type solvers; alternate implementation needed for - // non-herm style solvers. - MdagMLinearOperator ,FermionField> MdagMOp(NumOp); + // Note: this hard codes normal equations type solvers; alternate implementation needed for + // non-herm style solvers. + MdagMLinearOperator ,FermionField> MdagMOp(NumOp); - DenOp.Mdag(eta,Phi); // Mdag eta - tmp = zero; - ActionSolver(MdagMOp,Phi,tmp); // (VdagV)^-1 Mdag eta = V^-1 Vdag^-1 Mdag eta - NumOp.M(tmp,Phi); // Vdag^-1 Mdag eta + DenOp.Mdag(eta,Phi); // Mdag eta + tmp = Zero(); + ActionSolver(MdagMOp,Phi,tmp); // (VdagV)^-1 Mdag eta = V^-1 Vdag^-1 Mdag eta + NumOp.M(tmp,Phi); // Vdag^-1 Mdag eta - Phi=Phi*scale; + Phi=Phi*scale; - }; + }; - ////////////////////////////////////////////////////// - // S = phi^dag V (Mdag M)^-1 Vdag phi - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { + ////////////////////////////////////////////////////// + // S = phi^dag V (Mdag M)^-1 Vdag phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); - FermionField X(NumOp.FermionGrid()); - FermionField Y(NumOp.FermionGrid()); + FermionField X(NumOp.FermionGrid()); + FermionField Y(NumOp.FermionGrid()); - MdagMLinearOperator ,FermionField> MdagMOp(DenOp); + MdagMLinearOperator ,FermionField> MdagMOp(DenOp); - NumOp.Mdag(Phi,Y); // Y= Vdag phi - X=zero; - ActionSolver(MdagMOp,Y,X); // X= (MdagM)^-1 Vdag phi - DenOp.M(X,Y); // Y= Mdag^-1 Vdag phi + NumOp.Mdag(Phi,Y); // Y= Vdag 
phi + X=Zero(); + ActionSolver(MdagMOp,Y,X); // X= (MdagM)^-1 Vdag phi + DenOp.M(X,Y); // Y= Mdag^-1 Vdag phi - RealD action = norm2(Y); + RealD action = norm2(Y); - return action; - }; + return action; + }; - ////////////////////////////////////////////////////// - // dS/du = phi^dag dV (Mdag M)^-1 V^dag phi - // - phi^dag V (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 V^dag phi - // + phi^dag V (Mdag M)^-1 dV^dag phi - ////////////////////////////////////////////////////// - virtual void deriv(const GaugeField &U,GaugeField & dSdU) { + ////////////////////////////////////////////////////// + // dS/du = phi^dag dV (Mdag M)^-1 V^dag phi + // - phi^dag V (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 V^dag phi + // + phi^dag V (Mdag M)^-1 dV^dag phi + ////////////////////////////////////////////////////// + virtual void deriv(const GaugeField &U,GaugeField & dSdU) { - NumOp.ImportGauge(U); - DenOp.ImportGauge(U); + NumOp.ImportGauge(U); + DenOp.ImportGauge(U); - MdagMLinearOperator ,FermionField> MdagMOp(DenOp); + MdagMLinearOperator ,FermionField> MdagMOp(DenOp); - FermionField X(NumOp.FermionGrid()); - FermionField Y(NumOp.FermionGrid()); + FermionField X(NumOp.FermionGrid()); + FermionField Y(NumOp.FermionGrid()); - GaugeField force(NumOp.GaugeGrid()); + GaugeField force(NumOp.GaugeGrid()); - //Y=Vdag phi - //X = (Mdag M)^-1 V^dag phi - //Y = (Mdag)^-1 V^dag phi - NumOp.Mdag(Phi,Y); // Y= Vdag phi - X=zero; - DerivativeSolver(MdagMOp,Y,X); // X= (MdagM)^-1 Vdag phi - DenOp.M(X,Y); // Y= Mdag^-1 Vdag phi + //Y=Vdag phi + //X = (Mdag M)^-1 V^dag phi + //Y = (Mdag)^-1 V^dag phi + NumOp.Mdag(Phi,Y); // Y= Vdag phi + X=Zero(); + DerivativeSolver(MdagMOp,Y,X); // X= (MdagM)^-1 Vdag phi + DenOp.M(X,Y); // Y= Mdag^-1 Vdag phi - // phi^dag V (Mdag M)^-1 dV^dag phi - NumOp.MDeriv(force , X, Phi, DaggerYes ); dSdU=force; + // phi^dag V (Mdag M)^-1 dV^dag phi + NumOp.MDeriv(force , X, Phi, DaggerYes ); dSdU=force; - // phi^dag dV (Mdag M)^-1 V^dag phi - NumOp.MDeriv(force , 
Phi, X ,DaggerNo ); dSdU=dSdU+force; + // phi^dag dV (Mdag M)^-1 V^dag phi + NumOp.MDeriv(force , Phi, X ,DaggerNo ); dSdU=dSdU+force; - // - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi - // - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi - DenOp.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU-force; - DenOp.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU-force; + // - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi + // - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi + DenOp.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU-force; + DenOp.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU-force; - dSdU *= -1.0; - //dSdU = - Ta(dSdU); + dSdU *= -1.0; + //dSdU = - Ta(dSdU); + + }; +}; + +NAMESPACE_END(Grid); - }; - }; - } -} #endif diff --git a/Grid/qcd/action/scalar/Scalar.h b/Grid/qcd/action/scalar/Scalar.h index 485a6765..44f7c450 100644 --- a/Grid/qcd/action/scalar/Scalar.h +++ b/Grid/qcd/action/scalar/Scalar.h @@ -25,26 +25,24 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -#ifndef GRID_QCD_SCALAR_H -#define GRID_QCD_SCALAR_H + /* END LEGAL */ + +#pragma once #include #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); - typedef ScalarAction ScalarActionR; - typedef ScalarAction ScalarActionF; - typedef ScalarAction ScalarActionD; +typedef ScalarAction ScalarActionR; +typedef ScalarAction ScalarActionF; +typedef ScalarAction ScalarActionD; - template using ScalarAdjActionR = ScalarInteractionAction, Dimensions>; - template using ScalarAdjActionF = ScalarInteractionAction, Dimensions>; - template using ScalarAdjActionD = ScalarInteractionAction, Dimensions>; +template using ScalarAdjActionR = ScalarInteractionAction, Dimensions>; +template using ScalarAdjActionF = ScalarInteractionAction, Dimensions>; +template using ScalarAdjActionD = 
ScalarInteractionAction, Dimensions>; -} -} +NAMESPACE_END(Grid); + -#endif // GRID_QCD_SCALAR_H diff --git a/Grid/qcd/action/scalar/ScalarAction.h b/Grid/qcd/action/scalar/ScalarAction.h index 2c82d2e3..34fc4fac 100644 --- a/Grid/qcd/action/scalar/ScalarAction.h +++ b/Grid/qcd/action/scalar/ScalarAction.h @@ -27,57 +27,54 @@ See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ +*************************************************************************************/ /* END LEGAL */ #ifndef SCALAR_ACTION_H #define SCALAR_ACTION_H -namespace Grid { - // FIXME drop the QCD namespace everywhere here +NAMESPACE_BEGIN(Grid); template -class ScalarAction : public QCD::Action { - public: - INHERIT_FIELD_TYPES(Impl); +class ScalarAction : public Action { +public: + INHERIT_FIELD_TYPES(Impl); - private: - RealD mass_square; - RealD lambda; +private: + RealD mass_square; + RealD lambda; - public: - ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} +public: + ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} - virtual std::string LogParameters() { - std::stringstream sstream; - sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; - sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; - return sstream.str(); - } - virtual std::string action_name() {return "ScalarAction";} + virtual std::string LogParameters() { + std::stringstream sstream; + sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; + sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; + return sstream.str(); + } + virtual std::string action_name() {return "ScalarAction";} - virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} // noop as no pseudoferms + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} // noop as no 
pseudoferms - virtual RealD S(const Field &p) { - return (mass_square * 0.5 + QCD::Nd) * ScalarObs::sumphisquared(p) + - (lambda / 24.) * ScalarObs::sumphifourth(p) + - ScalarObs::sumphider(p); - }; + virtual RealD S(const Field &p) { + return (mass_square * 0.5 + Nd) * ScalarObs::sumphisquared(p) + + (lambda / 24.) * ScalarObs::sumphifourth(p) + + ScalarObs::sumphider(p); + }; - virtual void deriv(const Field &p, - Field &force) { - Field tmp(p._grid); - Field p2(p._grid); - ScalarObs::phisquared(p2, p); - tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); - for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + virtual void deriv(const Field &p, + Field &force) { + Field tmp(p.Grid()); + Field p2(p.Grid()); + ScalarObs::phisquared(p2, p); + tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); + for (int mu = 1; mu < Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; - } + force =+(mass_square + 2. * Nd) * p + (lambda / 6.) 
* p2 * p + tmp; + } }; - - -} // namespace Grid +NAMESPACE_END(Grid); #endif // SCALAR_ACTION_H diff --git a/Grid/qcd/action/scalar/ScalarImpl.h b/Grid/qcd/action/scalar/ScalarImpl.h index 55f5049d..febb315e 100644 --- a/Grid/qcd/action/scalar/ScalarImpl.h +++ b/Grid/qcd/action/scalar/ScalarImpl.h @@ -1,93 +1,90 @@ -#ifndef SCALAR_IMPL -#define SCALAR_IMPL +#pragma once - -namespace Grid { - //namespace QCD { +NAMESPACE_BEGIN(Grid); template class ScalarImplTypes { - public: - typedef S Simd; +public: + typedef S Simd; - template - using iImplField = iScalar > >; + template + using iImplField = iScalar > >; - typedef iImplField SiteField; - typedef SiteField SitePropagator; - typedef SiteField SiteComplex; + typedef iImplField SiteField; + typedef SiteField SitePropagator; + typedef SiteField SiteComplex; + + typedef Lattice Field; + typedef Field ComplexField; + typedef Field FermionField; + typedef Field PropagatorField; + + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ + gaussian(pRNG, P); + } - typedef Lattice Field; - typedef Field ComplexField; - typedef Field FermionField; - typedef Field PropagatorField; + static inline Field projectForce(Field& P){return P;} - static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ - gaussian(pRNG, P); - } + static inline void update_field(Field& P, Field& U, double ep) { + U += P*ep; + } - static inline Field projectForce(Field& P){return P;} + static inline RealD FieldSquareNorm(Field& U) { + return (- sum(trace(U*U))/2.0); + } - static inline void update_field(Field& P, Field& U, double ep) { - U += P*ep; - } + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { + gaussian(pRNG, U); + } - static inline RealD FieldSquareNorm(Field& U) { - return (- sum(trace(U*U))/2.0); - } + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { + gaussian(pRNG, U); + } - static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); 
- } - - static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); - } - - static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { - U = 1.0; - } - - static void MomentumSpacePropagator(Field &out, RealD m) - { - GridBase *grid = out._grid; - Field kmu(grid), one(grid); - const unsigned int nd = grid->_ndimension; - std::vector &l = grid->_fdimensions; - - one = Complex(1.0,0.0); - out = m*m; - for(int mu = 0; mu < nd; mu++) + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { + U = 1.0; + } + + static void MomentumSpacePropagator(Field &out, RealD m) + { + GridBase *grid = out.Grid(); + Field kmu(grid), one(grid); + const unsigned int nd = grid->_ndimension; + Coordinate &l = grid->_fdimensions; + + one = Complex(1.0,0.0); + out = m*m; + for(int mu = 0; mu < nd; mu++) { Real twoPiL = M_PI*2./l[mu]; - + LatticeCoordinate(kmu,mu); kmu = 2.*sin(.5*twoPiL*kmu); out = out + kmu*kmu; } - out = one/out; - } - - static void FreePropagator(const Field &in, Field &out, - const Field &momKernel) - { - FFT fft((GridCartesian *)in._grid); - Field inFT(in._grid); - - fft.FFT_all_dim(inFT, in, FFT::forward); - inFT = inFT*momKernel; - fft.FFT_all_dim(out, inFT, FFT::backward); - } - - static void FreePropagator(const Field &in, Field &out, RealD m) - { - Field momKernel(in._grid); - - MomentumSpacePropagator(momKernel, m); - FreePropagator(in, out, momKernel); - } - - }; + out = one/out; + } + + static void FreePropagator(const Field &in, Field &out, + const Field &momKernel) + { + FFT fft((GridCartesian *)in.Grid()); + Field inFT(in.Grid()); + + fft.FFT_all_dim(inFT, in, FFT::forward); + inFT = inFT*momKernel; + fft.FFT_all_dim(out, inFT, FFT::backward); + } + + static void FreePropagator(const Field &in, Field &out, RealD m) + { + Field momKernel(in.Grid()); + + MomentumSpacePropagator(momKernel, m); + FreePropagator(in, out, momKernel); + } + +}; #ifdef USE_FFT_ACCELERATION #ifndef FFT_MASS @@ -95,30 
+92,30 @@ class ScalarImplTypes { #endif #endif - template - class ScalarAdjMatrixImplTypes { - public: - typedef S Simd; - typedef QCD::SU Group; +template +class ScalarAdjMatrixImplTypes { +public: + typedef S Simd; + typedef SU Group; + + template + using iImplField = iScalar>>; + template + using iImplComplex = iScalar>>; - template - using iImplField = iScalar>>; - template - using iImplComplex = iScalar>>; - - typedef iImplField SiteField; - typedef SiteField SitePropagator; - typedef iImplComplex SiteComplex; - - typedef Lattice Field; - typedef Lattice ComplexField; - typedef Field FermionField; - typedef Field PropagatorField; + typedef iImplField SiteField; + typedef SiteField SitePropagator; + typedef iImplComplex SiteComplex; + + typedef Lattice Field; + typedef Lattice ComplexField; + typedef Field FermionField; + typedef Field PropagatorField; static void MomentaSquare(ComplexField &out) { - GridBase *grid = out._grid; - const std::vector &l = grid->FullDimensions(); + GridBase *grid = out.Grid(); + const Coordinate &l = grid->FullDimensions(); ComplexField kmu(grid); for (int mu = 0; mu < grid->Nd(); mu++) @@ -132,7 +129,7 @@ class ScalarImplTypes { static void MomentumSpacePropagator(ComplexField &out, RealD m) { - GridBase *grid = out._grid; + GridBase *grid = out.Grid(); ComplexField one(grid); one = Complex(1.0, 0.0); out = m * m; @@ -143,16 +140,16 @@ class ScalarImplTypes { static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { #ifndef USE_FFT_ACCELERATION - Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); #else - Field Pgaussian(P._grid), Pp(P._grid); - ComplexField p2(P._grid); p2 = zero; + Field Pgaussian(P.Grid()), Pp(P.Grid()); + ComplexField p2(P.Grid()); p2 = zero; RealD M = FFT_MASS; Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian); - FFT theFFT((GridCartesian*)P._grid); + FFT theFFT((GridCartesian*)P.Grid()); theFFT.FFT_all_dim(Pp, Pgaussian, 
FFT::forward); MomentaSquare(p2); p2 += M * M; @@ -161,15 +158,15 @@ class ScalarImplTypes { theFFT.FFT_all_dim(P, Pp, FFT::backward); #endif //USE_FFT_ACCELERATION - } + } - static inline Field projectForce(Field& P) {return P;} + static inline Field projectForce(Field& P) {return P;} static inline void update_field(Field &P, Field &U, double ep) { #ifndef USE_FFT_ACCELERATION double t0=usecond(); - U += P * ep; + U += P*ep; double t1=usecond(); double total_time = (t1-t0)/1e6; std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl; @@ -180,11 +177,11 @@ class ScalarImplTypes { // Transform back -> P'(x) // U += P'(x)*ep - Field Pp(U._grid), P_FFT(U._grid); - static ComplexField p2(U._grid); + Field Pp(U.Grid()), P_FFT(U.Grid()); + static ComplexField p2(U.Grid()); RealD M = FFT_MASS; - FFT theFFT((GridCartesian*)U._grid); + FFT theFFT((GridCartesian*)U.Grid()); theFFT.FFT_all_dim(Pp, P, FFT::forward); static bool first_call = true; @@ -193,7 +190,7 @@ class ScalarImplTypes { // avoid recomputing MomentumSpacePropagator(p2, M); first_call = false; - } + } Pp *= p2; theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward); U += P_FFT * ep; @@ -204,60 +201,55 @@ class ScalarImplTypes { static inline RealD FieldSquareNorm(Field &U) { #ifndef USE_FFT_ACCELERATION - return (TensorRemove(sum(trace(U * U))).real()); + return (TensorRemove(sum(trace(U*U))).real()); #else // In case of Fourier acceleration we have to: // compute U(p)*U(p)/(M^2+p^2)) Parseval theorem // 1 FFT needed U(x) -> U(p) // M to be passed - FFT theFFT((GridCartesian*)U._grid); - Field Up(U._grid); + FFT theFFT((GridCartesian*)U.Grid()); + Field Up(U.Grid()); theFFT.FFT_all_dim(Up, U, FFT::forward); RealD M = FFT_MASS; - ComplexField p2(U._grid); + ComplexField p2(U.Grid()); MomentumSpacePropagator(p2, M); Field Up2 = Up * p2; // from the definition of the DFT we need to divide by the volume - return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / 
U._grid->gSites()); + return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / U.Grid()->gSites()); #endif //USE_FFT_ACCELERATION - } + } - static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U); - } + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U); + } - static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01); - } + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01); + } - static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { - U = zero; - } + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { + U = Zero(); + } - }; +}; +typedef ScalarImplTypes ScalarImplR; +typedef ScalarImplTypes ScalarImplF; +typedef ScalarImplTypes ScalarImplD; +typedef ScalarImplTypes ScalarImplCR; +typedef ScalarImplTypes ScalarImplCF; +typedef ScalarImplTypes ScalarImplCD; + +// Hardcoding here the size of the matrices +typedef ScalarAdjMatrixImplTypes ScalarAdjImplR; +typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; +typedef ScalarAdjMatrixImplTypes ScalarAdjImplD; +template using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes; +template using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes; +template using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes; +NAMESPACE_END(Grid); - typedef ScalarImplTypes ScalarImplR; - typedef ScalarImplTypes ScalarImplF; - typedef ScalarImplTypes ScalarImplD; - typedef ScalarImplTypes ScalarImplCR; - typedef ScalarImplTypes ScalarImplCF; - typedef ScalarImplTypes ScalarImplCD; - - // Hardcoding here the size of the matrices - typedef ScalarAdjMatrixImplTypes ScalarAdjImplR; - typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; - typedef ScalarAdjMatrixImplTypes ScalarAdjImplD; - - template using ScalarNxNAdjImplR = 
ScalarAdjMatrixImplTypes; - template using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes; - template using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes; - - //} -} - -#endif diff --git a/Grid/qcd/action/scalar/ScalarInteractionAction.h b/Grid/qcd/action/scalar/ScalarInteractionAction.h index 8738b647..3be84480 100644 --- a/Grid/qcd/action/scalar/ScalarInteractionAction.h +++ b/Grid/qcd/action/scalar/ScalarInteractionAction.h @@ -27,18 +27,15 @@ directory *************************************************************************************/ /* END LEGAL */ -#ifndef SCALAR_INT_ACTION_H -#define SCALAR_INT_ACTION_H +#pragma once // Note: this action can completely absorb the ScalarAction for real float fields // use the scalarObjs to generalise the structure -namespace Grid -{ -// FIXME drop the QCD namespace everywhere here +NAMESPACE_BEGIN(Grid); template -class ScalarInteractionAction : public QCD::Action +class ScalarInteractionAction : public Action { public: INHERIT_FIELD_TYPES(Impl); @@ -50,7 +47,7 @@ private: const unsigned int N = Impl::Group::Dimension; typedef typename Field::vector_object vobj; - typedef CartesianStencil Stencil; + typedef CartesianStencil Stencil; SimpleCompressor compressor; int npoint = 2 * Ndim; @@ -84,16 +81,20 @@ public: virtual RealD S(const Field &p) { - assert(p._grid->Nd() == Ndim); - static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + assert(p.Grid()->Nd() == Ndim); + static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements,0); phiStencil.HaloExchange(p, compressor); - Field action(p._grid), pshift(p._grid), phisquared(p._grid); + Field action(p.Grid()), pshift(p.Grid()), phisquared(p.Grid()); phisquared = p * p; action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared; + + + auto p_v = p.View(); + auto action_v = action.View(); for (int mu = 0; mu < Ndim; mu++) { // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils - parallel_for(int i = 0; i 
< p._grid->oSites(); i++) + thread_for(i, p.Grid()->oSites(), { int permute_type; StencilEntry *SE; @@ -101,25 +102,22 @@ public: const vobj *temp, *t_p; SE = phiStencil.GetEntry(permute_type, mu, i); - t_p = &p._odata[i]; + t_p = &p_v[i]; if (SE->_is_local) { - temp = &p._odata[SE->_offset]; - if (SE->_permute) - { + temp = &p_v[SE->_offset]; + if (SE->_permute) { permute(temp2, *temp, permute_type); - action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2; - } - else - { - action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp); + action_v[i] -= temp2 * (*t_p) + (*t_p) * temp2; + } else { + action_v[i] -= (*temp) * (*t_p) + (*t_p) * (*temp); } } else { - action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset]; + action_v[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset]; } - } + }); // action -= pshift*p + p*pshift; } // NB the trace in the algebra is normalised to 1/2 @@ -130,17 +128,17 @@ public: virtual void deriv(const Field &p, Field &force) { double t0 = usecond(); - assert(p._grid->Nd() == Ndim); + assert(p.Grid()->Nd() == Ndim); force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p; double interm_t = usecond(); // move this outside - static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + static Stencil phiStencil(p.Grid(), npoint, 0, directions, displacements,0); phiStencil.HaloExchange(p, compressor); double halo_t = usecond(); - int chunk = 128; - //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + // int chunk = 128; + //for (int mu = 0; mu < Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); // inverting the order of the loops slows down the code(! g++ 7) // cannot try to reduce the number of force writes by factor npoint... 
@@ -148,61 +146,53 @@ public: for (int point = 0; point < npoint; point++) { -#pragma omp parallel -{ - int permute_type; - StencilEntry *SE; - const vobj *temp; + auto p_v = p.View(); + auto force_v = force.View(); + + int permute_type; + StencilEntry *SE; + const vobj *temp; -#pragma omp for schedule(static, chunk) - for (int i = 0; i < p._grid->oSites(); i++) + thread_for(i, p.Grid()->oSites(), { - SE = phiStencil.GetEntry(permute_type, point, i); - // prefetch next p? - - if (SE->_is_local) - { - temp = &p._odata[SE->_offset]; - - if (SE->_permute) - { + SE = phiStencil.GetEntry(permute_type, point, i); + // prefetch next p? + + if (SE->_is_local) { + temp = &p_v[SE->_offset]; + + if (SE->_permute) { vobj temp2; permute(temp2, *temp, permute_type); - force._odata[i] -= temp2; - } - else - { - force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW) + force_v[i] -= temp2; + } else { + force_v[i] -= *temp; // slow part. Dominated by this read/write (BW) } + } else { + force_v[i] -= phiStencil.CommBuf()[SE->_offset]; } - else - { - force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; - } - } - + }); } - } - force *= N / g; + force *= N / g; - double t1 = usecond(); - double total_time = (t1 - t0) / 1e6; - double interm_time = (interm_t - t0) / 1e6; - double halo_time = (halo_t - interm_t) / 1e6; - double stencil_time = (t1 - halo_t) / 1e6; - std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl; - std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl; - std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl; - std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl; - double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2); - double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2); - double Gflops = flops / 
(total_time * 1e9); - double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9); - std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl; - std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl; -} + double t1 = usecond(); + double total_time = (t1 - t0) / 1e6; + double interm_time = (interm_t - t0) / 1e6; + double halo_time = (halo_t - interm_t) / 1e6; + double stencil_time = (t1 - halo_t) / 1e6; + std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl; + std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl; + std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl; + std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl; + double flops = p.Grid()->gSites() * (14 * N * N * N + 18 * N * N + 2); + double flops_no_stencil = p.Grid()->gSites() * (14 * N * N * N + 6 * N * N + 2); + double Gflops = flops / (total_time * 1e9); + double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9); + std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl; + std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl; + } }; -} // namespace Grid +NAMESPACE_END(Grid); + -#endif // SCALAR_INT_ACTION_H diff --git a/Grid/qcd/hmc/GenericHMCrunner.h b/Grid/qcd/hmc/GenericHMCrunner.h index 26fec3d5..c2443dd0 100644 --- a/Grid/qcd/hmc/GenericHMCrunner.h +++ b/Grid/qcd/hmc/GenericHMCrunner.h @@ -25,16 +25,14 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ 
+*************************************************************************************/ + /* END LEGAL */ #ifndef GRID_GENERIC_HMC_RUNNER #define GRID_GENERIC_HMC_RUNNER #include -namespace Grid { -namespace QCD { - +NAMESPACE_BEGIN(Grid); // very ugly here but possibly resolved if we had a base Reader class template < class ReaderClass > @@ -44,12 +42,11 @@ public: virtual void initialize(ReaderClass& ) = 0; }; - template class Integrator, class RepresentationsPolicy = NoHirep, class ReaderClass = XmlReader> class HMCWrapperTemplate: public HMCRunnerBase { - public: +public: INHERIT_FIELD_TYPES(Implementation); typedef Implementation ImplPolicy; // visible from outside template > @@ -88,8 +85,8 @@ class HMCWrapperTemplate: public HMCRunnerBase { arg != "CheckpointStart") { std::cout << GridLogError << "Unrecognized option in --StartingType\n"; std::cout - << GridLogError - << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + << GridLogError + << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n"; exit(1); } Parameters.StartingType = arg; @@ -134,7 +131,7 @@ class HMCWrapperTemplate: public HMCRunnerBase { ////////////////////////////////////////////////////////////////// - private: +private: template void Runner(SmearingPolicy &Smearing) { auto UGrid = Resources.GetCartesian(); @@ -160,8 +157,8 @@ class HMCWrapperTemplate: public HMCRunnerBase { } else if (Parameters.StartingType == "CheckpointStart") { // CheckpointRestart Resources.GetCheckPointer()->CheckpointRestore(Parameters.StartTrajectory, U, - Resources.GetSerialRNG(), - Resources.GetParallelRNG()); + Resources.GetSerialRNG(), + Resources.GetParallelRNG()); } Smearing.set_Field(U); @@ -198,22 +195,21 @@ using ConjugateHMCRunnerD = HMCWrapperTemplate; template class Integrator> using GenericHMCRunnerHirep = - HMCWrapperTemplate; + HMCWrapperTemplate; template class Integrator> using GenericHMCRunnerTemplate = HMCWrapperTemplate; typedef HMCWrapperTemplate - ScalarGenericHMCRunner; 
+ScalarGenericHMCRunner; typedef HMCWrapperTemplate - ScalarAdjGenericHMCRunner; +ScalarAdjGenericHMCRunner; template using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR, ForceGradient, ScalarNxNMatrixFields >; -} // namespace QCD -} // namespace Grid +NAMESPACE_END(Grid); #endif // GRID_GENERIC_HMC_RUNNER diff --git a/Grid/qcd/hmc/HMC.h b/Grid/qcd/hmc/HMC.h index 5688bb24..0f933204 100644 --- a/Grid/qcd/hmc/HMC.h +++ b/Grid/qcd/hmc/HMC.h @@ -28,30 +28,26 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -//-------------------------------------------------------------------- -/*! @file HMC.h - * @brief Classes for Hybrid Monte Carlo update - * - * @author Guido Cossu - */ -//-------------------------------------------------------------------- -#ifndef HMC_INCLUDED -#define HMC_INCLUDED + /* END LEGAL */ + //-------------------------------------------------------------------- + /*! 
@file HMC.h + * @brief Classes for Hybrid Monte Carlo update + * + * @author Guido Cossu + */ + //-------------------------------------------------------------------- +#pragma once #include #include - - #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); struct HMCparameters: Serializable { - GRID_SERIALIZABLE_CLASS_MEMBERS(HMCparameters, + GRID_SERIALIZABLE_CLASS_MEMBERS(HMCparameters, Integer, StartTrajectory, Integer, Trajectories, /* @brief Number of sweeps in this run */ bool, MetropolisTest, @@ -71,13 +67,13 @@ struct HMCparameters: Serializable { template HMCparameters(Reader & TheReader){ - initialize(TheReader); + initialize(TheReader); } template < class ReaderClass > void initialize(Reader &TheReader){ - std::cout << GridLogMessage << "Reading HMC\n"; - read(TheReader, "HMC", *this); + std::cout << GridLogMessage << "Reading HMC\n"; + read(TheReader, "HMC", *this); } @@ -94,20 +90,20 @@ struct HMCparameters: Serializable { template class HybridMonteCarlo { - private: +private: const HMCparameters Params; typedef typename IntegratorType::Field Field; typedef std::vector< HmcObservable * > ObsListType; - //pass these from the resource manager + //pass these from the resource manager GridSerialRNG &sRNG; GridParallelRNG &pRNG; Field &Ucur; IntegratorType &TheIntegrator; - ObsListType Observables; + ObsListType Observables; ///////////////////////////////////////////////////////// // Metropolis step @@ -179,7 +175,7 @@ class HybridMonteCarlo { - public: +public: ///////////////////////////////////////// // Constructor ///////////////////////////////////////// @@ -192,7 +188,7 @@ class HybridMonteCarlo { void evolve(void) { Real DeltaH; - Field Ucopy(Ucur._grid); + Field Ucopy(Ucur.Grid()); Params.print_parameters(); TheIntegrator.print_actions(); @@ -238,14 +234,9 @@ class HybridMonteCarlo { }; - -} // QCD -} // Grid - - +NAMESPACE_END(Grid); // april 11 2017 merge, Guido, commenting out //#include //#include //#include -#endif diff 
--git a/Grid/qcd/hmc/HMCModules.h b/Grid/qcd/hmc/HMCModules.h index 8b9b0479..4c61a006 100644 --- a/Grid/qcd/hmc/HMCModules.h +++ b/Grid/qcd/hmc/HMCModules.h @@ -26,21 +26,19 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef GRID_HMC_MODULES #define GRID_HMC_MODULES - #include "HMC_GridModules.h" -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////// struct RNGModuleParameters: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(RNGModuleParameters, - std::string, serial_seeds, - std::string, parallel_seeds,); + std::string, serial_seeds, + std::string, parallel_seeds,); std::vector getSerialSeeds(){return strToVec(serial_seeds);} std::vector getParallelSeeds(){return strToVec(parallel_seeds);} @@ -56,9 +54,9 @@ struct RNGModuleParameters: Serializable { // Random number generators module class RNGModule{ - GridSerialRNG sRNG_; - std::unique_ptr pRNG_; - RNGModuleParameters Params_; + GridSerialRNG sRNG_; + std::unique_ptr pRNG_; + RNGModuleParameters Params_; public: @@ -93,19 +91,16 @@ public: /// Smearing module template class SmearingModule{ - virtual void get_smearing(); +virtual void get_smearing(); }; template class StoutSmearingModule: public SmearingModule{ - SmearedConfiguration SmearingPolicy; +SmearedConfiguration SmearingPolicy; }; */ - - -} // namespace QCD -} // namespace Grid +NAMESPACE_END(Grid); #endif // GRID_HMC_MODULES diff --git a/Grid/qcd/hmc/HMCResourceManager.h b/Grid/qcd/hmc/HMCResourceManager.h index fcfaeaed..783e4890 100644 --- a/Grid/qcd/hmc/HMCResourceManager.h +++ b/Grid/qcd/hmc/HMCResourceManager.h @@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in 
the top level distribution directory - *************************************************************************************/ -/* END LEGAL */ +*************************************************************************************/ + /* END LEGAL */ #ifndef HMC_RESOURCE_MANAGER_H #define HMC_RESOURCE_MANAGER_H @@ -64,15 +64,14 @@ with this program; if not, write to the Free Software Foundation, Inc., } \ } -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); // HMC Resource manager template class HMCResourceManager { - typedef HMCModuleBase< QCD::BaseHmcCheckpointer > CheckpointerBaseModule; - typedef HMCModuleBase< QCD::HmcObservable > ObservableBaseModule; - typedef ActionModuleBase< QCD::Action, GridModule > ActionBaseModule; + typedef HMCModuleBase< BaseHmcCheckpointer > CheckpointerBaseModule; + typedef HMCModuleBase< HmcObservable > ObservableBaseModule; + typedef ActionModuleBase< Action, GridModule > ActionBaseModule; // Named storage for grid pairs (std + red-black) std::unordered_map Grids; @@ -101,7 +100,7 @@ class HMCResourceManager { } - public: +public: HMCResourceManager() : have_RNG(false), have_CheckPointer(false) {} template @@ -151,13 +150,13 @@ class HMCResourceManager { } if(!Read.push("Level")){// push must check if the node exist - std::cout << "Level not found" << std::endl; + std::cout << "Level not found" << std::endl; exit(1); } do - { - fill_ActionsLevel(Read); - } + { + fill_ActionsLevel(Read); + } while(Read.push("Level")); Read.pop(); @@ -268,10 +267,10 @@ class HMCResourceManager { RegisterLoadCheckPointerFunction(Binary); RegisterLoadCheckPointerFunction(Nersc); - #ifdef HAVE_LIME +#ifdef HAVE_LIME RegisterLoadCheckPointerFunction(ILDG); RegisterLoadCheckPointerMetadataFunction(Scidac); - #endif +#endif //////////////////////////////////////////////////////// // Observables @@ -297,7 +296,7 @@ class HMCResourceManager { private: - // this private + // this private template void fill_ActionsLevel(ReaderClass &Read){ // Actions 
set @@ -322,7 +321,7 @@ private: }; -} -} + +NAMESPACE_END(Grid); #endif // HMC_RESOURCE_MANAGER_H diff --git a/Grid/qcd/hmc/HMCRunnerModule.h b/Grid/qcd/hmc/HMCRunnerModule.h index f595d996..fa714a1c 100644 --- a/Grid/qcd/hmc/HMCRunnerModule.h +++ b/Grid/qcd/hmc/HMCRunnerModule.h @@ -26,28 +26,28 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef HMC_RUNNER_MODULE #define HMC_RUNNER_MODULE -namespace Grid { +NAMESPACE_BEGIN(Grid); // the reader class is necessary here for the automatic initialization of the resources // if we had a virtual reader would have been unecessary template class HMCModule - : public Parametrized< QCD::HMCparameters >, - public HMCModuleBase< QCD::HMCRunnerBase > { - public: - typedef HMCModuleBase< QCD::HMCRunnerBase > Base; + : public Parametrized< HMCparameters >, + public HMCModuleBase< HMCRunnerBase > { +public: + typedef HMCModuleBase< HMCRunnerBase > Base; typedef typename Base::Product Product; std::unique_ptr HMCPtr; - HMCModule(QCD::HMCparameters Par) : Parametrized(Par) {} + HMCModule(HMCparameters Par) : Parametrized(Par) {} template - HMCModule(Reader& R) : Parametrized(R, "HMC"){}; + HMCModule(Reader& R) : Parametrized(R, "HMC"){}; Product* getPtr() { if (!HMCPtr) initialize(); @@ -55,17 +55,17 @@ class HMCModule return HMCPtr.get(); } - private: +private: virtual void initialize() = 0; }; // Factory template class HMCRunnerModuleFactory - : public Factory < HMCModuleBase< QCD::HMCRunnerBase > , Reader > { - public: - typedef Reader TheReader; - // use SINGLETON FUNCTOR MACRO HERE + : public Factory < HMCModuleBase< HMCRunnerBase > , Reader > { +public: + typedef Reader TheReader; + // use SINGLETON FUNCTOR MACRO HERE HMCRunnerModuleFactory(const HMCRunnerModuleFactory& e) = delete; void 
operator=(const HMCRunnerModuleFactory& e) = delete; static HMCRunnerModuleFactory& getInstance(void) { @@ -73,10 +73,10 @@ class HMCRunnerModuleFactory return e; } - private: +private: HMCRunnerModuleFactory(void) = default; std::string obj_type() const { - return std::string(str); + return std::string(str); } }; @@ -88,50 +88,43 @@ class HMCRunnerModuleFactory // macro for these template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass > -class HMCLeapFrog: public HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass >{ - typedef HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; +class HMCLeapFrog: public HMCModule< GenericHMCRunnerTemplate, ReaderClass >{ + typedef HMCModule< GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; using HMCBaseMod::HMCBaseMod; // aquire resource virtual void initialize(){ - this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate(this->Par_) ); + this->HMCPtr.reset(new GenericHMCRunnerTemplate(this->Par_) ); } }; template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass > -class HMCMinimumNorm2: public HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass >{ - typedef HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; +class HMCMinimumNorm2: public HMCModule< GenericHMCRunnerTemplate, ReaderClass >{ + typedef HMCModule< GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; using HMCBaseMod::HMCBaseMod; // aquire resource virtual void initialize(){ - this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate(this->Par_)); + this->HMCPtr.reset(new GenericHMCRunnerTemplate(this->Par_)); } }; template < class ImplementationPolicy, class RepresentationPolicy, class ReaderClass > -class HMCForceGradient: public HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass >{ - typedef HMCModule< QCD::GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; +class HMCForceGradient: public HMCModule< GenericHMCRunnerTemplate, ReaderClass >{ + typedef HMCModule< 
GenericHMCRunnerTemplate, ReaderClass > HMCBaseMod; using HMCBaseMod::HMCBaseMod; // aquire resource virtual void initialize(){ - this->HMCPtr.reset(new QCD::GenericHMCRunnerTemplate(this->Par_) ); + this->HMCPtr.reset(new GenericHMCRunnerTemplate(this->Par_) ); } }; extern char hmc_string[]; - - - - - ////////////////////////////////////////////////////////////// +NAMESPACE_END(Grid); - -} - -#endif \ No newline at end of file +#endif diff --git a/Grid/qcd/hmc/HMC_GridModules.h b/Grid/qcd/hmc/HMC_GridModules.h index 0f34e9a7..0c834cf2 100644 --- a/Grid/qcd/hmc/HMC_GridModules.h +++ b/Grid/qcd/hmc/HMC_GridModules.h @@ -26,11 +26,11 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef HMC_GRID_MODULES #define HMC_GRID_MODULES -namespace Grid { +NAMESPACE_BEGIN(Grid); // Resources // Modules for grids @@ -40,8 +40,8 @@ namespace Grid { class GridModuleParameters: Serializable{ public: GRID_SERIALIZABLE_CLASS_MEMBERS(GridModuleParameters, - std::string, lattice, - std::string, mpi); + std::string, lattice, + std::string, mpi); std::vector getLattice() const {return strToVec(lattice);} std::vector getMpi() const {return strToVec(mpi);} @@ -51,7 +51,7 @@ public: if (getLattice().size() != getMpi().size() ) { std::cout << GridLogError << "Error in GridModuleParameters: lattice and mpi dimensions " - "do not match" + "do not match" << std::endl; exit(1); } @@ -70,12 +70,12 @@ public: write(Writer, name, *this); } private: - std::string name; + std::string name; }; // Lower level class class GridModule { - public: +public: GridCartesian* get_full() { std::cout << GridLogDebug << "Getting cartesian in module"<< std::endl; return grid_.get(); } @@ -88,7 +88,7 @@ class GridModule { void show_full_decomposition(){ grid_->show_decomposition(); } 
void show_rb_decomposition(){ rbgrid_->show_decomposition(); } - protected: +protected: std::unique_ptr grid_; std::unique_ptr rbgrid_; @@ -104,24 +104,23 @@ class GridFourDimModule : public GridModule public: GridFourDimModule() { - using namespace QCD; set_full(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), - GridDefaultSimd(4, vector_type::Nsimd()), - GridDefaultMpi())); + GridDefaultLatt(), + GridDefaultSimd(4, vector_type::Nsimd()), + GridDefaultMpi())); set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); } GridFourDimModule(const std::vector tweak_simd) { - using namespace QCD; + Coordinate _tweak_simd(tweak_simd); if (tweak_simd.size() != 4) - { - std::cout << GridLogError - << "Error in GridFourDimModule: SIMD size different from 4" - << std::endl; - exit(1); - } + { + std::cout << GridLogError + << "Error in GridFourDimModule: SIMD size different from 4" + << std::endl; + exit(1); + } // Checks that the product agrees with the expectation int simd_sum = 1; @@ -130,48 +129,47 @@ public: std::cout << GridLogDebug << "TweakSIMD: " << tweak_simd << " Sum: " << simd_sum << std::endl; if (simd_sum == vector_type::Nsimd()) - { - set_full(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), - tweak_simd, - GridDefaultMpi())); - set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); - } + { + set_full(SpaceTimeGrid::makeFourDimGrid( + GridDefaultLatt(), + _tweak_simd, + GridDefaultMpi())); + set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); + } else - { - std::cout << GridLogError - << "Error in GridFourDimModule: SIMD lanes must sum to " - << vector_type::Nsimd() - << std::endl; - } + { + std::cout << GridLogError + << "Error in GridFourDimModule: SIMD lanes must sum to " + << vector_type::Nsimd() + << std::endl; + } } GridFourDimModule(const GridModuleParameters Params) { - using namespace QCD; std::vector lattice_v = Params.getLattice(); std::vector mpi_v = Params.getMpi(); if (lattice_v.size() == 4) - { - 
set_full(SpaceTimeGrid::makeFourDimGrid( - lattice_v, - GridDefaultSimd(4, vector_type::Nsimd()), - mpi_v)); - set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); - } + { + set_full(SpaceTimeGrid::makeFourDimGrid( + lattice_v, + GridDefaultSimd(4, vector_type::Nsimd()), + mpi_v)); + set_rb(SpaceTimeGrid::makeFourDimRedBlackGrid(grid_.get())); + } else - { - std::cout << GridLogError - << "Error in GridFourDimModule: lattice dimension different from 4" - << std::endl; - exit(1); - } + { + std::cout << GridLogError + << "Error in GridFourDimModule: lattice dimension different from 4" + << std::endl; + exit(1); + } } }; typedef GridFourDimModule GridDefaultFourDimModule; -} // namespace Grid +NAMESPACE_END(Grid); #endif // HMC_GRID_MODULES diff --git a/Grid/qcd/hmc/HMC_aggregate.h b/Grid/qcd/hmc/HMC_aggregate.h index 53d4b9c8..e4d2ce83 100644 --- a/Grid/qcd/hmc/HMC_aggregate.h +++ b/Grid/qcd/hmc/HMC_aggregate.h @@ -28,25 +28,26 @@ directory /* END LEGAL */ //-------------------------------------------------------------------- //-------------------------------------------------------------------- -#ifndef HMC_AGGREGATE_INCLUDED -#define HMC_AGGREGATE_INCLUDED +#pragma once #include #include #include - // annoying location; should move this ? 
#include #include #include +NAMESPACE_CHECK(Ildg); #include #include #include +NAMESPACE_CHECK(HMCmodules); #include +NAMESPACE_CHECK(HMCresourcemanager); #include #include +NAMESPACE_CHECK(HMCrunner); -#endif diff --git a/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h b/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h index f4ef252b..3cd05ebc 100644 --- a/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/BaseCheckpointer.h @@ -25,27 +25,26 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef BASE_CHECKPOINTER #define BASE_CHECKPOINTER -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); class CheckpointerParameters : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(CheckpointerParameters, - std::string, config_prefix, - std::string, rng_prefix, - int, saveInterval, - std::string, format, ); + std::string, config_prefix, + std::string, rng_prefix, + int, saveInterval, + std::string, format, ); CheckpointerParameters(std::string cf = "cfg", std::string rn = "rng", - int savemodulo = 1, const std::string &f = "IEEE64BIG") - : config_prefix(cf), - rng_prefix(rn), - saveInterval(savemodulo), - format(f){}; + int savemodulo = 1, const std::string &f = "IEEE64BIG") + : config_prefix(cf), + rng_prefix(rn), + saveInterval(savemodulo), + format(f){}; template @@ -60,7 +59,7 @@ class CheckpointerParameters : Serializable { // Base class for checkpointers template class BaseHmcCheckpointer : public HmcObservable { - public: +public: void build_filenames(int traj, CheckpointerParameters &Params, std::string &conf_file, std::string &rng_file) { { @@ -74,7 +73,7 @@ class BaseHmcCheckpointer : public HmcObservable { os << Params.config_prefix << "." 
<< traj; conf_file = os.str(); } - } + } void check_filename(const std::string &filename){ std::ifstream f(filename.c_str()); @@ -92,6 +91,7 @@ class BaseHmcCheckpointer : public HmcObservable { }; // class BaseHmcCheckpointer /////////////////////////////////////////////////////////////////////////////// -} -} + +NAMESPACE_END(Grid); + #endif diff --git a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h index 025398eb..ef9e6194 100644 --- a/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/BinaryCheckpointer.h @@ -25,7 +25,7 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef BINARY_CHECKPOINTER #define BINARY_CHECKPOINTER @@ -33,16 +33,15 @@ directory #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); // Simple checkpointer, only binary file template class BinaryHmcCheckpointer : public BaseHmcCheckpointer { - private: +private: CheckpointerParameters Params; - public: +public: INHERIT_FIELD_TYPES(Impl); // Gets the Field type, a Lattice object // Extract types from the Field @@ -111,6 +110,6 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer { << std::dec << std::endl; }; }; -} -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/hmc/checkpointers/CheckPointerModules.h b/Grid/qcd/hmc/checkpointers/CheckPointerModules.h index f17cd2c8..13b6cf13 100644 --- a/Grid/qcd/hmc/checkpointers/CheckPointerModules.h +++ b/Grid/qcd/hmc/checkpointers/CheckPointerModules.h @@ -25,27 +25,23 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ 
-/* END LEGAL */ + /* END LEGAL */ #ifndef CP_MODULES_H #define CP_MODULES_H - -// FIXME Reorganize QCD namespace - - -namespace Grid { +NAMESPACE_BEGIN(Grid); //////////////////////////////////////////////////////////////////////// // Checkpoint module, owns the Checkpointer //////////////////////////////////////////////////////////////////////// template -class CheckPointerModule: public Parametrized, public HMCModuleBase< QCD::BaseHmcCheckpointer > { - public: - std::unique_ptr > CheckPointPtr; - typedef QCD::CheckpointerParameters APar; - typedef HMCModuleBase< QCD::BaseHmcCheckpointer > Base; +class CheckPointerModule: public Parametrized, public HMCModuleBase< BaseHmcCheckpointer > { +public: + std::unique_ptr > CheckPointPtr; + typedef CheckpointerParameters APar; + typedef HMCModuleBase< BaseHmcCheckpointer > Base; typedef typename Base::Product Product; CheckPointerModule(APar Par): Parametrized(Par) {} @@ -53,7 +49,7 @@ class CheckPointerModule: public Parametrized, publ CheckPointerModule(Reader& Reader) : Parametrized(Reader){}; virtual void print_parameters(){ - std::cout << this->Par_ << std::endl; + std::cout << this->Par_ << std::endl; } Product* getPtr() { @@ -62,19 +58,17 @@ class CheckPointerModule: public Parametrized, publ return CheckPointPtr.get(); } - private: +private: virtual void initialize() = 0; }; - - template class HMC_CPModuleFactory - : public Factory < HMCModuleBase< QCD::BaseHmcCheckpointer > , Reader > { - public: - typedef Reader TheReader; - // use SINGLETON FUNCTOR MACRO HERE + : public Factory < HMCModuleBase< BaseHmcCheckpointer > , Reader > { +public: + typedef Reader TheReader; + // use SINGLETON FUNCTOR MACRO HERE HMC_CPModuleFactory(const HMC_CPModuleFactory& e) = delete; void operator=(const HMC_CPModuleFactory& e) = delete; static HMC_CPModuleFactory& getInstance(void) { @@ -82,19 +76,16 @@ class HMC_CPModuleFactory return e; } - private: +private: HMC_CPModuleFactory(void) = default; std::string obj_type() const { - 
return std::string(str); + return std::string(str); } }; - - ///////////////////////////////////////////////////////////////////// // Concrete classes ///////////////////////////////////////////////////////////////////// -namespace QCD{ template class BinaryCPModule: public CheckPointerModule< ImplementationPolicy> { @@ -116,7 +107,7 @@ class NerscCPModule: public CheckPointerModule< ImplementationPolicy> { // acquire resource virtual void initialize(){ - this->CheckPointPtr.reset(new NerscHmcCheckpointer(this->Par_)); + this->CheckPointPtr.reset(new NerscHmcCheckpointer(this->Par_)); } }; @@ -131,7 +122,7 @@ class ILDGCPModule: public CheckPointerModule< ImplementationPolicy> { // acquire resource virtual void initialize(){ - this->CheckPointPtr.reset(new ILDGHmcCheckpointer(this->Par_)); + this->CheckPointPtr.reset(new ILDGHmcCheckpointer(this->Par_)); } }; @@ -152,21 +143,18 @@ public: }; #endif - -}// QCD temporarily here - - extern char cp_string[]; /* // use macros? -static Registrar, HMC_CPModuleFactory > __CPBinarymodXMLInit("Binary"); -static Registrar , HMC_CPModuleFactory > __CPNerscmodXMLInit("Nersc"); +static Registrar, HMC_CPModuleFactory > __CPBinarymodXMLInit("Binary"); +static Registrar , HMC_CPModuleFactory > __CPNerscmodXMLInit("Nersc"); #ifdef HAVE_LIME -static Registrar , HMC_CPModuleFactory > __CPILDGmodXMLInit("ILDG"); +static Registrar , HMC_CPModuleFactory > __CPILDGmodXMLInit("ILDG"); #endif */ -}// Grid +NAMESPACE_END(Grid); + #endif //CP_MODULES_H diff --git a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h index f7e6b17e..269caa6e 100644 --- a/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ILDGCheckpointer.h @@ -25,7 +25,7 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory 
*************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef ILDG_CHECKPOINTER #define ILDG_CHECKPOINTER @@ -35,16 +35,15 @@ directory #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); // Only for Gauge fields template class ILDGHmcCheckpointer : public BaseHmcCheckpointer { - private: +private: CheckpointerParameters Params; - public: +public: INHERIT_GIMPL_TYPES(Implementation); ILDGHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); } @@ -74,7 +73,7 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - GridBase *grid = U._grid; + GridBase *grid = U.Grid(); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); IldgWriter _IldgWriter(grid->IsBoss()); @@ -117,8 +116,8 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { << std::dec << std::endl; }; }; -} -} + +NAMESPACE_END(Grid); #endif // HAVE_LIME #endif // ILDG_CHECKPOINTER diff --git a/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h b/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h index d452b994..cfcc44d8 100644 --- a/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/NerscCheckpointer.h @@ -25,7 +25,7 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef NERSC_CHECKPOINTER #define NERSC_CHECKPOINTER @@ -33,16 +33,15 @@ directory #include #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); // Only for Gauge fields template class NerscHmcCheckpointer : public BaseHmcCheckpointer { - private: +private: 
CheckpointerParameters Params; - public: +public: INHERIT_GIMPL_TYPES(Gimpl); // only for gauge configurations NerscHmcCheckpointer(const CheckpointerParameters &Params_) { initialize(Params_); } @@ -78,6 +77,6 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer { NerscIO::readConfiguration(U, header, config); }; }; -} -} + +NAMESPACE_END(Grid); #endif diff --git a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h index 063a475c..986585ea 100644 --- a/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h +++ b/Grid/qcd/hmc/checkpointers/ScidacCheckpointer.h @@ -26,8 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#ifndef SCIDAC_CHECKPOINTER -#define SCIDAC_CHECKPOINTER +#pragma once #ifdef HAVE_LIME @@ -35,9 +34,7 @@ directory #include #include -namespace Grid { -namespace QCD { - +NAMESPACE_BEGIN(Grid); // For generic fields template class ScidacHmcCheckpointer : public BaseHmcCheckpointer { @@ -78,7 +75,7 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - GridBase *grid = U._grid; + GridBase *grid = U.Grid(); uint32_t nersc_csum,scidac_csuma,scidac_csumb; BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); ScidacWriter _ScidacWriter(grid->IsBoss()); @@ -115,8 +112,8 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer { << std::dec << std::endl; }; }; -} -} +NAMESPACE_END(Grid); + #endif // HAVE_LIME -#endif // ILDG_CHECKPOINTER + diff --git a/Grid/qcd/hmc/integrators/Integrator.h b/Grid/qcd/hmc/integrators/Integrator.h index 9d4376fc..91af7372 100644 --- a/Grid/qcd/hmc/integrators/Integrator.h +++ b/Grid/qcd/hmc/integrators/Integrator.h @@ -27,33 +27,29 @@ with this program; if not, write to the Free Software 
Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ -/* END LEGAL */ -//-------------------------------------------------------------------- + /* END LEGAL */ + //-------------------------------------------------------------------- #ifndef INTEGRATOR_INCLUDED #define INTEGRATOR_INCLUDED #include -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); class IntegratorParameters: Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(IntegratorParameters, - std::string, name, // name of the integrator - unsigned int, MDsteps, // number of outer steps - RealD, trajL, // trajectory length - ) + GRID_SERIALIZABLE_CLASS_MEMBERS(IntegratorParameters, + std::string, name, // name of the integrator + unsigned int, MDsteps, // number of outer steps + RealD, trajL) // trajectory length IntegratorParameters(int MDsteps_ = 10, RealD trajL_ = 1.0) - : MDsteps(MDsteps_), - trajL(trajL_){ - // empty body constructor - }; - + : MDsteps(MDsteps_), + trajL(trajL_) {}; template ::value, int >::type = 0 > - IntegratorParameters(ReaderClass & Reader){ + IntegratorParameters(ReaderClass & Reader) + { std::cout << GridLogMessage << "Reading integrator\n"; read(Reader, "Integrator", *this); } @@ -69,7 +65,7 @@ public: /*! 
@brief Class for Molecular Dynamics management */ template class Integrator { - protected: +protected: typedef typename FieldImplementation::Field MomentaField; //for readability typedef typename FieldImplementation::Field Field; @@ -84,7 +80,8 @@ class Integrator { const ActionSet as; - void update_P(Field& U, int level, double ep) { + void update_P(Field& U, int level, double ep) + { t_P[level] += ep; update_P(P, U, level, ep); @@ -93,16 +90,17 @@ class Integrator { // to be used by the actionlevel class to iterate // over the representations - struct _updateP { + struct _updateP + { template void operator()(std::vector*> repr_set, Repr& Rep, GF& Mom, GF& U, double ep) { for (int a = 0; a < repr_set.size(); ++a) { - FieldType forceR(U._grid); + FieldType forceR(U.Grid()); // Implement smearing only for the fundamental representation now repr_set.at(a)->deriv(Rep.U, forceR); GF force = Rep.RtoFundamentalProject(forceR); // Ta for the fundamental rep - Real force_abs = std::sqrt(norm2(force)/(U._grid->gSites())); + Real force_abs = std::sqrt(norm2(force)/(U.Grid()->gSites())); std::cout << GridLogIntegrator << "Hirep Force average: " << force_abs << std::endl; Mom -= force * ep* HMC_MOMENTUM_DENOMINATOR;; } @@ -115,8 +113,8 @@ class Integrator { for (int a = 0; a < as[level].actions.size(); ++a) { double start_full = usecond(); - Field force(U._grid); - conformable(U._grid, Mom._grid); + Field force(U.Grid()); + conformable(U.Grid(), Mom.Grid()); Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); double start_force = usecond(); @@ -126,7 +124,7 @@ class Integrator { if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); force = FieldImplementation::projectForce(force); // Ta for gauge fields double end_force = usecond(); - Real force_abs = std::sqrt(norm2(force)/U._grid->gSites()); + Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); std::cout << GridLogIntegrator << "["<& Aset, SmearingPolicy& Sm) - : Params(Par), - as(Aset), 
- P(grid), - levels(Aset.size()), - Smearer(Sm), - Representations(grid) { + : Params(Par), + as(Aset), + P(grid), + levels(Aset.size()), + Smearer(Sm), + Representations(grid) + { t_P.resize(levels, 0.0); t_U = 0.0; // initialization of smearer delegated outside of Integrator @@ -179,26 +180,29 @@ class Integrator { virtual std::string integrator_name() = 0; - void print_parameters(){ + void print_parameters() + { std::cout << GridLogMessage << "[Integrator] Name : "<< integrator_name() << std::endl; Params.print_parameters(); } - void print_actions(){ - std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::" << std::endl; - std::cout << GridLogMessage << "[Integrator] Action summary: "<action_name() << "] ID: " << actionID << std::endl; - std::cout << as[level].actions.at(actionID)->LogParameters(); - } - } - std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl; + void print_actions() + { + std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::" << std::endl; + std::cout << GridLogMessage << "[Integrator] Action summary: "<action_name() << "] ID: " << actionID << std::endl; + std::cout << as[level].actions.at(actionID)->LogParameters(); + } + } + std::cout << GridLogMessage << ":::::::::::::::::::::::::::::::::::::::::"<< std::endl; } - void reverse_momenta(){ + void reverse_momenta() + { P *= -1.0; } @@ -211,14 +215,15 @@ class Integrator { for (int a = 0; a < repr_set.size(); ++a){ repr_set.at(a)->refresh(Rep.U, pRNG); - std::cout << GridLogDebug << "Hirep refreshing pseudofermions" << std::endl; - } + std::cout << GridLogDebug << "Hirep refreshing pseudofermions" << std::endl; + } } } refresh_hireps{}; // Initialization of momenta and actions - void refresh(Field& U, GridParallelRNG& pRNG) { - assert(P._grid == U._grid); + void refresh(Field& U, GridParallelRNG& pRNG) + { + assert(P.Grid() == U.Grid()); std::cout << GridLogIntegrator << "Integrator refresh\n"; 
FieldImplementation::generate_momenta(P, pRNG); @@ -262,7 +267,8 @@ class Integrator { } S_hireps{}; // Calculate action - RealD S(Field& U) { // here also U not used + RealD S(Field& U) + { // here also U not used std::cout << GridLogIntegrator << "Integrator action\n"; @@ -287,7 +293,8 @@ class Integrator { return H; } - void integrate(Field& U) { + void integrate(Field& U) + { // reset the clocks t_U = 0; for (int level = 0; level < as.size(); ++level) { @@ -311,11 +318,9 @@ class Integrator { } - - - }; -} -} + +NAMESPACE_END(Grid); + #endif // INTEGRATOR_INCLUDED diff --git a/Grid/qcd/hmc/integrators/Integrator_algorithm.h b/Grid/qcd/hmc/integrators/Integrator_algorithm.h index 47574026..b05c4ea8 100644 --- a/Grid/qcd/hmc/integrators/Integrator_algorithm.h +++ b/Grid/qcd/hmc/integrators/Integrator_algorithm.h @@ -39,8 +39,7 @@ directory #ifndef INTEGRATOR_ALG_INCLUDED #define INTEGRATOR_ALG_INCLUDED -namespace Grid { -namespace QCD { +NAMESPACE_BEGIN(Grid); /* PAB: * @@ -93,22 +92,17 @@ namespace QCD { * P 1/2 P 1/2 */ -template > -class LeapFrog : public Integrator { - public: - typedef LeapFrog - Algorithm; +template > +class LeapFrog : public Integrator +{ +public: + typedef LeapFrog Algorithm; INHERIT_FIELD_TYPES(FieldImplementation); std::string integrator_name(){return "LeapFrog";} - LeapFrog(GridBase* grid, IntegratorParameters Par, - ActionSet& Aset, SmearingPolicy& Sm) - : Integrator( - grid, Par, Aset, Sm){}; + LeapFrog(GridBase* grid, IntegratorParameters Par, ActionSet& Aset, SmearingPolicy& Sm) + : Integrator(grid, Par, Aset, Sm){}; void step(Field& U, int level, int _first, int _last) { int fl = this->as.size() - 1; @@ -141,21 +135,17 @@ class LeapFrog : public Integrator > -class MinimumNorm2 : public Integrator { - private: +template > +class MinimumNorm2 : public Integrator +{ +private: const RealD lambda = 0.1931833275037836; - public: +public: INHERIT_FIELD_TYPES(FieldImplementation); - MinimumNorm2(GridBase* grid, IntegratorParameters Par, 
- ActionSet& Aset, SmearingPolicy& Sm) - : Integrator( - grid, Par, Aset, Sm){}; + MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet& Aset, SmearingPolicy& Sm) + : Integrator(grid, Par, Aset, Sm){}; std::string integrator_name(){return "MininumNorm2";} @@ -202,35 +192,32 @@ class MinimumNorm2 : public Integrator > -class ForceGradient : public Integrator { - private: +template > +class ForceGradient : public Integrator +{ +private: const RealD lambda = 1.0 / 6.0; - ; const RealD chi = 1.0 / 72.0; const RealD xi = 0.0; const RealD theta = 0.0; - public: +public: INHERIT_FIELD_TYPES(FieldImplementation); // Looks like dH scales as dt^4. tested wilson/wilson 2 level. ForceGradient(GridBase* grid, IntegratorParameters Par, ActionSet& Aset, SmearingPolicy& Sm) - : Integrator( - grid, Par, Aset, Sm){}; + : Integrator( + grid, Par, Aset, Sm){}; std::string integrator_name(){return "ForceGradient";} void FG_update_P(Field& U, int level, double fg_dt, double ep) { - Field Ufg(U._grid); - Field Pfg(U._grid); + Field Ufg(U.Grid()); + Field Pfg(U.Grid()); Ufg = U; - Pfg = zero; + Pfg = Zero(); std::cout << GridLogIntegrator << "FG update " << fg_dt << " " << ep << std::endl; // prepare_fg; no prediction/result cache for now // could relax CG stopping conditions for the @@ -284,10 +271,6 @@ class ForceGradient : public Integrator class ActionModule - : public Parametrized, - public ActionModuleBase< QCD::Action , QCD::GridModule > { - public: - typedef ActionModuleBase< QCD::Action, QCD::GridModule > Base; + : public Parametrized, + public ActionModuleBase< Action , GridModule > { +public: + typedef ActionModuleBase< Action, GridModule > Base; typedef typename Base::Product Product; typedef APar Parameters; @@ -76,7 +76,7 @@ class ActionModule return ActionPtr.get(); } - private: +private: virtual void initialize() = 0; }; @@ -85,28 +85,28 @@ class ActionModule // Modules ////////////////////////// -namespace QCD{ + class PlaqPlusRectangleGaugeActionParameters : 
Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(PlaqPlusRectangleGaugeActionParameters, - RealD, c_plaq, - RealD, c_rect); + RealD, c_plaq, + RealD, c_rect); }; class RBCGaugeActionParameters : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(RBCGaugeActionParameters, - RealD, beta, - RealD, c1); + RealD, beta, + RealD, c1); }; class BetaGaugeActionParameters : Serializable { - public: +public: GRID_SERIALIZABLE_CLASS_MEMBERS(BetaGaugeActionParameters, - RealD, beta); + RealD, beta); }; @@ -232,17 +232,17 @@ class TwoFlavourFModule: public PseudoFermionModuleBaseAddGridPair(GridMod); } - // constructor - template - TwoFlavourFModule(Reader& R): Base(R) { + // constructor + template + TwoFlavourFModule(Reader& R): Base(R) { this->getSolverOperator(R, solver_mod, "Solver"); this->getFermionOperator(R, fop_mod, "Operator"); - } + } // acquire resource virtual void initialize() { @@ -261,17 +261,17 @@ class TwoFlavourEOFModule: public PseudoFermionModuleBaseAddGridPair(GridMod); } - // constructor - template - TwoFlavourEOFModule(Reader& R): PseudoFermionModuleBase(R) { + // constructor + template + TwoFlavourEOFModule(Reader& R): PseudoFermionModuleBase(R) { this->getSolverOperator(R, solver_mod, "Solver"); this->getFermionOperator(R, fop_mod, "Operator"); - } + } // acquire resource virtual void initialize() { @@ -291,25 +291,25 @@ class TwoFlavourRatioFModule: public PseudoFermionModuleBaseAddGridPair(GridMod); fop_denominator_mod->AddGridPair(GridMod); } - // constructor - template - TwoFlavourRatioFModule(Reader& R): PseudoFermionModuleBase(R) { + // constructor + template + TwoFlavourRatioFModule(Reader& R): PseudoFermionModuleBase(R) { this->getSolverOperator(R, solver_mod, "Solver"); this->getFermionOperator(R, fop_numerator_mod, "Numerator"); this->getFermionOperator(R, fop_denominator_mod, "Denominator"); - } + } // acquire resource virtual void initialize() { // here temporarily assuming that the force and action solver are 
the same this->ActionPtr.reset(new TwoFlavourRatioPseudoFermionAction(*(this->fop_numerator_mod->getPtr()), - *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr()))); + *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr()))); } }; @@ -323,25 +323,25 @@ class TwoFlavourRatioEOFModule: public PseudoFermionModuleBaseAddGridPair(GridMod); fop_denominator_mod->AddGridPair(GridMod); } - // constructor - template - TwoFlavourRatioEOFModule(Reader& R): Base(R) { + // constructor + template + TwoFlavourRatioEOFModule(Reader& R): Base(R) { this->getSolverOperator(R, solver_mod, "Solver"); this->getFermionOperator(R, fop_numerator_mod, "Numerator"); this->getFermionOperator(R, fop_denominator_mod, "Denominator"); - } + } // acquire resource virtual void initialize() { // here temporarily assuming that the force and action solver are the same this->ActionPtr.reset(new TwoFlavourEvenOddRatioPseudoFermionAction(*(this->fop_numerator_mod->getPtr()), - *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr()))); + *(this->fop_denominator_mod->getPtr()), *(this->solver_mod->getPtr()), *(this->solver_mod->getPtr()))); } }; @@ -354,16 +354,16 @@ class OneFlavourFModule: public PseudoFermionModuleBaseAddGridPair(GridMod); } - // constructor - template - OneFlavourFModule(Reader& R): Base(R) { + // constructor + template + OneFlavourFModule(Reader& R): Base(R) { this->getFermionOperator(R, fop_mod, "Operator"); - } + } // acquire resource virtual void initialize() { @@ -375,22 +375,22 @@ class OneFlavourFModule: public PseudoFermionModuleBase class OneFlavourEOFModule: public PseudoFermionModuleBase - { +{ typedef PseudoFermionModuleBase Base; using Base::Base; typename Base::operator_type fop_mod; - public: +public: virtual void acquireResource(typename Base::Resource& GridMod){ fop_mod->AddGridPair(GridMod); } - // constructor - template - 
OneFlavourEOFModule(Reader& R): Base(R) { + // constructor + template + OneFlavourEOFModule(Reader& R): Base(R) { this->getFermionOperator(R, fop_mod, "Operator"); - } + } // acquire resource virtual void initialize() { @@ -403,7 +403,7 @@ class OneFlavourEOFModule: template class OneFlavourRatioFModule: public PseudoFermionModuleBase - { +{ typedef PseudoFermionModuleBase Base; using Base::Base; @@ -411,18 +411,18 @@ class OneFlavourRatioFModule: typename Base::operator_type fop_numerator_mod; typename Base::operator_type fop_denominator_mod; - public: +public: virtual void acquireResource(typename Base::Resource& GridMod){ fop_numerator_mod->AddGridPair(GridMod); fop_denominator_mod->AddGridPair(GridMod); } - // constructor - template - OneFlavourRatioFModule(Reader& R): Base(R) { + // constructor + template + OneFlavourRatioFModule(Reader& R): Base(R) { this->getFermionOperator(R, fop_numerator_mod, "Numerator"); this->getFermionOperator(R, fop_denominator_mod, "Denominator"); - } + } // acquire resource virtual void initialize() { @@ -437,7 +437,7 @@ class OneFlavourRatioFModule: template class OneFlavourRatioEOFModule: public PseudoFermionModuleBase - { +{ typedef PseudoFermionModuleBase Base; using Base::Base; @@ -445,18 +445,18 @@ class OneFlavourRatioEOFModule: typename Base::operator_type fop_numerator_mod; typename Base::operator_type fop_denominator_mod; - public: +public: virtual void acquireResource(typename Base::Resource& GridMod){ fop_numerator_mod->AddGridPair(GridMod); fop_denominator_mod->AddGridPair(GridMod); } - // constructor - template - OneFlavourRatioEOFModule(Reader& R): Base(R) { + // constructor + template + OneFlavourRatioEOFModule(Reader& R): Base(R) { this->getFermionOperator(R, fop_numerator_mod, "Numerator"); this->getFermionOperator(R, fop_denominator_mod, "Denominator"); - } + } // acquire resource virtual void initialize() { @@ -467,32 +467,20 @@ class OneFlavourRatioEOFModule: }; -}// QCD temporarily here - - - - - - - 
//////////////////////////////////////// // Factories specialisations //////////////////////////////////////// - - - // use the same classed defined by Antonin, does not make sense to rewrite // Factory is perfectly fine // Registar must be changed because I do not want to use the ModuleFactory - // explicit ref to LatticeGaugeField must be changed or put in the factory -//typedef ActionModuleBase< QCD::Action< QCD::LatticeGaugeField >, QCD::GridModule > HMC_LGTActionModBase; -//typedef ActionModuleBase< QCD::Action< QCD::LatticeReal >, QCD::GridModule > HMC_ScalarActionModBase; +//typedef ActionModuleBase< Action< LatticeGaugeField >, GridModule > HMC_LGTActionModBase; +//typedef ActionModuleBase< Action< LatticeReal >, GridModule > HMC_ScalarActionModBase; template class HMC_ActionModuleFactory - : public Factory < ActionModuleBase< QCD::Action< Field >, QCD::GridModule > , Reader > { - public: + : public Factory < ActionModuleBase< Action< Field >, GridModule > , Reader > { +public: typedef Reader TheReader; // use SINGLETON FUNCTOR MACRO HERE HMC_ActionModuleFactory(const HMC_ActionModuleFactory& e) = delete; @@ -502,16 +490,15 @@ class HMC_ActionModuleFactory return e; } - private: +private: HMC_ActionModuleFactory(void) = default; - std::string obj_type() const { - return std::string(str); + std::string obj_type() const { + return std::string(str); } }; - extern char gauge_string[]; -} // Grid +NAMESPACE_END(Grid); -#endif //HMC_MODULES_H \ No newline at end of file +#endif //HMC_MODULES_H diff --git a/Grid/qcd/modules/FermionOperatorModules.h b/Grid/qcd/modules/FermionOperatorModules.h index fc9d96a7..5af1378e 100644 --- a/Grid/qcd/modules/FermionOperatorModules.h +++ b/Grid/qcd/modules/FermionOperatorModules.h @@ -25,11 +25,11 @@ with this program; if not, write to the Free Software Foundation, Inc., See the full license in the file "LICENSE" in the top level distribution directory 
*************************************************************************************/ -/* END LEGAL */ + /* END LEGAL */ #ifndef FERMIONOPERATOR_MODULES_H #define FERMIONOPERATOR_MODULES_H -namespace Grid { +NAMESPACE_BEGIN(Grid); //////////////////////////////////// // Fermion operators @@ -37,19 +37,19 @@ namespace Grid { template < class Product> class FermionOperatorModuleBase : public HMCModuleBase{ public: - virtual void AddGridPair(QCD::GridModule&) = 0; + virtual void AddGridPair(GridModule&) = 0; }; template