diff --git a/lib/lattice/Lattice_where.h b/lib/lattice/Lattice_where.h index c83d80c1..72bc936e 100644 --- a/lib/lattice/Lattice_where.h +++ b/lib/lattice/Lattice_where.h @@ -22,7 +22,6 @@ inline void whereWolf(Lattice &ret,const Lattice &predicate,Lattice< typedef typename iobj::vector_type mask_type; const int Nsimd = grid->Nsimd(); - const int words = sizeof(vobj)/sizeof(vector_type); std::vector mask(Nsimd); std::vector truevals (Nsimd); diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index e1f65a27..9829d350 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -7,7 +7,7 @@ namespace Grid { //////////////////////////////////////////////////////////////// // Hardwire to four spinors, allow to select - // between gauge representation rank, and gparity/flavour index, + // between gauge representation rank bc's, flavours etc. // and single/double precision. //////////////////////////////////////////////////////////////// @@ -37,8 +37,7 @@ namespace Grid { typedef WilsonCompressor Compressor; - // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity - static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE){ + static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,CartesianStencil &St){ mult(&phi(),&U(mu),&chi()); } static inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) @@ -94,15 +93,73 @@ namespace Grid { // provide the multiply by link that is differentiated between Gparity (with flavour index) and // non-Gparity - static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE){ + static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,CartesianStencil &St){ // FIXME; need to be more careful. If this is a simd direction we are still stuffed - if ( SE->_around_the_world && ((mu==Xp)||(mu==Xm)) ) { - mult(&phi(0),&U(0)(mu),&chi(1)); - mult(&phi(1),&U(1)(mu),&chi(0)); + // Need access to _simd_layout[mu]. mu is not necessarily dim. + typedef SiteHalfSpinor vobj; + typedef typename SiteHalfSpinor::scalar_object sobj; + + vobj vtmp; + sobj stmp; + std::vector gpbc({0,0,0,1,0,0,0,1}); + + GridBase *grid = St._grid; + + const int Nsimd = grid->Nsimd(); + + int direction = St._directions[mu]; + int distance = St._distances[mu]; + int ptype = St._permute_type[mu]; + int sl = St._grid->_simd_layout[direction]; + + // assert our assumptions + assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code + assert((sl==1)||(sl==2)); + + std::vector icoor; + + if ( SE->_around_the_world && gpbc[mu] ) { + if ( sl == 2 ) { + + // std::cout << "multLink for mu= "< vals(Nsimd); + extract(chi,vals); + + for(int s=0;siCoorFromIindex(icoor,s); + + assert((icoor[direction]==0)||(icoor[direction]==1)); + + int permute_lane; + if ( distance == 1) { + permute_lane = icoor[direction]?1:0; + } else { + permute_lane = icoor[direction]?0:1; + } + + if ( permute_lane ) { + stmp(0) = vals[s](1); + stmp(1) = vals[s](0); + vals[s] = stmp; + } + } + + merge(vtmp,vals); + + } else { + vtmp(0) = chi(1); + vtmp(1) = chi(0); + } + mult(&phi(0),&U(0)(mu),&vtmp(0)); + mult(&phi(1),&U(1)(mu),&vtmp(1)); + } else { mult(&phi(0),&U(0)(mu),&chi(0)); mult(&phi(1),&U(1)(mu),&chi(1)); } + } static inline void InsertForce(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu){ @@ -120,7 +177,7 @@ namespace Grid { Lattice > coor(GaugeGrid); - std::vector gpdirs({1,0,0,0}); + std::vector gpdirs({0,0,0,1}); for(int mu=0;mu - class GparityWilsonCompressor : public WilsonCompressor{ - public: - GparityWilsonCompressor(int _dag) : WilsonCompressor (_dag){}; - - SiteHalfSpinor operator () (const SiteSpinor &in,int dim,int plane,int osite,GridBase *grid) - { - std::vector Gbcs({1,0,0,0}); - - typedef typename SiteHalfSpinor::scalar_object scalar_object; - - const int Nsimd = grid->Nsimd(); - - int checkered=grid->CheckerBoarded(dim); - int ocb=grid->CheckerBoardFromOindex(osite); - - SiteHalfSpinor tmp = this->spinproject(in); // spin projected - - ////////////////////////////////////////////////////////////// - // Check whether we must flavour flip - // do this if source is plane 0 on processor 0 in dimension dim - ////////////////////////////////////////////////////////////// - int do_flip = 0; - int flipicoor= 0; - if(Gbcs[this->mu]){ - - std::cout << "Applying Gparity BC's in direction "<mu<mu==Xp)||(this->mu==Yp)||(this->mu==Zp)||(this->mu==Tp) ) { - if ( (grid->_processor_coor[dim] == 0) - && (plane==0) - && ( (!checkered)||(ocb==0) ) ) { - do_flip=1; - flipicoor=0; - } - } - if ( (this->mu==Xm)||(this->mu==Ym)||(this->mu==Zm)||(this->mu==Tm) ) { - if ( (grid->_processor_coor[dim] == (grid->_processors[dim]-1) ) - && (plane==grid->_rdimensions[dim]-1) - && ( (!checkered)||(ocb==1) ) ) { - do_flip=1; - flipicoor=grid->_simd_layout[dim]-1; - } - } - } - if ( do_flip ) { - - std::cout << "Applying Gparity BC's in direction "<mu<< " osite " << osite << " plane "< flat(Nsimd); - std::vector coor; - - extract(tmp,flat); - for(int i=0;iiCoorFromIindex(coor,i); - if ( coor[dim]==flipicoor ) { - scalar_object stmp; - stmp(0) = flat[i](1); - stmp(1) = flat[i](0); - flat[i] = stmp; - } - } - merge(tmp,flat); - - } - - return tmp; - } - - }; - - */ }} // namespace close #endif diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index d098d138..77f68e4d 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -24,7 +24,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st); spReconXp(result,Uchi); // Yp @@ -37,7 +37,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st); accumReconYp(result,Uchi); // Zp @@ -50,7 +50,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st); accumReconZp(result,Uchi); // Tp @@ -63,7 +63,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st); accumReconTp(result,Uchi); // Xm @@ -76,7 +76,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st); accumReconXm(result,Uchi); // Ym @@ -89,7 +89,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st); accumReconYm(result,Uchi); // Zm @@ -102,7 +102,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st); accumReconZm(result,Uchi); // Tm @@ -115,7 +115,7 @@ void WilsonKernels::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st); accumReconTm(result,Uchi); vstream(out._odata[sF],result*(-0.5)); @@ -143,7 +143,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st); spReconXp(result,Uchi); // Yp @@ -156,7 +156,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st); accumReconYp(result,Uchi); // Zp @@ -169,7 +169,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st); accumReconZp(result,Uchi); // Tp @@ -182,7 +182,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st); accumReconTp(result,Uchi); // Xm @@ -195,7 +195,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st); accumReconXm(result,Uchi); // Ym @@ -208,7 +208,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st); accumReconYm(result,Uchi); // Zm @@ -221,7 +221,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st); accumReconZm(result,Uchi); // Tm @@ -234,7 +234,7 @@ void WilsonKernels::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE); + Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st); accumReconTm(result,Uchi); vstream(out._odata[sF],result*(-0.5)); @@ -264,7 +264,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconXp(result,Uchi); } @@ -278,7 +278,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconYp(result,Uchi); } @@ -292,7 +292,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconZp(result,Uchi); } @@ -306,7 +306,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconTp(result,Uchi); } @@ -320,7 +320,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconXm(result,Uchi); } @@ -334,7 +334,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconYm(result,Uchi); } @@ -348,7 +348,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconZm(result,Uchi); } @@ -362,7 +362,7 @@ void WilsonKernels::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField } else { chi=buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE); + Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); spReconTm(result,Uchi); } diff --git a/tests/Test_gparity.cc b/tests/Test_gparity.cc index 52bf5e27..271c8548 100644 --- a/tests/Test_gparity.cc +++ b/tests/Test_gparity.cc @@ -44,7 +44,7 @@ void Replicate(Lattice &coarse,Lattice & fine) int main (int argc, char ** argv) { - const int nu = 0; + const int nu = 3; Grid_init(&argc,&argv);