mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-11 03:46:55 +01:00
Merge remote-tracking branch 'origin/develop' into temporary-smearing
This commit is contained in:
@ -90,7 +90,7 @@ namespace QCD {
|
||||
template<typename vtype> using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
|
||||
template<typename vtype> using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
|
||||
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
template<typename vtype> using iGparitySpinColourVector = iVector<iVector<iVector<vtype, Nc>, Ns>, Ngp >;
|
||||
template<typename vtype> using iGparityHalfSpinColourVector = iVector<iVector<iVector<vtype, Nc>, Nhs>, Ngp >;
|
||||
|
||||
// Spin matrix
|
||||
@ -383,7 +383,6 @@ namespace QCD {
|
||||
//////////////////////////////////////////////
|
||||
// Poke scalars
|
||||
//////////////////////////////////////////////
|
||||
|
||||
template<class vobj> void pokeSpin(vobj &lhs,const decltype(peekIndex<SpinIndex>(lhs,0)) & rhs,int i)
|
||||
{
|
||||
pokeIndex<SpinIndex>(lhs,rhs,i);
|
||||
@ -407,7 +406,41 @@ namespace QCD {
|
||||
pokeIndex<LorentzIndex>(lhs,rhs,i);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Fermion <-> propagator assignements
|
||||
//////////////////////////////////////////////
|
||||
template <class Prop, class Ferm>
|
||||
void FermToProp(Prop &p, const Ferm &f, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Nc; ++i)
|
||||
{
|
||||
pokeColour(pjs, peekColour(fj, i), i, c);
|
||||
}
|
||||
pokeSpin(p, pjs, j, s);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Prop, class Ferm>
|
||||
void PropToFerm(Ferm &f, const Prop &p, const int s, const int c)
|
||||
{
|
||||
for(int j = 0; j < Ns; ++j)
|
||||
{
|
||||
auto pjs = peekSpin(p, j, s);
|
||||
auto fj = peekSpin(f, j);
|
||||
|
||||
for(int i = 0; i < Nc; ++i)
|
||||
{
|
||||
pokeColour(fj, peekColour(pjs, i, c), i);
|
||||
}
|
||||
pokeSpin(f, fj, j);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// transpose array and scalar
|
||||
//////////////////////////////////////////////
|
||||
|
@ -109,10 +109,12 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
|
||||
|
||||
#define FermOpTemplateInstantiate(A) \
|
||||
template class A<WilsonImplF>; \
|
||||
template class A<WilsonImplD>; \
|
||||
template class A<WilsonImplD>; \
|
||||
template class A<GparityWilsonImplF>; \
|
||||
template class A<GparityWilsonImplD>;
|
||||
|
||||
#define GparityFermOpTemplateInstantiate(A)
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Fermion operators / actions
|
||||
////////////////////////////////////////////
|
||||
@ -208,6 +210,14 @@ typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
|
||||
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
|
||||
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
|
||||
|
||||
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
|
||||
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
|
||||
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
|
||||
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
|
||||
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
|
||||
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
|
||||
|
||||
|
||||
}}
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
|
||||
|
@ -527,6 +527,7 @@ namespace QCD {
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(CayleyFermion5D);
|
||||
GparityFermOpTemplateInstantiate(CayleyFermion5D);
|
||||
|
||||
}}
|
||||
|
||||
|
@ -130,7 +130,7 @@ namespace Grid {
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
|
||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
@ -142,6 +142,10 @@ namespace Grid {
|
||||
mult(&phi(),&U(mu),&chi());
|
||||
}
|
||||
|
||||
template<class ref>
|
||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
||||
reg = memory;
|
||||
}
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
conformable(Uds._grid,GaugeGrid);
|
||||
@ -181,6 +185,100 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
///////
|
||||
// Single flavour four spinors with colour index, 5d redblack
|
||||
///////
|
||||
template<class S,int Nrepresentation=Nc>
|
||||
class DomainWallRedBlack5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||
public:
|
||||
|
||||
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
|
||||
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
|
||||
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||
|
||||
typedef iImplSpinor <Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
|
||||
// Make the doubled gauge field a *scalar*
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink <typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {};
|
||||
|
||||
bool overlapCommsCompute(void) { return false; };
|
||||
|
||||
template<class ref>
|
||||
inline void loadLinkElement(Simd & reg,ref &memory){
|
||||
vsplat(reg,memory);
|
||||
}
|
||||
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
||||
{
|
||||
SiteGaugeLink UU;
|
||||
for(int i=0;i<Nrepresentation;i++){
|
||||
for(int j=0;j<Nrepresentation;j++){
|
||||
vsplat(UU()()(i,j),U(mu)()(i,j));
|
||||
}
|
||||
}
|
||||
mult(&phi(),&UU(),&chi());
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
SiteScalarGaugeField ScalarUmu;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
GaugeLinkField U (Umu._grid);
|
||||
GaugeField Uadj(Umu._grid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
U = adj(Cshift(U,mu,-1));
|
||||
PokeIndex<LorentzIndex>(Uadj,U,mu);
|
||||
}
|
||||
|
||||
for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor);
|
||||
|
||||
peekLocalSite(ScalarUmu,Umu,lcoor);
|
||||
for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
|
||||
peekLocalSite(ScalarUmu,Uadj,lcoor);
|
||||
for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu);
|
||||
|
||||
pokeLocalSite(ScalarUds,Uds,lcoor);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
|
||||
assert(0);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -205,7 +303,7 @@ PARALLEL_FOR_LOOP
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
|
||||
typedef CartesianStencil<SiteSpinor,SiteHalfSpinor,Compressor> StencilImpl;
|
||||
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
|
||||
|
||||
typedef GparityWilsonImplParams ImplParams;
|
||||
|
||||
@ -290,8 +388,8 @@ PARALLEL_FOR_LOOP
|
||||
conformable(Uds._grid,GaugeGrid);
|
||||
conformable(Umu._grid,GaugeGrid);
|
||||
|
||||
GaugeLinkField Utmp(GaugeGrid);
|
||||
GaugeLinkField U(GaugeGrid);
|
||||
GaugeLinkField Utmp (GaugeGrid);
|
||||
GaugeLinkField U (GaugeGrid);
|
||||
GaugeLinkField Uconj(GaugeGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
@ -379,6 +477,10 @@ PARALLEL_FOR_LOOP
|
||||
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
|
||||
|
||||
typedef DomainWallRedBlack5dImpl<vComplex ,Nc> DomainWallRedBlack5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallRedBlack5dImpl<vComplexF,Nc> DomainWallRedBlack5dImplF; // Float
|
||||
typedef DomainWallRedBlack5dImpl<vComplexD,Nc> DomainWallRedBlack5dImplD; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
|
||||
|
@ -48,14 +48,16 @@ namespace Grid {
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD _M5,
|
||||
RealD scale) :
|
||||
// RealD scale):
|
||||
RealD scale,const ImplParams &p= ImplParams()) :
|
||||
|
||||
// b+c=scale, b-c = 1 <=> 2b = scale+1; 2c = scale-1
|
||||
MobiusFermion<Impl>(_Umu,
|
||||
FiveDimGrid,
|
||||
FiveDimRedBlackGrid,
|
||||
FourDimGrid,
|
||||
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
|
||||
FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0),p)
|
||||
// FourDimRedBlackGrid,_mass,_M5,0.5*(scale+1.0),0.5*(scale-1.0))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -48,12 +48,7 @@ namespace QCD {
|
||||
mu=p;
|
||||
};
|
||||
|
||||
virtual SiteHalfSpinor operator () (const SiteSpinor &in,int dim,int plane,int osite,GridBase *grid) {
|
||||
return spinproject(in);
|
||||
}
|
||||
|
||||
SiteHalfSpinor spinproject(const SiteSpinor &in)
|
||||
{
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
int mudag=mu;
|
||||
if (!dag) {
|
||||
@ -92,6 +87,173 @@ namespace QCD {
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////
|
||||
// optimised versions
|
||||
/////////////////////////
|
||||
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonXpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjXp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonYpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjYp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonZpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjZp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonTpCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjTp(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonXmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjXm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonYmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjYm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonZmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjZm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
template<class SiteHalfSpinor,class SiteSpinor>
|
||||
class WilsonTmCompressor {
|
||||
public:
|
||||
inline SiteHalfSpinor operator () (const SiteSpinor &in) {
|
||||
SiteHalfSpinor ret;
|
||||
spProjTm(ret,in);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
// Fast comms buffer manipulation which should inline right through (avoid direction
|
||||
// dependent logic that prevents inlining
|
||||
template<class vobj,class cobj>
|
||||
class WilsonStencil : public CartesianStencil<vobj,cobj> {
|
||||
public:
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
int npoints,
|
||||
int checkerboard,
|
||||
const std::vector<int> &directions,
|
||||
const std::vector<int> &distances) : CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||
{ };
|
||||
|
||||
template < class compressor>
|
||||
std::thread HaloExchangeOptBegin(const Lattice<vobj> &source,compressor &compress) {
|
||||
this->Mergers.resize(0);
|
||||
this->Packets.resize(0);
|
||||
this->HaloGatherOpt(source,compress);
|
||||
return std::thread([&] { this->Communicate(); });
|
||||
}
|
||||
|
||||
template < class compressor>
|
||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
auto thr = this->HaloExchangeOptBegin(source,compress);
|
||||
this->HaloExchangeOptComplete(thr);
|
||||
}
|
||||
|
||||
void HaloExchangeOptComplete(std::thread &thr)
|
||||
{
|
||||
this->CommsMerge(); // spins
|
||||
this->jointime-=usecond();
|
||||
thr.join();
|
||||
this->jointime+=usecond();
|
||||
}
|
||||
|
||||
template < class compressor>
|
||||
void HaloGatherOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
{
|
||||
// conformable(source._grid,_grid);
|
||||
assert(source._grid==this->_grid);
|
||||
this->halogtime-=usecond();
|
||||
|
||||
assert (this->comm_buf.size() == this->_unified_buffer_size );
|
||||
this->u_comm_offset=0;
|
||||
|
||||
int dag = compress.dag;
|
||||
static std::vector<int> dirs(Nd*2);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
if ( dag ) {
|
||||
dirs[mu] =mu;
|
||||
dirs[mu+4]=mu+Nd;
|
||||
} else {
|
||||
dirs[mu] =mu+Nd;
|
||||
dirs[mu+Nd]=mu;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
WilsonXpCompressor<cobj,vobj> XpCompress;
|
||||
this->HaloGatherDir(source,XpCompress,dirs[0]);
|
||||
|
||||
WilsonYpCompressor<cobj,vobj> YpCompress;
|
||||
this->HaloGatherDir(source,YpCompress,dirs[1]);
|
||||
|
||||
WilsonZpCompressor<cobj,vobj> ZpCompress;
|
||||
this->HaloGatherDir(source,ZpCompress,dirs[2]);
|
||||
|
||||
WilsonTpCompressor<cobj,vobj> TpCompress;
|
||||
this->HaloGatherDir(source,TpCompress,dirs[3]);
|
||||
|
||||
WilsonXmCompressor<cobj,vobj> XmCompress;
|
||||
this->HaloGatherDir(source,XmCompress,dirs[4]);
|
||||
|
||||
WilsonYmCompressor<cobj,vobj> YmCompress;
|
||||
this->HaloGatherDir(source,YmCompress,dirs[5]);
|
||||
|
||||
WilsonZmCompressor<cobj,vobj> ZmCompress;
|
||||
this->HaloGatherDir(source,ZmCompress,dirs[6]);
|
||||
|
||||
WilsonTmCompressor<cobj,vobj> TmCompress;
|
||||
this->HaloGatherDir(source,TmCompress,dirs[7]);
|
||||
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
this->halogtime+=usecond();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
}} // namespace close
|
||||
#endif
|
||||
|
@ -64,7 +64,9 @@ namespace QCD {
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
{
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,_Umu);
|
||||
GaugeField HUmu(_Umu._grid);
|
||||
HUmu = _Umu*(-0.5);
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
||||
pickCheckerboard(Even,UmuEven,Umu);
|
||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
||||
}
|
||||
@ -286,121 +288,27 @@ PARALLEL_FOR_LOOP
|
||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
if ( Impl::overlapCommsCompute () ) {
|
||||
DhopInternalCommsOverlapCompute(st,U,in,out,dag);
|
||||
} else {
|
||||
DhopInternalCommsThenCompute(st,U,in,out,dag);
|
||||
}
|
||||
}
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
|
||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
Compressor compressor(dag);
|
||||
st.HaloExchange(in,compressor);
|
||||
|
||||
if ( dag == DaggerYes ) {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out);
|
||||
}
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,1,1,in,out);
|
||||
}
|
||||
} else {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sss,sss,in,out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out);
|
||||
}
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,1,1,in,out);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
|
||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
Compressor compressor(dag);
|
||||
|
||||
auto handle = st.HaloExchangeBegin(in,compressor);
|
||||
|
||||
bool local = true;
|
||||
bool nonlocal = false;
|
||||
if ( dag == DaggerYes ) {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
st.HaloExchangeComplete(handle);
|
||||
|
||||
local = false;
|
||||
nonlocal = true;
|
||||
if ( dag == DaggerYes ) {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if( HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<in._grid->oSites();sss++){
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sss,sss,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
FermOpTemplateInstantiate(WilsonFermion);
|
||||
GparityFermOpTemplateInstantiate(WilsonFermion);
|
||||
|
||||
|
||||
}}
|
||||
|
@ -114,12 +114,6 @@ namespace Grid {
|
||||
void DhopInternal(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag) ;
|
||||
|
||||
void DhopInternalCommsThenCompute(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag) ;
|
||||
void DhopInternalCommsOverlapCompute(StencilImpl & st,DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag) ;
|
||||
|
||||
|
||||
// Constructor
|
||||
WilsonFermion(GaugeField &_Umu,
|
||||
GridCartesian &Fgrid,
|
||||
|
@ -1,4 +1,4 @@
|
||||
/*************************************************************************************
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
@ -38,8 +38,6 @@ namespace QCD {
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
const std::vector<int> WilsonFermion5DStatic::directions ({1,2,3,4, 1, 2, 3, 4});
|
||||
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
|
||||
int WilsonFermion5DStatic::HandOptDslash;
|
||||
int WilsonFermion5DStatic::AsmOptDslash;
|
||||
|
||||
// 5d lattice for DWF.
|
||||
template<class Impl>
|
||||
@ -67,10 +65,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FourDimRedBlackGrid._ndimension==4);
|
||||
|
||||
assert(FiveDimRedBlackGrid._checker_dim==1);
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
@ -99,16 +95,74 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
|
||||
// Allocate the required comms buffer
|
||||
ImportGauge(_Umu);
|
||||
alltime=0;
|
||||
commtime=0;
|
||||
jointime=0;
|
||||
dslashtime=0;
|
||||
dslash1time=0;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
RealD _M5,const ImplParams &p) :
|
||||
Kernels(p),
|
||||
_FiveDimGrid (&FiveDimGrid),
|
||||
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
|
||||
_FourDimGrid (&FourDimGrid),
|
||||
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
|
||||
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
|
||||
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
|
||||
M5(_M5),
|
||||
Umu(_FourDimGrid),
|
||||
UmuEven(_FourDimGrid),
|
||||
UmuOdd (_FourDimGrid),
|
||||
Lebesgue(_FourDimGrid),
|
||||
LebesgueEvenOdd(_FourDimGrid)
|
||||
{
|
||||
int nsimd = Simd::Nsimd();
|
||||
|
||||
// some assertions
|
||||
assert(FiveDimGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._ndimension==5);
|
||||
assert(FiveDimRedBlackGrid._checker_dim==0); // Checkerboard the s-direction
|
||||
assert(FourDimGrid._ndimension==4);
|
||||
|
||||
// Dimension zero of the five-d is the Ls direction
|
||||
Ls=FiveDimGrid._fdimensions[0];
|
||||
assert(FiveDimGrid._processors[0] ==1);
|
||||
assert(FiveDimGrid._simd_layout[0] ==nsimd);
|
||||
|
||||
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
|
||||
assert(FiveDimRedBlackGrid._processors[0] ==1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
|
||||
|
||||
// Other dimensions must match the decomposition of the four-D fields
|
||||
for(int d=0;d<4;d++){
|
||||
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
|
||||
assert(FourDimGrid._simd_layout[d]=1);
|
||||
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
|
||||
|
||||
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
|
||||
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
|
||||
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
|
||||
}
|
||||
|
||||
{
|
||||
GaugeField HUmu(_Umu._grid);
|
||||
HUmu = _Umu*(-0.5);
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
||||
UmuEven=Umu;// Really want a reference.
|
||||
UmuOdd =Umu;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
{
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,_Umu);
|
||||
GaugeField HUmu(_Umu._grid);
|
||||
HUmu = _Umu*(-0.5);
|
||||
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
|
||||
pickCheckerboard(Even,UmuEven,Umu);
|
||||
pickCheckerboard(Odd ,UmuOdd,Umu);
|
||||
}
|
||||
@ -232,30 +286,6 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::Report(void)
|
||||
{
|
||||
std::cout<<GridLogMessage << "******************** WilsonFermion"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Wilson5d time "<<alltime <<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "HaloBegin time "<<commtime <<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Dslash time "<<dslashtime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Dslash1 time "<<dslash1time<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "HaloComplete time "<<jointime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "******************** Stencil"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil all gather time "<<Stencil.halogtime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil nosplice gather time "<<Stencil.nosplicetime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil splice gather time "<<Stencil.splicetime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "********************"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil gather "<<Stencil.gathertime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil gather simd "<<Stencil.gathermtime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil merge simd "<<Stencil.mergetime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil spin simd "<<Stencil.spintime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "********************"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil MB/s "<<(double)Stencil.comms_bytes/Stencil.commtime<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil comm time "<<Stencil.commtime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "Stencil join time "<<Stencil.jointime<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "********************"<<std::endl;
|
||||
}
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
@ -277,280 +307,32 @@ template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
if ( Impl::overlapCommsCompute () ) {
|
||||
DhopInternalCommsOverlapCompute(st,lo,U,in,out,dag);
|
||||
} else {
|
||||
DhopInternalCommsThenCompute(st,lo,U,in,out,dag);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternalCommsThenCompute(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
alltime-=usecond();
|
||||
Compressor compressor(dag);
|
||||
|
||||
// Assume balanced KMP_AFFINITY; this is forced in GridThread.h
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
int HT = GridThread::GetHyperThreads();
|
||||
int cores = GridThread::GetCores();
|
||||
int nwork = U._grid->oSites();
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
|
||||
commtime -=usecond();
|
||||
auto handle = st.HaloExchangeBegin(in,compressor);
|
||||
st.HaloExchangeComplete(handle);
|
||||
commtime +=usecond();
|
||||
|
||||
jointime -=usecond();
|
||||
jointime +=usecond();
|
||||
st.HaloExchange(in,compressor);
|
||||
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
// Not loop ordering and data layout.
|
||||
// Designed to create
|
||||
// - per thread reuse in L1 cache for U
|
||||
// - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
|
||||
dslashtime -=usecond();
|
||||
if ( dag == DaggerYes ) {
|
||||
if( this->HandOptDslash ) {
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
{
|
||||
int sd;
|
||||
for(sd=0;sd<Ls;sd++){
|
||||
int sU=ss;
|
||||
int sF = sd+Ls*sU;
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
int sF=LLs*sU;
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
||||
}
|
||||
} else {
|
||||
if( this->AsmOptDslash ) {
|
||||
// for(int i=0;i<1;i++){
|
||||
// for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
|
||||
// PerformanceCounter Counter(i);
|
||||
// Counter.Start();
|
||||
|
||||
#pragma omp parallel for
|
||||
for(int t=0;t<threads;t++){
|
||||
|
||||
int hyperthread = t%HT;
|
||||
int core = t/HT;
|
||||
|
||||
int sswork, swork,soff,ssoff, sU,sF;
|
||||
|
||||
GridThread::GetWork(nwork,core,sswork,ssoff,cores);
|
||||
GridThread::GetWork(Ls , hyperthread, swork, soff,HT);
|
||||
|
||||
for(int ss=0;ss<sswork;ss++){
|
||||
for(int s=soff;s<soff+swork;s++){
|
||||
|
||||
sU=ss+ ssoff;
|
||||
|
||||
if ( LebesgueOrder::UseLebesgueOrder ) {
|
||||
sU = lo.Reorder(sU);
|
||||
}
|
||||
sF = s+Ls*sU;
|
||||
Kernels::DiracOptAsmDhopSite(st,U,st.comm_buf,sF,sU,in,out,(uint64_t *)0);// &buf[0]
|
||||
}
|
||||
}
|
||||
}
|
||||
// Counter.Stop();
|
||||
// Counter.Report();
|
||||
// }
|
||||
} else if( this->HandOptDslash ) {
|
||||
/*
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int t=0;t<threads;t++){
|
||||
|
||||
int hyperthread = t%HT;
|
||||
int core = t/HT;
|
||||
|
||||
int sswork, swork,soff,ssoff, sU,sF;
|
||||
|
||||
GridThread::GetWork(nwork,core,sswork,ssoff,cores);
|
||||
GridThread::GetWork(Ls , hyperthread, swork, soff,HT);
|
||||
|
||||
for(int ss=0;ss<sswork;ss++){
|
||||
sU=ss+ ssoff;
|
||||
for(int s=soff;s<soff+swork;s++){
|
||||
sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out);
|
||||
}
|
||||
}
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
int sF=LLs*sU;
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
||||
}
|
||||
}
|
||||
dslashtime +=usecond();
|
||||
alltime+=usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternalCommsOverlapCompute(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
alltime-=usecond();
|
||||
|
||||
int calls;
|
||||
int updates;
|
||||
Compressor compressor(dag);
|
||||
|
||||
// Assume balanced KMP_AFFINITY; this is forced in GridThread.h
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
int HT = GridThread::GetHyperThreads();
|
||||
int cores = GridThread::GetCores();
|
||||
int nwork = U._grid->oSites();
|
||||
|
||||
commtime -=usecond();
|
||||
auto handle = st.HaloExchangeBegin(in,compressor);
|
||||
commtime +=usecond();
|
||||
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
// Not loop ordering and data layout.
|
||||
// Designed to create
|
||||
// - per thread reuse in L1 cache for U
|
||||
// - 8 linear access unit stride streams per thread for Fermion for hw prefetchable.
|
||||
bool local = true;
|
||||
bool nonlocal = false;
|
||||
dslashtime -=usecond();
|
||||
if ( dag == DaggerYes ) {
|
||||
if( this->HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
{
|
||||
int sd;
|
||||
for(sd=0;sd<Ls;sd++){
|
||||
int sU=ss;
|
||||
int sF = sd+Ls*sU;
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if( this->HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dslashtime +=usecond();
|
||||
|
||||
jointime -=usecond();
|
||||
st.HaloExchangeComplete(handle);
|
||||
jointime +=usecond();
|
||||
|
||||
local = false;
|
||||
nonlocal = true;
|
||||
dslash1time -=usecond();
|
||||
if ( dag == DaggerYes ) {
|
||||
if( this->HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
{
|
||||
int sd;
|
||||
for(sd=0;sd<Ls;sd++){
|
||||
int sU=ss;
|
||||
int sF = sd+Ls*sU;
|
||||
Kernels::DiracOptDhopSiteDag(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if( this->HandOptDslash ) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptHandDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int ss=0;ss<U._grid->oSites();ss++){
|
||||
int sU=ss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptDhopSite(st,U,st.comm_buf,sF,sU,in,out,local,nonlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dslash1time +=usecond();
|
||||
alltime+=usecond();
|
||||
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
@ -593,7 +375,10 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
|
||||
}
|
||||
|
||||
FermOpTemplateInstantiate(WilsonFermion5D);
|
||||
|
||||
GparityFermOpTemplateInstantiate(WilsonFermion5D);
|
||||
template class WilsonFermion5D<DomainWallRedBlack5dImplF>;
|
||||
template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
|
||||
|
||||
}}
|
||||
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
@ -48,8 +49,6 @@ namespace Grid {
|
||||
class WilsonFermion5DStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static int AsmOptDslash; // these are a temporary hack
|
||||
static int HandOptDslash; // these are a temporary hack
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
const int npoint = 8;
|
||||
@ -61,11 +60,7 @@ namespace Grid {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
double alltime;
|
||||
double jointime;
|
||||
double commtime;
|
||||
double dslashtime;
|
||||
double dslash1time;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
@ -86,6 +81,7 @@ namespace Grid {
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
// These can be overridden by fancy 5d chiral action
|
||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
@ -120,19 +116,6 @@ namespace Grid {
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
void DhopInternalCommsThenCompute(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
void DhopInternalCommsOverlapCompute(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
// Constructors
|
||||
WilsonFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
@ -141,14 +124,21 @@ namespace Grid {
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
// Constructors
|
||||
WilsonFermion5D(int simd,
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
// DoubleStore
|
||||
void ImportGauge(const GaugeField &_Umu);
|
||||
|
||||
void Report(void);
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
protected:
|
||||
public:
|
||||
|
||||
// Add these to the support from Wilson
|
||||
GridBase *_FourDimGrid;
|
||||
|
@ -31,440 +31,410 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
int WilsonKernelsStatic::HandOpt;
|
||||
int WilsonKernelsStatic::AsmOpt;
|
||||
|
||||
template<class Impl>
|
||||
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
|
||||
|
||||
// Need controls to do interior, exterior, or both
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
||||
{
|
||||
if ( AsmOpt ) {
|
||||
|
||||
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
|
||||
} else {
|
||||
|
||||
for(int site=0;site<Ns;site++) {
|
||||
for(int s=0;s<Ls;s++) {
|
||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,U,buf,sF,sU,in,out);
|
||||
else WilsonKernels<Impl>::DiracOptGenericDhopSite(st,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
|
||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
|
||||
{
|
||||
// No asm implementation yet.
|
||||
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,U,buf,sF,sU,in,out);
|
||||
// else
|
||||
for(int site=0;site<Ns;site++) {
|
||||
for(int s=0;s<Ls;s++) {
|
||||
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,U,buf,sF,sU,in,out);
|
||||
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
////////////////////////////////////////////
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
int num = 0;
|
||||
|
||||
result=zero;
|
||||
|
||||
///////////////////////////
|
||||
// Xp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Xp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if (SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjXp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjXp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
|
||||
accumReconXp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Yp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Yp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjYp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjYp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
|
||||
accumReconYp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
||||
accumReconYp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Zp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Zp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjZp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjZp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
|
||||
accumReconZp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
||||
accumReconZp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Tp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Tp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjTp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjTp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
|
||||
accumReconTp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
||||
accumReconTp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Xm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Xm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjXm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjXm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
|
||||
accumReconXm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Ym
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Ym,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjYm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjYm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
|
||||
accumReconYm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
||||
accumReconYm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Zm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Zm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjZm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjZm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
|
||||
accumReconZm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
||||
accumReconZm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Tm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Tm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjTm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjTm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
||||
accumReconTm(result,Uchi);
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
|
||||
accumReconTm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
|
||||
if ( local ) {
|
||||
vstream(out._odata[sF],result*(-0.5));
|
||||
} else if ( num ) {
|
||||
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
|
||||
}
|
||||
vstream(out._odata[sF],result);
|
||||
};
|
||||
|
||||
|
||||
// Need controls to do interior, exterior, or both
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
SiteHalfSpinor Uchi;
|
||||
SiteSpinor result;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
|
||||
int num = 0;
|
||||
|
||||
result=zero;
|
||||
|
||||
///////////////////////////
|
||||
// Xp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Xm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjXp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjXp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
|
||||
accumReconXp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st);
|
||||
spReconXp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Yp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Ym,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjYp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjYp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
|
||||
accumReconYp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st);
|
||||
accumReconYp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Zp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Zm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjZp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjZp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
|
||||
accumReconZp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st);
|
||||
accumReconZp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Tp
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Tm,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjTp(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjTp(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
|
||||
accumReconTp(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st);
|
||||
accumReconTp(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Xm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Xp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjXm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjXm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
|
||||
accumReconXm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st);
|
||||
accumReconXm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Ym
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Yp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjYm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjYm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
|
||||
accumReconYm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st);
|
||||
accumReconYm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Zm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Zp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjZm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjZm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
|
||||
accumReconZm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st);
|
||||
accumReconZm(result,Uchi);
|
||||
|
||||
///////////////////////////
|
||||
// Tm
|
||||
///////////////////////////
|
||||
SE=st.GetEntry(ptype,Tp,sF);
|
||||
|
||||
if (local && SE->_is_local ) {
|
||||
if ( SE->_is_local ) {
|
||||
chi_p = χ
|
||||
if ( SE->_permute ) {
|
||||
spProjTm(tmp,in._odata[SE->_offset]);
|
||||
permute(chi,tmp,ptype);
|
||||
} else {
|
||||
spProjTm(chi,in._odata[SE->_offset]);
|
||||
}
|
||||
} else {
|
||||
chi_p=&buf[SE->_offset];
|
||||
}
|
||||
|
||||
if ( nonlocal && (!SE->_is_local) ) {
|
||||
chi=buf[SE->_offset];
|
||||
}
|
||||
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st);
|
||||
accumReconTm(result,Uchi);
|
||||
|
||||
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
|
||||
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
|
||||
accumReconTm(result,Uchi);
|
||||
num++;
|
||||
}
|
||||
|
||||
if ( local ) {
|
||||
vstream(out._odata[sF],result*(-0.5));
|
||||
} else if ( num ) {
|
||||
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
|
||||
}
|
||||
vstream(out._odata[sF],result);
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
@ -593,19 +563,13 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
||||
spReconTm(result,Uchi);
|
||||
}
|
||||
|
||||
vstream(out._odata[sF],result*(-0.5));
|
||||
vstream(out._odata[sF],result);
|
||||
}
|
||||
|
||||
#if ( ! defined(AVX512) ) && ( ! defined(IMCI) )
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
|
||||
{
|
||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
}
|
||||
#endif
|
||||
|
||||
FermOpTemplateInstantiate(WilsonKernels);
|
||||
|
||||
template class WilsonKernels<DomainWallRedBlack5dImplF>;
|
||||
template class WilsonKernels<DomainWallRedBlack5dImplD>;
|
||||
|
||||
}}
|
||||
|
@ -38,37 +38,56 @@ namespace Grid {
|
||||
// Helper routines that implement Wilson stencil for a single site.
|
||||
// Common to both the WilsonFermion and WilsonFermion5D
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class WilsonKernelsStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static int AsmOpt; // these are a temporary hack
|
||||
static int HandOpt; // these are a temporary hack
|
||||
};
|
||||
|
||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> {
|
||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||
public:
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
public:
|
||||
|
||||
void DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
|
||||
int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in,FermionField &out,bool local= true, bool nonlocal=true);
|
||||
int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
||||
|
||||
private:
|
||||
// Specialised variants
|
||||
void DiracOptGenericDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptGenericDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in,FermionField &out);
|
||||
|
||||
void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
|
||||
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
|
||||
void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
|
||||
int sF,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
|
||||
int sF,int sU,const FermionField &in, FermionField &out);
|
||||
public:
|
||||
|
||||
WilsonKernels(const ImplParams &p= ImplParams());
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
|
||||
|
||||
Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
@ -26,320 +28,88 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid.h>
|
||||
#if defined(AVX512) || defined (IMCI)
|
||||
|
||||
#include <simd/Avx512Asm.h>
|
||||
|
||||
#undef VLOAD
|
||||
#undef VSTORE
|
||||
#undef VMUL
|
||||
#undef VMADD
|
||||
#undef ZEND
|
||||
#undef ZLOAD
|
||||
#undef ZMUL
|
||||
#undef ZMADD
|
||||
#undef VZERO
|
||||
#undef VTIMESI
|
||||
#undef VTIMESMINUSI
|
||||
|
||||
#define VZERO(A) VZEROf(A)
|
||||
#define VMOV(A,B) VMOVf(A,B)
|
||||
#define VLOAD(OFF,PTR,DEST) VLOADf(OFF,PTR,DEST)
|
||||
#define VSTORE(OFF,PTR,SRC) VSTOREf(OFF,PTR,SRC)
|
||||
|
||||
#define VADD(A,B,C) VADDf(A,B,C)
|
||||
#define VSUB(A,B,C) VSUBf(A,B,C)
|
||||
#define VMUL(Uri,Uir,Chi,UChi,Z) VMULf(Uri,Uir,Chi,UChi,Z)
|
||||
#define VMADD(Uri,Uir,Chi,UChi,Z) VMADDf(Uri,Uir,Chi,UChi,Z)
|
||||
|
||||
#define VTIMESI(A,B,C) VTIMESIf(A,B,C)
|
||||
#define VTIMESMINUSI(A,B,C) VTIMESMINUSIf(A,B,C)
|
||||
#define VACCTIMESI(A,B,C) VACCTIMESIf(A,B,C)
|
||||
#define VACCTIMESMINUSI(A,B,C) VACCTIMESMINUSIf(A,B,C)
|
||||
|
||||
#define VTIMESI0(A,B,C) VTIMESI0f(A,B,C)
|
||||
#define VTIMESMINUSI0(A,B,C) VTIMESMINUSI0f(A,B,C)
|
||||
#define VACCTIMESI0(A,B,C) VACCTIMESI0f(A,B,C)
|
||||
#define VACCTIMESMINUSI0(A,B,C) VACCTIMESMINUSI0f(A,B,C)
|
||||
|
||||
#define VTIMESI1(A,B,C) VTIMESI1f(A,B,C)
|
||||
#define VTIMESMINUSI1(A,B,C) VTIMESMINUSI1f(A,B,C)
|
||||
#define VACCTIMESI1(A,B,C) VACCTIMESI1f(A,B,C)
|
||||
#define VACCTIMESMINUSI1(A,B,C) VACCTIMESMINUSI1f(A,B,C)
|
||||
|
||||
#define VTIMESI2(A,B,C) VTIMESI2f(A,B,C)
|
||||
#define VTIMESMINUSI2(A,B,C) VTIMESMINUSI2f(A,B,C)
|
||||
#define VACCTIMESI2(A,B,C) VACCTIMESI2f(A,B,C)
|
||||
#define VACCTIMESMINUSI2(A,B,C) VACCTIMESMINUSI2f(A,B,C)
|
||||
|
||||
#define VACCTIMESI1MEM(A,ACC,O,P) VACCTIMESI1MEMf(A,ACC,O,P)
|
||||
#define VACCTIMESI2MEM(A,ACC,O,P) VACCTIMESI2MEMf(A,ACC,O,P)
|
||||
#define VACCTIMESMINUSI1MEM(A,ACC,O,P) VACCTIMESMINUSI1MEMf(A,ACC,O,P)
|
||||
#define VACCTIMESMINUSI2MEM(A,ACC,O,P) VACCTIMESMINUSI2MEMf(A,ACC,O,P)
|
||||
|
||||
#define VPERM0(A,B) VPERM0f(A,B)
|
||||
#define VPERM1(A,B) VPERM1f(A,B)
|
||||
#define VPERM2(A,B) VPERM2f(A,B)
|
||||
#define VPERM3(A,B) VPERM3f(A,B)
|
||||
#define VSHUFMEM(OFF,A,DEST) VSHUFMEMf(OFF,A,DEST)
|
||||
|
||||
#define ZEND1(A,B,C) ZEND1f(A,B,C)
|
||||
#define ZEND2(A,B,C) ZEND2f(A,B,C)
|
||||
#define ZLOAD(A,B,C,D) ZLOADf(A,B,C,D)
|
||||
#define ZMUL(A,B,C,D,E) ZMULf(A,B,C,D,E)
|
||||
#define ZMADD(A,B,C,D,E) ZMADDf(A,B,C,D,E)
|
||||
|
||||
#define ZMUL(A,B,C,D,E) ZMULf(A,B,C,D,E)
|
||||
#define ZMADD(A,B,C,D,E) ZMADDf(A,B,C,D,E)
|
||||
|
||||
#define VADDMEM(O,A,B,C) VADDMEMf(O,A,B,C)
|
||||
#define VSUBMEM(O,A,B,C) VSUBMEMf(O,A,B,C)
|
||||
|
||||
#define ZMULMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMULMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
#define ZMADDMEM2SP(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr) ZMADDMEM2SPf(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Default to no assembler implementation
|
||||
///////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out,uint64_t *timers)
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
uint64_t now;
|
||||
uint64_t first ;
|
||||
int offset,local,perm, ptype;
|
||||
const SiteHalfSpinor *pbuf = & buf[0];
|
||||
const SiteSpinor *plocal = & in._odata[0];
|
||||
void *pf;
|
||||
int osites = in._grid->oSites();
|
||||
|
||||
|
||||
StencilEntry *SE;
|
||||
|
||||
//#define STAMP(i) timers[i] = __rdtsc() ;
|
||||
#define STAMP(i) //timers[i] = __rdtsc() ;
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
first = __rdtsc();
|
||||
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
|
||||
#if 0
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
LOAD64(%r9,pf);
|
||||
__asm__(
|
||||
VPREFETCH(0,%r9)
|
||||
VPREFETCH(1,%r9)
|
||||
VPREFETCH(2,%r9)
|
||||
VPREFETCH(3,%r9)
|
||||
VPREFETCH(4,%r9)
|
||||
VPREFETCH(5,%r9)
|
||||
VPREFETCH(6,%r9)
|
||||
VPREFETCH(7,%r9)
|
||||
VPREFETCH(8,%r9)
|
||||
VPREFETCH(9,%r9)
|
||||
VPREFETCH(10,%r9)
|
||||
VPREFETCH(11,%r9) );
|
||||
#endif
|
||||
|
||||
// Xm
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
XM_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR3; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFXM(Xm,pf);
|
||||
}
|
||||
XM_RECON;
|
||||
|
||||
// Ym
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
YM_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR2; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFYM(Ym,pf);
|
||||
}
|
||||
YM_RECON_ACCUM;
|
||||
|
||||
// Zm
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
ZM_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR1; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFZM(Zm,pf);
|
||||
}
|
||||
ZM_RECON_ACCUM;
|
||||
|
||||
// Tm
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
|
||||
if ( local ) {
|
||||
TM_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR0; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFTM(Tm,pf);
|
||||
}
|
||||
TM_RECON_ACCUM;
|
||||
|
||||
// Tp
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
TP_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR0; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFTP(Tp,pf);
|
||||
}
|
||||
TP_RECON_ACCUM;
|
||||
|
||||
// Zp
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
ZP_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR1; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFZP(Zp,pf);
|
||||
}
|
||||
ZP_RECON_ACCUM;
|
||||
|
||||
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
YP_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR2; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFYP(Yp,pf);
|
||||
}
|
||||
YP_RECON_ACCUM;
|
||||
|
||||
// Xp
|
||||
perm = SE->_permute;
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
|
||||
// PREFETCH_R(A);
|
||||
|
||||
// Prefetch
|
||||
SE=st.GetEntry(ptype,Xm,(ss+1)%osites);
|
||||
if (SE->_is_local) pf=(void *)&plocal[SE->_offset];
|
||||
else pf=(void *)&pbuf[SE->_offset];
|
||||
|
||||
if ( local ) {
|
||||
XP_PROJMEM(&plocal[offset]);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR3; // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(&pbuf[offset]);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFXP(Xp,pf);
|
||||
}
|
||||
XP_RECON_ACCUM;
|
||||
|
||||
debug:
|
||||
SAVE_RESULT(&out._odata[ss]);
|
||||
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template class WilsonKernels<WilsonImplF>;
|
||||
template class WilsonKernels<WilsonImplD>;
|
||||
#if defined(AVX512)
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the single precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/Intel512wilson.h>
|
||||
#include <simd/Intel512single.h>
|
||||
|
||||
static Vector<vComplexF> signs;
|
||||
|
||||
int setupSigns(void ){
|
||||
Vector<vComplexF> bother(2);
|
||||
signs = bother;
|
||||
vrsign(signs[0]);
|
||||
visign(signs[1]);
|
||||
return 1;
|
||||
}
|
||||
static int signInit = setupSigns();
|
||||
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||
|
||||
template<>
|
||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef VMOVIDUP
|
||||
#undef VMOVRDUP
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#define MAYBEPERM(A,B)
|
||||
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
||||
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
template<>
|
||||
void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
}}
|
||||
#endif
|
||||
|
||||
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
|
||||
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,int Ls,int Ns,const FermionField &in, FermionField &out);
|
||||
}}
|
||||
|
||||
|
164
lib/qcd/action/fermion/WilsonKernelsAsmBody.h
Normal file
164
lib/qcd/action/fermion/WilsonKernelsAsmBody.h
Normal file
@ -0,0 +1,164 @@
|
||||
{
|
||||
int locala,perma, ptypea;
|
||||
int localb,permb, ptypeb;
|
||||
uint64_t basea, baseb;
|
||||
uint64_t basex;
|
||||
const uint64_t plocal =(uint64_t) & in._odata[0];
|
||||
|
||||
// vComplexF isigns[2] = { signs[0], signs[1] };
|
||||
vComplexF *isigns = &signs[0];
|
||||
|
||||
MASK_REGS;
|
||||
|
||||
for(int site=0;site<Ns;site++) {
|
||||
for(int s=0;s<Ls;s++) {
|
||||
|
||||
////////////////////////////////
|
||||
// Xp
|
||||
////////////////////////////////
|
||||
int ent=ss*8;// 2*Ndim
|
||||
basea = st.GetInfo(ptypea,locala,perma,Xp,ent,plocal); ent++;
|
||||
baseb = st.GetInfo(ptypeb,localb,permb,Yp,ent,plocal); ent++;
|
||||
basex = basea;
|
||||
|
||||
if ( locala ) {
|
||||
LOAD64(%r10,isigns);
|
||||
XM_PROJMEM(basea);
|
||||
MAYBEPERM(PERMUTE_DIR3,perma);
|
||||
} else {
|
||||
LOAD_CHI(basea);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFXP(Xp,baseb);
|
||||
}
|
||||
LOAD64(%r10,isigns);
|
||||
XM_RECON;
|
||||
|
||||
////////////////////////////////
|
||||
// Yp
|
||||
////////////////////////////////
|
||||
basea = st.GetInfo(ptypea,locala,perma,Xp,ent,plocal); ent++;
|
||||
if ( localb ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
YM_PROJMEM(baseb);
|
||||
MAYBEPERM(PERMUTE_DIR2,permb);
|
||||
} else {
|
||||
LOAD_CHI(baseb);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFYP(Yp,basea);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
YM_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Zp
|
||||
////////////////////////////////
|
||||
baseb = st.GetInfo(ptypeb,localb,permb,Yp,ent,plocal); ent++;
|
||||
if ( locala ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
ZM_PROJMEM(basea);
|
||||
MAYBEPERM(PERMUTE_DIR1,perma);
|
||||
} else {
|
||||
LOAD_CHI(basea);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFZP(Zp,baseb);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
ZM_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Tp
|
||||
////////////////////////////////
|
||||
basea = st.GetInfo(ptypea,locala,perma,Xp,ent,plocal); ent++;
|
||||
if ( localb ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
TM_PROJMEM(baseb);
|
||||
MAYBEPERM(PERMUTE_DIR0,permb);
|
||||
} else {
|
||||
LOAD_CHI(baseb);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFTP(Tp,basea);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
TM_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Xm
|
||||
////////////////////////////////
|
||||
baseb = st.GetInfo(ptypeb,localb,permb,Yp,ent,plocal); ent++;
|
||||
if ( locala ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
XP_PROJMEM(basea);
|
||||
MAYBEPERM(PERMUTE_DIR3,perma);
|
||||
} else {
|
||||
LOAD_CHI(basea);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFXM(Xm,baseb);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
XP_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Ym
|
||||
////////////////////////////////
|
||||
basea = st.GetInfo(ptypea,locala,perma,Xp,ent,plocal); ent++;
|
||||
if ( localb ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
YP_PROJMEM(baseb);
|
||||
MAYBEPERM(PERMUTE_DIR2,permb);
|
||||
} else {
|
||||
LOAD_CHI(baseb);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFYM(Ym,basea);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
YP_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Zm
|
||||
////////////////////////////////
|
||||
baseb = st.GetInfo(ptypeb,localb,permb,Yp,ent,plocal); ent++;
|
||||
if ( locala ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
ZP_PROJMEM(basea);
|
||||
MAYBEPERM(PERMUTE_DIR1,perma);
|
||||
} else {
|
||||
LOAD_CHI(basea);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFZM(Zm,baseb);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
ZP_RECON_ACCUM;
|
||||
|
||||
////////////////////////////////
|
||||
// Tm
|
||||
////////////////////////////////
|
||||
basea = (uint64_t)&out._odata[ss];
|
||||
if ( localb ) {
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
TP_PROJMEM(baseb);
|
||||
MAYBEPERM(PERMUTE_DIR0,permb);
|
||||
} else {
|
||||
LOAD_CHI(baseb);
|
||||
}
|
||||
{
|
||||
MULT_2SPIN_DIR_PFTM(Tm,basea);
|
||||
}
|
||||
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
|
||||
TP_RECON_ACCUM;
|
||||
|
||||
PREFETCH_CHIMU(basex);
|
||||
SAVE_RESULT(&out._odata[ss]);
|
||||
|
||||
|
||||
ss++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
@ -54,14 +54,15 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Chi_11 = ref()(1)(1);\
|
||||
Chi_12 = ref()(1)(2);
|
||||
|
||||
// To splat or not to splat depends on the implementation
|
||||
#define MULT_2SPIN(A)\
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
U_00 = ref()(0,0);\
|
||||
U_10 = ref()(1,0);\
|
||||
U_20 = ref()(2,0);\
|
||||
U_01 = ref()(0,1);\
|
||||
U_11 = ref()(1,1); \
|
||||
U_21 = ref()(2,1);\
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,0)); \
|
||||
Impl::loadLinkElement(U_01,ref()(0,1)); \
|
||||
Impl::loadLinkElement(U_11,ref()(1,1)); \
|
||||
Impl::loadLinkElement(U_21,ref()(2,1)); \
|
||||
UChi_00 = U_00*Chi_00;\
|
||||
UChi_10 = U_00*Chi_10;\
|
||||
UChi_01 = U_10*Chi_00;\
|
||||
@ -74,9 +75,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
UChi_11+= U_11*Chi_11;\
|
||||
UChi_02+= U_21*Chi_01;\
|
||||
UChi_12+= U_21*Chi_11;\
|
||||
U_00 = ref()(0,2);\
|
||||
U_10 = ref()(1,2);\
|
||||
U_20 = ref()(2,2);\
|
||||
Impl::loadLinkElement(U_00,ref()(0,2)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,2)); \
|
||||
Impl::loadLinkElement(U_20,ref()(2,2)); \
|
||||
UChi_00+= U_00*Chi_02;\
|
||||
UChi_10+= U_00*Chi_12;\
|
||||
UChi_01+= U_10*Chi_02;\
|
||||
@ -84,6 +85,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
UChi_02+= U_20*Chi_02;\
|
||||
UChi_12+= U_20*Chi_12;
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_00,Chi_00);\
|
||||
permute##dir(Chi_01,Chi_01);\
|
||||
@ -309,546 +311,10 @@ namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template<class Impl>
|
||||
int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
{
|
||||
// std::cout << "Hand op Dhop "<<std::endl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc
|
||||
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc
|
||||
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc
|
||||
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc
|
||||
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc
|
||||
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc
|
||||
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc
|
||||
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc
|
||||
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc
|
||||
|
||||
REGISTER Simd Chi_00; // two spinor; 6 regs
|
||||
REGISTER Simd Chi_01;
|
||||
REGISTER Simd Chi_02;
|
||||
|
||||
REGISTER Simd Chi_10;
|
||||
REGISTER Simd Chi_11;
|
||||
REGISTER Simd Chi_12; // 14 left
|
||||
|
||||
REGISTER Simd UChi_00; // two spinor; 6 regs
|
||||
REGISTER Simd UChi_01;
|
||||
REGISTER Simd UChi_02;
|
||||
|
||||
REGISTER Simd UChi_10;
|
||||
REGISTER Simd UChi_11;
|
||||
REGISTER Simd UChi_12; // 8 left
|
||||
|
||||
REGISTER Simd U_00; // two rows of U matrix
|
||||
REGISTER Simd U_10;
|
||||
REGISTER Simd U_20;
|
||||
REGISTER Simd U_01;
|
||||
REGISTER Simd U_11;
|
||||
REGISTER Simd U_21; // 2 reg left.
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset, ptype;
|
||||
int num = 0;
|
||||
|
||||
// Xp
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
XP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Xp);
|
||||
XP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Yp
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
YP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Yp);
|
||||
YP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
|
||||
// Zp
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
ZP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Zp);
|
||||
ZP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Tp
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
TP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Tp);
|
||||
TP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Xm
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
XM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Xm);
|
||||
XM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
YM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Ym);
|
||||
YM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Zm
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
ZM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Zm);
|
||||
ZM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Tm
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
TM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Tm);
|
||||
TM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
SiteSpinor & ref (out._odata[ss]);
|
||||
if ( Local ) {
|
||||
vstream(ref()(0)(0),result_00*(-0.5));
|
||||
vstream(ref()(0)(1),result_01*(-0.5));
|
||||
vstream(ref()(0)(2),result_02*(-0.5));
|
||||
vstream(ref()(1)(0),result_10*(-0.5));
|
||||
vstream(ref()(1)(1),result_11*(-0.5));
|
||||
vstream(ref()(1)(2),result_12*(-0.5));
|
||||
vstream(ref()(2)(0),result_20*(-0.5));
|
||||
vstream(ref()(2)(1),result_21*(-0.5));
|
||||
vstream(ref()(2)(2),result_22*(-0.5));
|
||||
vstream(ref()(3)(0),result_30*(-0.5));
|
||||
vstream(ref()(3)(1),result_31*(-0.5));
|
||||
vstream(ref()(3)(2),result_32*(-0.5));
|
||||
return 1;
|
||||
} else if ( num ) {
|
||||
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
|
||||
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
|
||||
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
|
||||
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
|
||||
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
|
||||
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
|
||||
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
|
||||
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
|
||||
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
|
||||
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
|
||||
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
|
||||
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Impl>
|
||||
int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
{
|
||||
// std::cout << "Hand op Dhop "<<std::endl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc
|
||||
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc
|
||||
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc
|
||||
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc
|
||||
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc
|
||||
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc
|
||||
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc
|
||||
|
||||
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc
|
||||
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc
|
||||
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc
|
||||
|
||||
REGISTER Simd Chi_00; // two spinor; 6 regs
|
||||
REGISTER Simd Chi_01;
|
||||
REGISTER Simd Chi_02;
|
||||
|
||||
REGISTER Simd Chi_10;
|
||||
REGISTER Simd Chi_11;
|
||||
REGISTER Simd Chi_12; // 14 left
|
||||
|
||||
REGISTER Simd UChi_00; // two spinor; 6 regs
|
||||
REGISTER Simd UChi_01;
|
||||
REGISTER Simd UChi_02;
|
||||
|
||||
REGISTER Simd UChi_10;
|
||||
REGISTER Simd UChi_11;
|
||||
REGISTER Simd UChi_12; // 8 left
|
||||
|
||||
REGISTER Simd U_00; // two rows of U matrix
|
||||
REGISTER Simd U_10;
|
||||
REGISTER Simd U_20;
|
||||
REGISTER Simd U_01;
|
||||
REGISTER Simd U_11;
|
||||
REGISTER Simd U_21; // 2 reg left.
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset, ptype;
|
||||
int num = 0;
|
||||
|
||||
// Xp
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
XM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Xp);
|
||||
XM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
|
||||
// Yp
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
YM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Yp);
|
||||
YM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
|
||||
// Zp
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
ZM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Zp);
|
||||
ZM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Tp
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
TM_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Tp);
|
||||
TM_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Xm
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
XP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Xm);
|
||||
XP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
YP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Ym);
|
||||
YP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Zm
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
ZP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Zm);
|
||||
ZP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
// Tm
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
offset = SE->_offset;
|
||||
|
||||
if (Local && SE->_is_local ) {
|
||||
LOAD_CHIMU;
|
||||
TP_PROJ;
|
||||
if ( SE->_permute ) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
}
|
||||
if ( Nonlocal && (!SE->_is_local) ) {
|
||||
LOAD_CHI;
|
||||
}
|
||||
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
||||
MULT_2SPIN(Tm);
|
||||
TP_RECON_ACCUM;
|
||||
num++;
|
||||
}
|
||||
|
||||
SiteSpinor & ref (out._odata[ss]);
|
||||
if ( Local ) {
|
||||
vstream(ref()(0)(0),result_00*(-0.5));
|
||||
vstream(ref()(0)(1),result_01*(-0.5));
|
||||
vstream(ref()(0)(2),result_02*(-0.5));
|
||||
vstream(ref()(1)(0),result_10*(-0.5));
|
||||
vstream(ref()(1)(1),result_11*(-0.5));
|
||||
vstream(ref()(1)(2),result_12*(-0.5));
|
||||
vstream(ref()(2)(0),result_20*(-0.5));
|
||||
vstream(ref()(2)(1),result_21*(-0.5));
|
||||
vstream(ref()(2)(2),result_22*(-0.5));
|
||||
vstream(ref()(3)(0),result_30*(-0.5));
|
||||
vstream(ref()(3)(1),result_31*(-0.5));
|
||||
vstream(ref()(3)(2),result_32*(-0.5));
|
||||
return 1;
|
||||
} else if ( num ) {
|
||||
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
|
||||
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
|
||||
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
|
||||
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
|
||||
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
|
||||
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
|
||||
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
|
||||
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
|
||||
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
|
||||
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
|
||||
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
|
||||
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -1073,89 +539,346 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
|
||||
|
||||
{
|
||||
SiteSpinor & ref (out._odata[ss]);
|
||||
vstream(ref()(0)(0),result_00*(-0.5));
|
||||
vstream(ref()(0)(1),result_01*(-0.5));
|
||||
vstream(ref()(0)(2),result_02*(-0.5));
|
||||
vstream(ref()(1)(0),result_10*(-0.5));
|
||||
vstream(ref()(1)(1),result_11*(-0.5));
|
||||
vstream(ref()(1)(2),result_12*(-0.5));
|
||||
vstream(ref()(2)(0),result_20*(-0.5));
|
||||
vstream(ref()(2)(1),result_21*(-0.5));
|
||||
vstream(ref()(2)(2),result_22*(-0.5));
|
||||
vstream(ref()(3)(0),result_30*(-0.5));
|
||||
vstream(ref()(3)(1),result_31*(-0.5));
|
||||
vstream(ref()(3)(2),result_32*(-0.5));
|
||||
vstream(ref()(0)(0),result_00);
|
||||
vstream(ref()(0)(1),result_01);
|
||||
vstream(ref()(0)(2),result_02);
|
||||
vstream(ref()(1)(0),result_10);
|
||||
vstream(ref()(1)(1),result_11);
|
||||
vstream(ref()(1)(2),result_12);
|
||||
vstream(ref()(2)(0),result_20);
|
||||
vstream(ref()(2)(1),result_21);
|
||||
vstream(ref()(2)(2),result_22);
|
||||
vstream(ref()(3)(0),result_30);
|
||||
vstream(ref()(3)(1),result_31);
|
||||
vstream(ref()(3)(2),result_32);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// std::cout << "Hand op Dhop "<<std::endl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd result_00; // 12 regs on knc
|
||||
REGISTER Simd result_01;
|
||||
REGISTER Simd result_02;
|
||||
|
||||
REGISTER Simd result_10;
|
||||
REGISTER Simd result_11;
|
||||
REGISTER Simd result_12;
|
||||
|
||||
REGISTER Simd result_20;
|
||||
REGISTER Simd result_21;
|
||||
REGISTER Simd result_22;
|
||||
|
||||
REGISTER Simd result_30;
|
||||
REGISTER Simd result_31;
|
||||
REGISTER Simd result_32; // 20 left
|
||||
|
||||
REGISTER Simd Chi_00; // two spinor; 6 regs
|
||||
REGISTER Simd Chi_01;
|
||||
REGISTER Simd Chi_02;
|
||||
|
||||
REGISTER Simd Chi_10;
|
||||
REGISTER Simd Chi_11;
|
||||
REGISTER Simd Chi_12; // 14 left
|
||||
|
||||
REGISTER Simd UChi_00; // two spinor; 6 regs
|
||||
REGISTER Simd UChi_01;
|
||||
REGISTER Simd UChi_02;
|
||||
|
||||
REGISTER Simd UChi_10;
|
||||
REGISTER Simd UChi_11;
|
||||
REGISTER Simd UChi_12; // 8 left
|
||||
|
||||
REGISTER Simd U_00; // two rows of U matrix
|
||||
REGISTER Simd U_10;
|
||||
REGISTER Simd U_20;
|
||||
REGISTER Simd U_01;
|
||||
REGISTER Simd U_11;
|
||||
REGISTER Simd U_21; // 2 reg left.
|
||||
|
||||
#define Chimu_00 Chi_00
|
||||
#define Chimu_01 Chi_01
|
||||
#define Chimu_02 Chi_02
|
||||
#define Chimu_10 Chi_10
|
||||
#define Chimu_11 Chi_11
|
||||
#define Chimu_12 Chi_12
|
||||
#define Chimu_20 UChi_00
|
||||
#define Chimu_21 UChi_01
|
||||
#define Chimu_22 UChi_02
|
||||
#define Chimu_30 UChi_10
|
||||
#define Chimu_31 UChi_11
|
||||
#define Chimu_32 UChi_12
|
||||
|
||||
|
||||
StencilEntry *SE;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
// Xp
|
||||
SE=st.GetEntry(ptype,Xp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
|
||||
{
|
||||
MULT_2SPIN(Xp);
|
||||
}
|
||||
XP_RECON;
|
||||
|
||||
// Yp
|
||||
SE=st.GetEntry(ptype,Yp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Yp);
|
||||
}
|
||||
YP_RECON_ACCUM;
|
||||
|
||||
|
||||
// Zp
|
||||
SE=st.GetEntry(ptype,Zp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Zp);
|
||||
}
|
||||
ZP_RECON_ACCUM;
|
||||
|
||||
// Tp
|
||||
SE=st.GetEntry(ptype,Tp,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TP_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Tp);
|
||||
}
|
||||
TP_RECON_ACCUM;
|
||||
|
||||
// Xm
|
||||
SE=st.GetEntry(ptype,Xm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
XM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Xm);
|
||||
}
|
||||
XM_RECON_ACCUM;
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
YM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Ym);
|
||||
}
|
||||
YM_RECON_ACCUM;
|
||||
|
||||
// Zm
|
||||
SE=st.GetEntry(ptype,Zm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
ZM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Zm);
|
||||
}
|
||||
ZM_RECON_ACCUM;
|
||||
|
||||
// Tm
|
||||
SE=st.GetEntry(ptype,Tm,ss);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHIMU;
|
||||
TM_PROJ;
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI;
|
||||
}
|
||||
{
|
||||
MULT_2SPIN(Tm);
|
||||
}
|
||||
TM_RECON_ACCUM;
|
||||
|
||||
{
|
||||
SiteSpinor & ref (out._odata[ss]);
|
||||
vstream(ref()(0)(0),result_00);
|
||||
vstream(ref()(0)(1),result_01);
|
||||
vstream(ref()(0)(2),result_02);
|
||||
vstream(ref()(1)(0),result_10);
|
||||
vstream(ref()(1)(1),result_11);
|
||||
vstream(ref()(1)(2),result_12);
|
||||
vstream(ref()(2)(0),result_20);
|
||||
vstream(ref()(2)(1),result_21);
|
||||
vstream(ref()(2)(2),result_22);
|
||||
vstream(ref()(3)(0),result_30);
|
||||
vstream(ref()(3)(1),result_31);
|
||||
vstream(ref()(3)(2),result_32);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Specialise Gparity to simple implementation
|
||||
////////////////////////////////////////////////
|
||||
template<>
|
||||
int WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // returns void, will template override for Wilson Nc=3
|
||||
//check consistency of return types between these functions and the ones in WilsonKernels.cc
|
||||
return 0;
|
||||
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
int WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
return 0;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
return 0;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||
return 0;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
template int WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
////////////// Wilson ; uses this implementation /////////////////////
|
||||
// Need Nc=3 though //
|
||||
|
||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out,bool l,bool n);
|
||||
template int WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
|
||||
template int WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
|
||||
template int WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool n);
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
template int WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
|
||||
template int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
|
||||
template int WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
|
||||
template int WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out, bool l, bool nl);
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
}}
|
||||
|
@ -42,7 +42,9 @@ template<class Gimpl> class WilsonLoops;
|
||||
#define INHERIT_GIMPL_TYPES(GImpl) \
|
||||
typedef typename GImpl::Simd Simd;\
|
||||
typedef typename GImpl::GaugeLinkField GaugeLinkField;\
|
||||
typedef typename GImpl::GaugeField GaugeField;
|
||||
typedef typename GImpl::GaugeField GaugeField;\
|
||||
typedef typename GImpl::SiteGaugeField SiteGaugeField;\
|
||||
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
|
||||
|
||||
//
|
||||
template<class S,int Nrepresentation=Nc>
|
||||
@ -62,9 +64,9 @@ template<class Gimpl> class WilsonLoops;
|
||||
|
||||
// Move this elsewhere?
|
||||
static inline void AddGaugeLink(GaugeField& U, GaugeLinkField& W, int mu){ // U[mu] += W
|
||||
PARALLEL_FOR_LOOP
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=0;ss<U._grid->oSites();ss++){
|
||||
U._odata[ss]._internal[mu] = U._odata[ss]._internal[mu] + W._odata[ss]._internal;
|
||||
U._odata[ss]._internal[mu] = U._odata[ss]._internal[mu] + W._odata[ss]._internal;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -92,13 +92,13 @@ public:
|
||||
|
||||
// Create integrator, including the smearing policy
|
||||
// Smearing policy
|
||||
std::cout << GridLogMessage << " Creating the Stout class\n";
|
||||
double rho = 0.1; // smearing parameter
|
||||
std::cout << GridLogDebug << " Creating the Stout class\n";
|
||||
double rho = 0.1; // smearing parameter, now hardcoded
|
||||
int Nsmear = 1; // number of smearing levels
|
||||
Smear_Stout<Gimpl> Stout(rho);
|
||||
std::cout << GridLogMessage << " Creating the SmearedConfiguration class\n";
|
||||
std::cout << GridLogDebug << " Creating the SmearedConfiguration class\n";
|
||||
SmearedConfiguration<Gimpl> SmearingPolicy(UGrid, Nsmear, Stout);
|
||||
std::cout << GridLogMessage << " done\n";
|
||||
std::cout << GridLogDebug << " done\n";
|
||||
//////////////
|
||||
typedef MinimumNorm2<GaugeField, SmearedConfiguration<Gimpl> > IntegratorType;// change here to change the algorithm
|
||||
IntegratorParameters MDpar(20);
|
||||
@ -116,27 +116,27 @@ public:
|
||||
|
||||
if ( StartType == HotStart ) {
|
||||
// Hot start
|
||||
HMCpar.NoMetropolisUntil =0;
|
||||
HMCpar.NoMetropolisUntil =10;
|
||||
HMCpar.MetropolisTest = true;
|
||||
sRNG.SeedFixedIntegers(SerSeed);
|
||||
pRNG.SeedFixedIntegers(ParSeed);
|
||||
SU3::HotConfiguration(pRNG, U);
|
||||
} else if ( StartType == ColdStart ) {
|
||||
// Cold start
|
||||
HMCpar.NoMetropolisUntil =0;
|
||||
HMCpar.NoMetropolisUntil =10;
|
||||
HMCpar.MetropolisTest = true;
|
||||
sRNG.SeedFixedIntegers(SerSeed);
|
||||
pRNG.SeedFixedIntegers(ParSeed);
|
||||
SU3::ColdConfiguration(pRNG, U);
|
||||
} else if ( StartType == TepidStart ) {
|
||||
// Tepid start
|
||||
HMCpar.NoMetropolisUntil =0;
|
||||
HMCpar.NoMetropolisUntil =10;
|
||||
HMCpar.MetropolisTest = true;
|
||||
sRNG.SeedFixedIntegers(SerSeed);
|
||||
pRNG.SeedFixedIntegers(ParSeed);
|
||||
SU3::TepidConfiguration(pRNG, U);
|
||||
} else if ( StartType == CheckpointStart ) {
|
||||
HMCpar.NoMetropolisUntil =0;
|
||||
HMCpar.NoMetropolisUntil =10;
|
||||
HMCpar.MetropolisTest = true;
|
||||
// CheckpointRestart
|
||||
Checkpoint.CheckpointRestore(StartTraj, U, sRNG, pRNG);
|
||||
@ -144,7 +144,7 @@ public:
|
||||
|
||||
// Attach the gauge field to the smearing Policy and create the fill the smeared set
|
||||
// notice that the unit configuration is singular in this procedure
|
||||
std::cout << GridLogMessage << "Filling the smeared set\n";
|
||||
std::cout << GridLogMessage << "Filling the smeared set\n";
|
||||
SmearingPolicy.set_GaugeField(U);
|
||||
|
||||
HybridMonteCarlo<GaugeField,IntegratorType> HMC(HMCpar, MDynamics,sRNG,pRNG,U);
|
||||
|
@ -60,6 +60,31 @@ namespace Grid {
|
||||
"-Gamma5 ",
|
||||
" "
|
||||
};
|
||||
|
||||
SpinMatrix makeGammaProd(const unsigned int i)
|
||||
{
|
||||
SpinMatrix g;
|
||||
|
||||
g = 1.;
|
||||
if (i & 0x1)
|
||||
{
|
||||
g = g*Gamma(Gamma::GammaMatrix::GammaX);
|
||||
}
|
||||
if (i & 0x2)
|
||||
{
|
||||
g = g*Gamma(Gamma::GammaMatrix::GammaY);
|
||||
}
|
||||
if (i & 0x4)
|
||||
{
|
||||
g = g*Gamma(Gamma::GammaMatrix::GammaZ);
|
||||
}
|
||||
if (i & 0x8)
|
||||
{
|
||||
g = g*Gamma(Gamma::GammaMatrix::GammaT);
|
||||
}
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
// void sprojMul( vHalfSpinColourVector &out,vColourMatrix &u, vSpinColourVector &in){
|
||||
// vHalfSpinColourVector hspin;
|
||||
|
@ -82,7 +82,10 @@ namespace QCD {
|
||||
GammaMatrix _g;
|
||||
|
||||
};
|
||||
|
||||
|
||||
// Make gamma products (Chroma convention)
|
||||
SpinMatrix makeGammaProd(const unsigned int i);
|
||||
|
||||
/* Gx
|
||||
* 0 0 0 i
|
||||
* 0 0 i 0
|
||||
|
@ -608,14 +608,14 @@ Note that in step D setting B ~ X - A and using B in place of A in step E will g
|
||||
LatticeMatrix Umu(out._grid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
LieRandomize(pRNG,Umu,0.01);
|
||||
pokeLorentz(out,Umu,mu);
|
||||
PokeIndex<LorentzIndex>(out,Umu,mu);
|
||||
}
|
||||
}
|
||||
static void ColdConfiguration(GridParallelRNG &pRNG,LatticeGaugeField &out){
|
||||
LatticeMatrix Umu(out._grid);
|
||||
Umu=1.0;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
pokeLorentz(out,Umu,mu);
|
||||
PokeIndex<LorentzIndex>(out,Umu,mu);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,11 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFourDimRedBlackGrid(const GridCartesia
|
||||
{
|
||||
return new GridRedBlackCartesian(FourDimGrid);
|
||||
}
|
||||
|
||||
GridCartesian *SpaceTimeGrid::makeFourDimDWFGrid(const std::vector<int> & latt,const std::vector<int> &mpi)
|
||||
{
|
||||
std::vector<int> simd(4,1);
|
||||
return makeFourDimGrid(latt,simd,mpi);
|
||||
}
|
||||
GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian *FourDimGrid)
|
||||
{
|
||||
int N4=FourDimGrid->_ndimension;
|
||||
@ -58,6 +62,7 @@ GridCartesian *SpaceTimeGrid::makeFiveDimGrid(int Ls,const GridCartesian
|
||||
return new GridCartesian(latt5,simd5,mpi5);
|
||||
}
|
||||
|
||||
|
||||
GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid)
|
||||
{
|
||||
int N4=FourDimGrid->_ndimension;
|
||||
@ -76,4 +81,42 @@ GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimRedBlackGrid(int Ls,const GridC
|
||||
return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd);
|
||||
}
|
||||
|
||||
|
||||
GridCartesian *SpaceTimeGrid::makeFiveDimDWFGrid(int Ls,const GridCartesian *FourDimGrid)
|
||||
{
|
||||
int N4=FourDimGrid->_ndimension;
|
||||
int nsimd = FourDimGrid->Nsimd();
|
||||
|
||||
std::vector<int> latt5(1,Ls);
|
||||
std::vector<int> simd5(1,nsimd);
|
||||
std::vector<int> mpi5(1,1);
|
||||
|
||||
for(int d=0;d<N4;d++){
|
||||
latt5.push_back(FourDimGrid->_fdimensions[d]);
|
||||
simd5.push_back(1);
|
||||
mpi5.push_back(FourDimGrid->_processors[d]);
|
||||
}
|
||||
return new GridCartesian(latt5,simd5,mpi5);
|
||||
}
|
||||
|
||||
GridRedBlackCartesian *SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(int Ls,const GridCartesian *FourDimGrid)
|
||||
{
|
||||
int N4=FourDimGrid->_ndimension;
|
||||
int nsimd = FourDimGrid->Nsimd();
|
||||
int cbd=0;
|
||||
std::vector<int> latt5(1,Ls);
|
||||
std::vector<int> simd5(1,nsimd);
|
||||
std::vector<int> mpi5(1,1);
|
||||
std::vector<int> cb5(1,1);
|
||||
|
||||
for(int d=0;d<N4;d++){
|
||||
latt5.push_back(FourDimGrid->_fdimensions[d]);
|
||||
simd5.push_back(1);
|
||||
mpi5.push_back(FourDimGrid->_processors[d]);
|
||||
cb5.push_back(1);
|
||||
}
|
||||
return new GridRedBlackCartesian(latt5,simd5,mpi5,cb5,cbd);
|
||||
}
|
||||
|
||||
|
||||
}}
|
||||
|
@ -35,9 +35,14 @@ class SpaceTimeGrid {
|
||||
|
||||
static GridCartesian *makeFourDimGrid(const std::vector<int> & latt,const std::vector<int> &simd,const std::vector<int> &mpi);
|
||||
static GridRedBlackCartesian *makeFourDimRedBlackGrid (const GridCartesian *FourDimGrid);
|
||||
|
||||
static GridCartesian *makeFiveDimGrid (int Ls,const GridCartesian *FourDimGrid);
|
||||
static GridRedBlackCartesian *makeFiveDimRedBlackGrid(int Ls,const GridCartesian *FourDimGrid);
|
||||
|
||||
static GridCartesian *makeFiveDimDWFGrid (int Ls,const GridCartesian *FourDimGrid);
|
||||
static GridRedBlackCartesian *makeFiveDimDWFRedBlackGrid(int Ls,const GridCartesian *FourDimGrid);
|
||||
static GridCartesian *makeFourDimDWFGrid (const std::vector<int> & latt,const std::vector<int> &mpi);
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
|
@ -52,9 +52,9 @@ namespace Grid {
|
||||
// or this-> ; there is no "this" in a static method. This forces explicit Gimpl scope
|
||||
// resolution throughout the usage in this file, and rather defeats the purpose of deriving
|
||||
// from Gimpl.
|
||||
plaq= Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftForward (U[mu],mu,U[nu])));
|
||||
plaq = Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftForward (U[mu],mu,U[nu])));
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// trace of directed plaquette oriented in mu,nu plane
|
||||
@ -100,16 +100,16 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static RealD avgPlaquette(const GaugeLorentz &Umu){
|
||||
|
||||
RealD sumplaq = sumPlaquette(Umu);
|
||||
|
||||
double vol = Umu._grid->gSites();
|
||||
|
||||
double faces = (1.0*Nd*(Nd-1))/2.0;
|
||||
|
||||
return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||
}
|
||||
static RealD avgPlaquette(const GaugeLorentz &Umu){
|
||||
RealD sumplaq = sumPlaquette(Umu);
|
||||
double vol = Umu._grid->gSites();
|
||||
double faces = (1.0*Nd*(Nd-1))/2.0;
|
||||
return sumplaq/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// average over traced single links
|
||||
//////////////////////////////////////////////////
|
||||
static RealD linkTrace(const GaugeLorentz &Umu){
|
||||
std::vector<GaugeMat> U(4,Umu._grid);
|
||||
|
||||
@ -126,47 +126,6 @@ namespace Grid {
|
||||
|
||||
return p.real()/vol/4.0/3.0;
|
||||
};
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void Staple(GaugeMat &staple,const GaugeLorentz &Umu,int mu){
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::vector<GaugeMat> U(4,grid);
|
||||
for(int d=0;d<Nd;d++){
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu,d);
|
||||
}
|
||||
staple = zero;
|
||||
|
||||
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
|
||||
if(nu != mu) {
|
||||
|
||||
// mu
|
||||
// ^
|
||||
// |__> nu
|
||||
|
||||
// __
|
||||
// |
|
||||
// __|
|
||||
//
|
||||
|
||||
staple+=Gimpl::ShiftStaple(Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))),mu);
|
||||
|
||||
// __
|
||||
// |
|
||||
// |__
|
||||
//
|
||||
//
|
||||
staple+=Gimpl::ShiftStaple(Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,U[nu])),mu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site in direction mu,nu
|
||||
@ -210,6 +169,51 @@ namespace Grid {
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void Staple(GaugeMat &staple,const GaugeLorentz &Umu,int mu){
|
||||
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::vector<GaugeMat> U(Nd,grid);
|
||||
for(int d=0;d<Nd;d++){
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu,d);
|
||||
}
|
||||
staple = zero;
|
||||
GaugeMat tmp(grid);
|
||||
|
||||
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
|
||||
if(nu != mu) {
|
||||
|
||||
// mu
|
||||
// ^
|
||||
// |__> nu
|
||||
|
||||
// __
|
||||
// |
|
||||
// __|
|
||||
//
|
||||
|
||||
staple+=Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))),mu);
|
||||
|
||||
// __
|
||||
// |
|
||||
// |__
|
||||
//
|
||||
//
|
||||
staple+=Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,U[nu])),mu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site in direction mu,nu, upper part
|
||||
@ -247,246 +251,246 @@ namespace Grid {
|
||||
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Similar to above for rectangle is required
|
||||
//////////////////////////////////////////////////////
|
||||
static void dirRectangle(GaugeMat &rect,const std::vector<GaugeMat> &U, const int mu, const int nu)
|
||||
{
|
||||
rect = Gimpl::CovShiftForward(U[mu],mu,Gimpl::CovShiftForward(U[mu],mu,U[nu]))* // ->->|
|
||||
adj(Gimpl::CovShiftForward(U[nu],nu,Gimpl::CovShiftForward(U[mu],mu,U[mu]))) ;
|
||||
rect = rect +
|
||||
//////////////////////////////////////////////////////
|
||||
// Similar to above for rectangle is required
|
||||
//////////////////////////////////////////////////////
|
||||
static void dirRectangle(GaugeMat &rect,const std::vector<GaugeMat> &U, const int mu, const int nu)
|
||||
{
|
||||
rect = Gimpl::CovShiftForward(U[mu],mu,Gimpl::CovShiftForward(U[mu],mu,U[nu]))* // ->->|
|
||||
adj(Gimpl::CovShiftForward(U[nu],nu,Gimpl::CovShiftForward(U[mu],mu,U[mu]))) ;
|
||||
rect = rect +
|
||||
Gimpl::CovShiftForward(U[mu],mu,Gimpl::CovShiftForward(U[nu],nu,U[nu]))* // ->||
|
||||
adj(Gimpl::CovShiftForward(U[nu],nu,Gimpl::CovShiftForward(U[nu],nu,U[mu]))) ;
|
||||
adj(Gimpl::CovShiftForward(U[nu],nu,Gimpl::CovShiftForward(U[nu],nu,U[mu]))) ;
|
||||
}
|
||||
static void traceDirRectangle(LatticeComplex &rect, const std::vector<GaugeMat> &U, const int mu, const int nu)
|
||||
{
|
||||
GaugeMat sp(U[0]._grid);
|
||||
dirRectangle(sp,U,mu,nu);
|
||||
rect=trace(sp);
|
||||
}
|
||||
static void siteRectangle(LatticeComplex &Rect,const std::vector<GaugeMat> &U)
|
||||
{
|
||||
LatticeComplex siteRect(U[0]._grid);
|
||||
Rect=zero;
|
||||
for(int mu=1;mu<Nd;mu++){
|
||||
for(int nu=0;nu<mu;nu++){
|
||||
traceDirRectangle(siteRect,U,mu,nu);
|
||||
Rect = Rect + siteRect;
|
||||
}
|
||||
static void traceDirRectangle(LatticeComplex &rect, const std::vector<GaugeMat> &U, const int mu, const int nu)
|
||||
{
|
||||
GaugeMat sp(U[0]._grid);
|
||||
dirRectangle(sp,U,mu,nu);
|
||||
rect=trace(sp);
|
||||
}
|
||||
static void siteRectangle(LatticeComplex &Rect,const std::vector<GaugeMat> &U)
|
||||
{
|
||||
LatticeComplex siteRect(U[0]._grid);
|
||||
Rect=zero;
|
||||
for(int mu=1;mu<Nd;mu++){
|
||||
for(int nu=0;nu<mu;nu++){
|
||||
traceDirRectangle(siteRect,U,mu,nu);
|
||||
Rect = Rect + siteRect;
|
||||
}
|
||||
}
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static RealD sumRectangle(const GaugeLorentz &Umu){
|
||||
std::vector<GaugeMat> U(4,Umu._grid);
|
||||
}
|
||||
}
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// sum over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static RealD sumRectangle(const GaugeLorentz &Umu){
|
||||
std::vector<GaugeMat> U(Nd,Umu._grid);
|
||||
|
||||
LatticeComplex Rect(Umu._grid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
}
|
||||
|
||||
LatticeComplex Rect(Umu._grid);
|
||||
|
||||
siteRectangle(Rect,U);
|
||||
siteRectangle(Rect,U);
|
||||
|
||||
TComplex Tp = sum(Rect);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static RealD avgRectangle(const GaugeLorentz &Umu){
|
||||
TComplex Tp = sum(Rect);
|
||||
Complex p = TensorRemove(Tp);
|
||||
return p.real();
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// average over all x,y,z,t and over all planes of plaquette
|
||||
//////////////////////////////////////////////////
|
||||
static RealD avgRectangle(const GaugeLorentz &Umu){
|
||||
|
||||
RealD sumrect = sumRectangle(Umu);
|
||||
RealD sumrect = sumRectangle(Umu);
|
||||
|
||||
double vol = Umu._grid->gSites();
|
||||
double vol = Umu._grid->gSites();
|
||||
|
||||
double faces = (1.0*Nd*(Nd-1)); // 2 distinct orientations summed
|
||||
double faces = (1.0*Nd*(Nd-1)); // 2 distinct orientations summed
|
||||
|
||||
return sumrect/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||
}
|
||||
return sumrect/vol/faces/Nc; // Nd , Nc dependent... FIXME
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void RectStapleDouble(GaugeMat &U2,const GaugeMat & U,int mu){
|
||||
U2 = U * Cshift(U,mu,1);
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
// the sum over all staples on each site
|
||||
//////////////////////////////////////////////////
|
||||
static void RectStapleDouble(GaugeMat &U2,const GaugeMat & U,int mu){
|
||||
U2 = U * Cshift(U,mu,1);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Hop by two optimisation strategy does not work nicely with Gparity. (could do,
|
||||
// but need to track two deep where cross boundary and apply a conjugation).
|
||||
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do so .
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
static void RectStapleOptimised(GaugeMat &Stap,std::vector<GaugeMat> &U2,std::vector<GaugeMat> &U,int mu){
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Hop by two optimisation strategy does not work nicely with Gparity. (could do,
|
||||
// but need to track two deep where cross boundary and apply a conjugation).
|
||||
// Must differentiate this in Gimpl, and use Gimpl::isPeriodicGaugeField to do so .
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
static void RectStapleOptimised(GaugeMat &Stap,std::vector<GaugeMat> &U2,std::vector<GaugeMat> &U,int mu){
|
||||
|
||||
Stap = zero;
|
||||
Stap = zero;
|
||||
|
||||
GridBase *grid = U[0]._grid;
|
||||
GridBase *grid = U[0]._grid;
|
||||
|
||||
GaugeMat Staple2x1 (grid);
|
||||
GaugeMat tmp (grid);
|
||||
GaugeMat Staple2x1 (grid);
|
||||
GaugeMat tmp (grid);
|
||||
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if ( nu!=mu) {
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if ( nu!=mu) {
|
||||
|
||||
// Up staple ___ ___
|
||||
// | |
|
||||
tmp = Cshift(adj(U[nu]),nu,-1);
|
||||
tmp = adj(U2[mu])*tmp;
|
||||
tmp = Cshift(tmp,mu,-2);
|
||||
// Up staple ___ ___
|
||||
// | |
|
||||
tmp = Cshift(adj(U[nu]),nu,-1);
|
||||
tmp = adj(U2[mu])*tmp;
|
||||
tmp = Cshift(tmp,mu,-2);
|
||||
|
||||
Staple2x1 = Gimpl::CovShiftForward (U[nu],nu,tmp);
|
||||
Staple2x1 = Gimpl::CovShiftForward (U[nu],nu,tmp);
|
||||
|
||||
|
||||
// Down staple
|
||||
// |___ ___|
|
||||
//
|
||||
tmp = adj(U2[mu])*U[nu];
|
||||
Staple2x1+= Gimpl::CovShiftBackward(U[nu],nu,Cshift(tmp,mu,-2));
|
||||
// Down staple
|
||||
// |___ ___|
|
||||
//
|
||||
tmp = adj(U2[mu])*U[nu];
|
||||
Staple2x1+= Gimpl::CovShiftBackward(U[nu],nu,Cshift(tmp,mu,-2));
|
||||
|
||||
|
||||
// ___ ___
|
||||
// | ___|
|
||||
// |___ ___|
|
||||
//
|
||||
// ___ ___
|
||||
// | ___|
|
||||
// |___ ___|
|
||||
//
|
||||
|
||||
Stap+= Cshift(Gimpl::CovShiftForward (U[mu],mu,Staple2x1),mu,1);
|
||||
Stap+= Cshift(Gimpl::CovShiftForward (U[mu],mu,Staple2x1),mu,1);
|
||||
|
||||
// ___ ___
|
||||
// |___ |
|
||||
// |___ ___|
|
||||
//
|
||||
// ___ ___
|
||||
// |___ |
|
||||
// |___ ___|
|
||||
//
|
||||
|
||||
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
|
||||
// Stap+= Cshift(tmp,mu,1) ;
|
||||
Stap+= Cshift(Staple2x1,mu,1)*Cshift(U[mu],mu,-1); ;
|
||||
// tmp= Staple2x1* Cshift(U[mu],mu,-2);
|
||||
// Stap+= Cshift(tmp,mu,1) ;
|
||||
Stap+= Cshift(Staple2x1,mu,1)*Cshift(U[mu],mu,-1); ;
|
||||
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
|
||||
tmp = Cshift(adj(U2[nu]),nu,-2);
|
||||
tmp = Gimpl::CovShiftBackward(U[mu],mu,tmp);
|
||||
tmp = U2[nu]*Cshift(tmp,nu,2);
|
||||
Stap+= Cshift(tmp, mu, 1);
|
||||
tmp = Cshift(adj(U2[nu]),nu,-2);
|
||||
tmp = Gimpl::CovShiftBackward(U[mu],mu,tmp);
|
||||
tmp = U2[nu]*Cshift(tmp,nu,2);
|
||||
Stap+= Cshift(tmp, mu, 1);
|
||||
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
|
||||
tmp = Gimpl::CovShiftBackward(U[mu],mu,U2[nu]);
|
||||
tmp = adj(U2[nu])*tmp;
|
||||
tmp = Cshift(tmp,nu,-2);
|
||||
Stap+=Cshift(tmp, mu, 1);
|
||||
}}
|
||||
tmp = Gimpl::CovShiftBackward(U[mu],mu,U2[nu]);
|
||||
tmp = adj(U2[nu])*tmp;
|
||||
tmp = Cshift(tmp,nu,-2);
|
||||
Stap+=Cshift(tmp, mu, 1);
|
||||
}}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void RectStaple(GaugeMat &Stap,const GaugeLorentz & Umu,int mu)
|
||||
{
|
||||
RectStapleUnoptimised(Stap,Umu,mu);
|
||||
}
|
||||
static void RectStaple(const GaugeLorentz & Umu,GaugeMat &Stap,
|
||||
std::vector<GaugeMat> &U2,
|
||||
std::vector<GaugeMat> &U, int mu)
|
||||
{
|
||||
if ( Gimpl::isPeriodicGaugeField() ){
|
||||
RectStapleOptimised(Stap,U2,U,mu);
|
||||
} else {
|
||||
RectStapleUnoptimised(Stap,Umu,mu);
|
||||
}
|
||||
}
|
||||
static void RectStaple(GaugeMat &Stap,const GaugeLorentz & Umu,int mu)
|
||||
{
|
||||
RectStapleUnoptimised(Stap,Umu,mu);
|
||||
}
|
||||
static void RectStaple(const GaugeLorentz & Umu,GaugeMat &Stap,
|
||||
std::vector<GaugeMat> &U2,
|
||||
std::vector<GaugeMat> &U, int mu)
|
||||
{
|
||||
if ( Gimpl::isPeriodicGaugeField() ){
|
||||
RectStapleOptimised(Stap,U2,U,mu);
|
||||
} else {
|
||||
RectStapleUnoptimised(Stap,Umu,mu);
|
||||
}
|
||||
}
|
||||
|
||||
static void RectStapleUnoptimised(GaugeMat &Stap,const GaugeLorentz &Umu,int mu){
|
||||
GridBase *grid = Umu._grid;
|
||||
static void RectStapleUnoptimised(GaugeMat &Stap,const GaugeLorentz &Umu,int mu){
|
||||
GridBase *grid = Umu._grid;
|
||||
|
||||
std::vector<GaugeMat> U(4,grid);
|
||||
for(int d=0;d<Nd;d++){
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu,d);
|
||||
}
|
||||
std::vector<GaugeMat> U(Nd,grid);
|
||||
for(int d=0;d<Nd;d++){
|
||||
U[d] = PeekIndex<LorentzIndex>(Umu,d);
|
||||
}
|
||||
|
||||
Stap=zero;
|
||||
Stap=zero;
|
||||
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if ( nu!=mu) {
|
||||
// __ ___
|
||||
// | __ |
|
||||
//
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[mu],mu,
|
||||
Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))))) , mu);
|
||||
for(int nu=0;nu<Nd;nu++){
|
||||
if ( nu!=mu) {
|
||||
// __ ___
|
||||
// | __ |
|
||||
//
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[mu],mu,
|
||||
Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))))) , mu);
|
||||
|
||||
// __
|
||||
// |__ __ |
|
||||
// __
|
||||
// |__ __ |
|
||||
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu, U[nu])))) , mu);
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu, U[nu])))) , mu);
|
||||
|
||||
// __
|
||||
// |__ __ |
|
||||
// __
|
||||
// |__ __ |
|
||||
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftForward(U[nu],nu,U[mu])))) , mu);
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftForward(U[nu],nu,U[mu])))) , mu);
|
||||
|
||||
// __ ___
|
||||
// |__ |
|
||||
// __ ___
|
||||
// |__ |
|
||||
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,U[mu])))) , mu);
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward (U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,U[mu])))) , mu);
|
||||
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(U[nu],nu,
|
||||
Gimpl::CovShiftForward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))))) , mu);
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftForward(U[nu],nu,
|
||||
Gimpl::CovShiftForward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftIdentityBackward(U[nu],nu))))) , mu);
|
||||
|
||||
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
// | |
|
||||
//
|
||||
// | |
|
||||
// --
|
||||
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftForward (U[nu],nu,U[nu])))) , mu);
|
||||
}}
|
||||
}
|
||||
Stap+= Gimpl::ShiftStaple(
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[nu],nu,
|
||||
Gimpl::CovShiftBackward(U[mu],mu,
|
||||
Gimpl::CovShiftForward (U[nu],nu,U[nu])))) , mu);
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> U1WilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> SU2WilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> SU3WilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> ColourWilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> U1WilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> SU2WilsonLoops;
|
||||
typedef WilsonLoops<PeriodicGimplR> SU3WilsonLoops;
|
||||
|
||||
}}
|
||||
}}
|
||||
|
||||
#endif
|
||||
#endif
|
Reference in New Issue
Block a user