mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-16 23:07:05 +01:00
Merge branch 'feature/hadrons' into feature/qed-fvol
This commit is contained in:
@ -8,6 +8,7 @@
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -42,8 +43,58 @@ namespace Grid {
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) {
|
||||
this->MomentumSpacePropagatorHt(out,in,_m);
|
||||
void FreePropagator(const FermionField &in,FermionField &out,RealD mass, std::vector<double> twist, bool fiveD) {
|
||||
FermionField in_k(in._grid);
|
||||
FermionField prop_k(in._grid);
|
||||
|
||||
FFT theFFT((GridCartesian *) in._grid);
|
||||
|
||||
//phase for boundary condition
|
||||
ComplexField coor(in._grid);
|
||||
ComplexField ph(in._grid); ph = zero;
|
||||
FermionField in_buf(in._grid); in_buf = zero;
|
||||
Complex ci(0.0,1.0);
|
||||
assert(twist.size() == Nd);//check that twist is Nd
|
||||
int shift = 0;
|
||||
if(fiveD) shift = 1;
|
||||
for(unsigned int nu = 0; nu < Nd; nu++)
|
||||
{
|
||||
// Shift coordinate lattice index by 1 to account for 5th dimension.
|
||||
LatticeCoordinate(coor, nu + shift);
|
||||
ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu+shift])));
|
||||
}
|
||||
in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
|
||||
|
||||
if(fiveD){//FFT only on temporal and spatial dimensions
|
||||
std::vector<int> mask(Nd+1,1); mask[0] = 0;
|
||||
theFFT.FFT_dim_mask(in_k,in_buf,mask,FFT::forward);
|
||||
this->MomentumSpacePropagatorHt_5d(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_dim_mask(out,prop_k,mask,FFT::backward);
|
||||
}
|
||||
else{
|
||||
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||
this->MomentumSpacePropagatorHt(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||
}
|
||||
|
||||
//phase for boundary condition
|
||||
out = out * exp((Real)(2.0*M_PI)*ci*ph);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
|
||||
bool fiveD = true; //5d propagator by default
|
||||
FreePropagator(in,out,mass,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass, bool fiveD) {
|
||||
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
|
||||
FreePropagator(in,out,mass,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||
bool fiveD = true; //5d propagator by default
|
||||
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
|
||||
FreePropagator(in,out,mass,twist,fiveD);
|
||||
};
|
||||
|
||||
virtual void Instantiatable(void) {};
|
||||
|
@ -9,6 +9,7 @@
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -63,9 +64,12 @@ namespace Grid {
|
||||
virtual RealD M (const FermionField &in, FermionField &out)=0;
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out)=0;
|
||||
|
||||
// half checkerboard operaions
|
||||
// Query the even even properties to make algorithmic decisions
|
||||
virtual int ConstEE(void) { return 1; }; // clover returns zero as EE depends on gauge field
|
||||
virtual int isTrivialEE(void) { return 0; };
|
||||
virtual RealD Mass(void) {return 0.0;};
|
||||
|
||||
// half checkerboard operaions
|
||||
virtual void Meooe (const FermionField &in, FermionField &out)=0;
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out)=0;
|
||||
virtual void Mooee (const FermionField &in, FermionField &out)=0;
|
||||
@ -95,17 +99,38 @@ namespace Grid {
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) { assert(0);};
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) { assert(0);};
|
||||
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass,std::vector<double> twist) {
|
||||
FFT theFFT((GridCartesian *) in._grid);
|
||||
|
||||
FermionField in_k(in._grid);
|
||||
FermionField prop_k(in._grid);
|
||||
|
||||
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||
this->MomentumSpacePropagator(prop_k,in_k,mass);
|
||||
//phase for boundary condition
|
||||
ComplexField coor(in._grid);
|
||||
ComplexField ph(in._grid); ph = zero;
|
||||
FermionField in_buf(in._grid); in_buf = zero;
|
||||
Complex ci(0.0,1.0);
|
||||
assert(twist.size() == Nd);//check that twist is Nd
|
||||
for(unsigned int nu = 0; nu < Nd; nu++)
|
||||
{
|
||||
LatticeCoordinate(coor, nu);
|
||||
ph = ph + twist[nu]*coor*((1./(in._grid->_fdimensions[nu])));
|
||||
}
|
||||
in_buf = exp((Real)(2.0*M_PI)*ci*ph*(-1.0))*in;
|
||||
|
||||
theFFT.FFT_all_dim(in_k,in_buf,FFT::forward);
|
||||
this->MomentumSpacePropagator(prop_k,in_k,mass,twist);
|
||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||
|
||||
//phase for boundary condition
|
||||
out = out * exp((Real)(2.0*M_PI)*ci*ph);
|
||||
|
||||
};
|
||||
virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) {
|
||||
std::vector<double> twist(Nd,0.0); //default: periodic boundarys in all directions
|
||||
FreePropagator(in,out,mass,twist);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////
|
||||
|
@ -764,7 +764,12 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
|
||||
inline void InsertGaugeField(DoubledGaugeField &U_ds,
|
||||
const GaugeLinkField &U,int mu)
|
||||
{
|
||||
PokeIndex<LorentzIndex>(U_ds, U, mu);
|
||||
}
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &UUUds, // for Naik term
|
||||
DoubledGaugeField &Uds,
|
||||
@ -803,8 +808,10 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
U = U *phases;
|
||||
Udag = Udag *phases;
|
||||
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
|
||||
InsertGaugeField(Uds,U,mu);
|
||||
InsertGaugeField(Uds,Udag,mu+4);
|
||||
// PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
// PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
|
||||
|
||||
// 3 hop based on thin links. Crazy huh ?
|
||||
U = PeekIndex<LorentzIndex>(Uthin, mu);
|
||||
@ -816,8 +823,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
UUU = UUU *phases;
|
||||
UUUdag = UUUdag *phases;
|
||||
|
||||
PokeIndex<LorentzIndex>(UUUds, UUU, mu);
|
||||
PokeIndex<LorentzIndex>(UUUds, UUUdag, mu+4);
|
||||
InsertGaugeField(UUUds,UUU,mu);
|
||||
InsertGaugeField(UUUds,UUUdag,mu+4);
|
||||
|
||||
}
|
||||
}
|
||||
@ -910,6 +917,23 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
mac(&phi(), &UU(), &chi());
|
||||
}
|
||||
|
||||
inline void InsertGaugeField(DoubledGaugeField &U_ds,const GaugeLinkField &U,int mu)
|
||||
{
|
||||
GridBase *GaugeGrid = U_ds._grid;
|
||||
parallel_for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
|
||||
SiteScalarGaugeLink ScalarU;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
peekLocalSite(ScalarUds, U_ds, lcoor);
|
||||
|
||||
peekLocalSite(ScalarU, U, lcoor);
|
||||
ScalarUds(mu) = ScalarU();
|
||||
|
||||
}
|
||||
}
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &UUUds, // for Naik term
|
||||
DoubledGaugeField &Uds,
|
||||
@ -951,23 +975,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
U = U *phases;
|
||||
Udag = Udag *phases;
|
||||
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
SiteScalarGaugeLink ScalarU;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
peekLocalSite(ScalarUds, Uds, lcoor);
|
||||
|
||||
peekLocalSite(ScalarU, U, lcoor);
|
||||
ScalarUds(mu) = ScalarU();
|
||||
|
||||
peekLocalSite(ScalarU, Udag, lcoor);
|
||||
ScalarUds(mu + 4) = ScalarU();
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||
}
|
||||
InsertGaugeField(Uds,U,mu);
|
||||
InsertGaugeField(Uds,Udag,mu+4);
|
||||
|
||||
// 3 hop based on thin links. Crazy huh ?
|
||||
U = PeekIndex<LorentzIndex>(Uthin, mu);
|
||||
@ -979,24 +988,8 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
|
||||
UUU = UUU *phases;
|
||||
UUUdag = UUUdag *phases;
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
|
||||
SiteScalarGaugeLink ScalarU;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
|
||||
peekLocalSite(ScalarUds, UUUds, lcoor);
|
||||
|
||||
peekLocalSite(ScalarU, UUU, lcoor);
|
||||
ScalarUds(mu) = ScalarU();
|
||||
|
||||
peekLocalSite(ScalarU, UUUdag, lcoor);
|
||||
ScalarUds(mu + 4) = ScalarU();
|
||||
|
||||
pokeLocalSite(ScalarUds, UUUds, lcoor);
|
||||
}
|
||||
InsertGaugeField(UUUds,UUU,mu);
|
||||
InsertGaugeField(UUUds,UUUdag,mu+4);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3,
|
||||
template <class Impl>
|
||||
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid,
|
||||
RealD _mass,
|
||||
RealD _c1, RealD _c2,RealD _u0,
|
||||
const ImplParams &p)
|
||||
: Kernels(p),
|
||||
_grid(&Fgrid),
|
||||
@ -62,6 +63,16 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GridCartesian &Fgrid, G
|
||||
UUUmuOdd(&Hgrid) ,
|
||||
_tmp(&Hgrid)
|
||||
{
|
||||
int vol4;
|
||||
int LLs=1;
|
||||
c1=_c1;
|
||||
c2=_c2;
|
||||
u0=_u0;
|
||||
vol4= _grid->oSites();
|
||||
Stencil.BuildSurfaceList(LLs,vol4);
|
||||
vol4= _cbgrid->oSites();
|
||||
StencilEven.BuildSurfaceList(LLs,vol4);
|
||||
StencilOdd.BuildSurfaceList(LLs,vol4);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -69,22 +80,10 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin, Gau
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _c2,RealD _u0,
|
||||
const ImplParams &p)
|
||||
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p)
|
||||
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,_c1,_c2,_u0,p)
|
||||
{
|
||||
c1=_c1;
|
||||
c2=_c2;
|
||||
u0=_u0;
|
||||
ImportGauge(_Uthin,_Ufat);
|
||||
}
|
||||
template <class Impl>
|
||||
ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin,GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
const ImplParams &p)
|
||||
: ImprovedStaggeredFermion(Fgrid,Hgrid,_mass,p)
|
||||
{
|
||||
ImportGaugeSimple(_Utriple,_Ufat);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Momentum space propagator should be
|
||||
@ -98,11 +97,6 @@ ImprovedStaggeredFermion<Impl>::ImprovedStaggeredFermion(GaugeField &_Uthin,Gaug
|
||||
// of above link to implmement fourier based solver.
|
||||
////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin)
|
||||
{
|
||||
ImportGauge(_Uthin,_Uthin);
|
||||
};
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////
|
||||
@ -125,6 +119,20 @@ void ImprovedStaggeredFermion<Impl>::ImportGaugeSimple(const GaugeField &_Utripl
|
||||
PokeIndex<LorentzIndex>(Umu, -U, mu+4);
|
||||
|
||||
}
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U)
|
||||
{
|
||||
|
||||
Umu = _U;
|
||||
UUUmu = _UUU;
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::CopyGaugeCheckerboards(void)
|
||||
{
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven,UUUmu);
|
||||
@ -160,10 +168,7 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
|
||||
}
|
||||
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven, UUUmu);
|
||||
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
@ -322,6 +327,7 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionF
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag) {
|
||||
DhopCalls+=2;
|
||||
conformable(in._grid, _grid); // verifies full grid
|
||||
conformable(in._grid, out._grid);
|
||||
|
||||
@ -332,6 +338,7 @@ void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag) {
|
||||
DhopCalls+=1;
|
||||
conformable(in._grid, _cbgrid); // verifies half grid
|
||||
conformable(in._grid, out._grid); // drops the cb check
|
||||
|
||||
@ -343,6 +350,7 @@ void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag) {
|
||||
DhopCalls+=1;
|
||||
conformable(in._grid, _cbgrid); // verifies half grid
|
||||
conformable(in._grid, out._grid); // drops the cb check
|
||||
|
||||
@ -374,25 +382,193 @@ void ImprovedStaggeredFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
Compressor compressor;
|
||||
int len = U._grid->oSites();
|
||||
const int LLs = 1;
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopFaceTime -= usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime += usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime -= usecond();
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
|
||||
if (tid >= ncomms) {
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = len;
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
}
|
||||
|
||||
// do the compute
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
// Interior = 1; Exterior = 0; must implement for staggered
|
||||
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
|
||||
}
|
||||
} else {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
// Interior = 1; Exterior = 0;
|
||||
int sU = ss;
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,1,0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
st.CommunicateThreaded();
|
||||
}
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
||||
// First to enter, last to leave timing
|
||||
DhopFaceTime -= usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime -= usecond();
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
|
||||
}
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),1,sU,in,out,0,1);
|
||||
}
|
||||
}
|
||||
DhopComputeTime2 += usecond();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopCommTime -= usecond();
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
if (dag == DaggerYes) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSite(st, lo, U, UUU, st.CommBuf(), 1, sss, in, out);
|
||||
}
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Reporting
|
||||
////////////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Report(void)
|
||||
{
|
||||
std::vector<int> latt = GridDefaultLatt();
|
||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _grid->_Nprocessors;
|
||||
RealD NN = _grid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_grid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Conserved current - not yet implemented.
|
||||
////////////////////////////////////////////////////////
|
||||
|
@ -49,6 +49,18 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
@ -105,25 +117,34 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
|
||||
|
||||
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
void DhopInternalSerialComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
// Constructor
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Grid own interface Constructor
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Ufat, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
|
||||
const ImplParams &p = ImplParams());
|
||||
|
||||
ImprovedStaggeredFermion(GaugeField &_Uthin, GaugeField &_Utriple, GaugeField &_Ufat, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1, RealD _c2,RealD _u0,
|
||||
const ImplParams &p = ImplParams());
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// MILC constructor no gauge fields
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
ImprovedStaggeredFermion(GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0,
|
||||
const ImplParams &p = ImplParams());
|
||||
|
||||
|
||||
// DoubleStore impl dependent
|
||||
void ImportGaugeSimple(const GaugeField &_Utriple, const GaugeField &_Ufat);
|
||||
void ImportGauge(const GaugeField &_Uthin, const GaugeField &_Ufat);
|
||||
void ImportGauge(const GaugeField &_Uthin);
|
||||
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
|
||||
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
|
||||
void ImportGaugeSimple(const GaugeField &_UUU ,const GaugeField &_U);
|
||||
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
|
||||
DoubledGaugeField &GetU(void) { return Umu ; } ;
|
||||
DoubledGaugeField &GetUUU(void) { return UUUmu; };
|
||||
void CopyGaugeCheckerboards(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
@ -132,7 +153,8 @@ class ImprovedStaggeredFermion : public StaggeredKernels<Impl>, public ImprovedS
|
||||
// protected:
|
||||
public:
|
||||
// any other parameters of action ???
|
||||
|
||||
virtual int isTrivialEE(void) { return 1; };
|
||||
virtual RealD Mass(void) { return mass; }
|
||||
RealD mass;
|
||||
RealD u0;
|
||||
RealD c1;
|
||||
|
@ -41,8 +41,7 @@ ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3,
|
||||
|
||||
// 5d lattice for DWF.
|
||||
template<class Impl>
|
||||
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
|
||||
GridCartesian &FiveDimGrid,
|
||||
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
@ -121,16 +120,74 @@ ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,
|
||||
assert(FiveDimGrid._simd_layout[0] ==1);
|
||||
|
||||
}
|
||||
int LLs = FiveDimGrid._rdimensions[0];
|
||||
int vol4= FourDimGrid.oSites();
|
||||
Stencil.BuildSurfaceList(LLs,vol4);
|
||||
|
||||
// Allocate the required comms buffer
|
||||
vol4=FourDimRedBlackGrid.oSites();
|
||||
StencilEven.BuildSurfaceList(LLs,vol4);
|
||||
StencilOdd.BuildSurfaceList(LLs,vol4);
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::CopyGaugeCheckerboards(void)
|
||||
{
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven,UUUmu);
|
||||
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
|
||||
}
|
||||
template<class Impl>
|
||||
ImprovedStaggeredFermion5D<Impl>::ImprovedStaggeredFermion5D(GaugeField &_Uthin,GaugeField &_Ufat,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,
|
||||
RealD _c1,RealD _c2, RealD _u0,
|
||||
const ImplParams &p) :
|
||||
ImprovedStaggeredFermion5D(FiveDimGrid,FiveDimRedBlackGrid,
|
||||
FourDimGrid,FourDimRedBlackGrid,
|
||||
_mass,_c1,_c2,_u0,p)
|
||||
{
|
||||
ImportGauge(_Uthin,_Ufat);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// For MILC use; pass three link U's and 1 link U
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin)
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGaugeSimple(const GaugeField &_Utriple,const GaugeField &_Ufat)
|
||||
{
|
||||
ImportGauge(_Uthin,_Uthin);
|
||||
};
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Trivial import; phases and fattening and such like preapplied
|
||||
/////////////////////////////////////////////////////////////////
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
|
||||
auto U = PeekIndex<LorentzIndex>(_Utriple, mu);
|
||||
Impl::InsertGaugeField(UUUmu,U,mu);
|
||||
|
||||
U = adj( Cshift(U, mu, -3));
|
||||
Impl::InsertGaugeField(UUUmu,-U,mu+4);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(_Ufat, mu);
|
||||
Impl::InsertGaugeField(Umu,U,mu);
|
||||
|
||||
U = adj( Cshift(U, mu, -1));
|
||||
Impl::InsertGaugeField(Umu,-U,mu+4);
|
||||
|
||||
}
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Trivial import; phases and fattening and such like preapplied
|
||||
/////////////////////////////////////////////////////////////////
|
||||
Umu = _U;
|
||||
UUUmu = _UUU;
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat)
|
||||
{
|
||||
@ -159,10 +216,7 @@ void ImprovedStaggeredFermion5D<Impl>::ImportGauge(const GaugeField &_Uthin,cons
|
||||
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
|
||||
}
|
||||
|
||||
pickCheckerboard(Even, UmuEven, Umu);
|
||||
pickCheckerboard(Odd, UmuOdd , Umu);
|
||||
pickCheckerboard(Even, UUUmuEven, UUUmu);
|
||||
pickCheckerboard(Odd, UUUmuOdd, UUUmu);
|
||||
CopyGaugeCheckerboards();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
|
||||
@ -223,6 +277,162 @@ void ImprovedStaggeredFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/*CHANGE */
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
if ( StaggeredKernelsStatic::Comms == StaggeredKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,UUU,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerialComms(st,lo,U,UUU,in,out,dag);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef GRID_OMP
|
||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
Compressor compressor;
|
||||
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
int len = U._grid->oSites();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
double ctime=0;
|
||||
double ptime=0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Ugly explicit thread mapping introduced for OPA reasons.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#pragma omp parallel reduction(max:ctime) reduction(max:ptime)
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
if (tid >= ncomms) {
|
||||
double start = usecond();
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = U._grid->oSites(); // 4d vol
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
}
|
||||
|
||||
// do the compute
|
||||
if (dag == DaggerYes) {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
int sU = ss;
|
||||
// Interior = 1; Exterior = 0; must implement for staggered
|
||||
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<---------
|
||||
}
|
||||
} else {
|
||||
for (int ss = myblock; ss < myblock+myn; ++ss) {
|
||||
// Interior = 1; Exterior = 0;
|
||||
int sU = ss;
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,1,0); //<------------
|
||||
}
|
||||
}
|
||||
ptime = usecond() - start;
|
||||
} else {
|
||||
double start = usecond();
|
||||
st.CommunicateThreaded();
|
||||
ctime = usecond() - start;
|
||||
}
|
||||
}
|
||||
DhopCommTime += ctime;
|
||||
DhopComputeTime+=ptime;
|
||||
|
||||
// First to enter, last to leave timing
|
||||
st.CollateThreads();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSiteDag(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1); //<----------
|
||||
}
|
||||
} else {
|
||||
int sz=st.surface_list.size();
|
||||
parallel_for (int ss = 0; ss < sz; ss++) {
|
||||
int sU = st.surface_list[ss];
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out,0,1);//<----------
|
||||
}
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
Compressor compressor;
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
|
||||
|
||||
|
||||
//double t1=usecond();
|
||||
DhopTotalTime -= usecond();
|
||||
DhopCommTime -= usecond();
|
||||
st.HaloExchange(in,compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
if (dag == DaggerYes) {
|
||||
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU=ss;
|
||||
Kernels::DhopSiteDag(st, lo, U, UUU, st.CommBuf(), LLs, sU,in, out);
|
||||
}
|
||||
} else {
|
||||
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU=ss;
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
||||
}
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
//double t2=usecond();
|
||||
//std::cout << __FILE__ << " " << __func__ << " Total Time " << DhopTotalTime << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Total Time Org " << t2-t1 << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Comml Time " << DhopCommTime << std::endl;
|
||||
//std::cout << __FILE__ << " " << __func__ << " Compute Time " << DhopComputeTime << std::endl;
|
||||
|
||||
}
|
||||
/*CHANGE END*/
|
||||
|
||||
/* ORG
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,DoubledGaugeField & UUU,
|
||||
@ -254,6 +464,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
template<class Impl>
|
||||
@ -336,6 +547,9 @@ void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
|
@ -64,6 +64,8 @@ namespace QCD {
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
@ -119,7 +121,27 @@ namespace QCD {
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
void DhopInternalOverlappedComms(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
void DhopInternalSerialComms(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
|
||||
// Constructors
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Grid internal interface -- Thin link and fat link, with coefficients
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
ImprovedStaggeredFermion5D(GaugeField &_Uthin,
|
||||
GaugeField &_Ufat,
|
||||
GridCartesian &FiveDimGrid,
|
||||
@ -127,17 +149,37 @@ namespace QCD {
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
double _mass,
|
||||
RealD _c1=9.0/8.0, RealD _c2=-1.0/24.0,RealD _u0=1.0,
|
||||
RealD _c1, RealD _c2,RealD _u0,
|
||||
const ImplParams &p= ImplParams());
|
||||
|
||||
// DoubleStore
|
||||
void ImportGauge(const GaugeField &_U);
|
||||
void ImportGauge(const GaugeField &_Uthin,const GaugeField &_Ufat);
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MILC constructor ; triple links, no rescale factors; must be externally pre multiplied
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
ImprovedStaggeredFermion5D(GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
double _mass,
|
||||
RealD _c1=1.0, RealD _c2=1.0,RealD _u0=1.0,
|
||||
const ImplParams &p= ImplParams());
|
||||
|
||||
// DoubleStore gauge field in operator
|
||||
void ImportGauge (const GaugeField &_Uthin ) { assert(0); }
|
||||
void ImportGauge (const GaugeField &_Uthin ,const GaugeField &_Ufat);
|
||||
void ImportGaugeSimple(const GaugeField &_UUU,const GaugeField &_U);
|
||||
void ImportGaugeSimple(const DoubledGaugeField &_UUU,const DoubledGaugeField &_U);
|
||||
// Give a reference; can be used to do an assignment or copy back out after import
|
||||
// if Carleton wants to cache them and not use the ImportSimple
|
||||
DoubledGaugeField &GetU(void) { return Umu ; } ;
|
||||
DoubledGaugeField &GetUUU(void) { return UUUmu; };
|
||||
void CopyGaugeCheckerboards(void);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
public:
|
||||
|
||||
virtual int isTrivialEE(void) { return 1; };
|
||||
virtual RealD Mass(void) { return mass; }
|
||||
|
||||
GridBase *_FourDimGrid;
|
||||
GridBase *_FourDimRedBlackGrid;
|
||||
|
@ -42,8 +42,8 @@ namespace Grid {
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
public:
|
||||
|
||||
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) {
|
||||
this->MomentumSpacePropagatorHw(out,in,_m);
|
||||
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||
};
|
||||
|
||||
// Constructors
|
||||
|
@ -32,223 +32,241 @@ namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
int StaggeredKernelsStatic::Opt= StaggeredKernelsStatic::OptGeneric;
|
||||
int StaggeredKernelsStatic::Comms = StaggeredKernelsStatic::CommsAndCompute;
|
||||
|
||||
#define GENERIC_STENCIL_LEG(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if (SE->_is_local ) { \
|
||||
if (SE->_permute) { \
|
||||
chi_p = χ \
|
||||
permute(chi, in._odata[SE->_offset], ptype); \
|
||||
} else { \
|
||||
chi_p = &in._odata[SE->_offset]; \
|
||||
} \
|
||||
} else { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir);
|
||||
|
||||
#define GENERIC_STENCIL_LEG_INT(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if (SE->_is_local ) { \
|
||||
if (SE->_permute) { \
|
||||
chi_p = χ \
|
||||
permute(chi, in._odata[SE->_offset], ptype); \
|
||||
} else { \
|
||||
chi_p = &in._odata[SE->_offset]; \
|
||||
} \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
} \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
|
||||
}
|
||||
|
||||
#define GENERIC_STENCIL_LEG_EXT(U,Dir,skew,multLink) \
|
||||
SE = st.GetEntry(ptype, Dir+skew, sF); \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
nmu++; \
|
||||
chi_p = &buf[SE->_offset]; \
|
||||
multLink(Uchi, U._odata[sU], *chi_p, Dir); \
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
|
||||
|
||||
////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic implementation; move to different file?
|
||||
////////////////////////////////////////////
|
||||
|
||||
// Int, Ext, Int+Ext cases for comms overlap
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) {
|
||||
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int skew = 0;
|
||||
if (threeLink) skew=8;
|
||||
///////////////////////////
|
||||
// Xp
|
||||
///////////////////////////
|
||||
int skew;
|
||||
|
||||
SE = st.GetEntry(ptype, Xp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
GENERIC_STENCIL_LEG(U,Xp,skew,Impl::multLink);
|
||||
GENERIC_STENCIL_LEG(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
vstream(out._odata[sF], Uchi);
|
||||
}
|
||||
Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp);
|
||||
|
||||
///////////////////////////
|
||||
// Yp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Yp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Yp);
|
||||
|
||||
///////////////////////////
|
||||
// Zp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Zp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zp);
|
||||
|
||||
///////////////////////////
|
||||
// Tp
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Tp+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tp);
|
||||
|
||||
///////////////////////////
|
||||
// Xm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Xm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Xm);
|
||||
|
||||
///////////////////////////
|
||||
// Ym
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Ym+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Ym);
|
||||
|
||||
///////////////////////////
|
||||
// Zm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Zm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Zm);
|
||||
|
||||
///////////////////////////
|
||||
// Tm
|
||||
///////////////////////////
|
||||
SE = st.GetEntry(ptype, Tm+skew, sF);
|
||||
if (SE->_is_local) {
|
||||
if (SE->_permute) {
|
||||
chi_p = χ
|
||||
permute(chi, in._odata[SE->_offset], ptype);
|
||||
} else {
|
||||
chi_p = &in._odata[SE->_offset];
|
||||
}
|
||||
} else {
|
||||
chi_p = &buf[SE->_offset];
|
||||
}
|
||||
Impl::multLinkAdd(Uchi, U._odata[sU], *chi_p, Tm);
|
||||
|
||||
vstream(out, Uchi);
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Only contributions from interior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
Uchi=zero;
|
||||
GENERIC_STENCIL_LEG_INT(U,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_INT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
if ( dag ) {
|
||||
Uchi = - Uchi;
|
||||
}
|
||||
vstream(out._odata[sF], Uchi);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Only contributions from exterior of our node
|
||||
///////////////////////////////////////////////////
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag) {
|
||||
const SiteSpinor *chi_p;
|
||||
SiteSpinor chi;
|
||||
SiteSpinor Uchi;
|
||||
StencilEntry *SE;
|
||||
int ptype;
|
||||
int nmu=0;
|
||||
int skew ;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
skew = 0;
|
||||
Uchi=zero;
|
||||
GENERIC_STENCIL_LEG_EXT(U,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(U,Tm,skew,Impl::multLinkAdd);
|
||||
skew=8;
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Xp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Yp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Zp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Tp,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Xm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Ym,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Zm,skew,Impl::multLinkAdd);
|
||||
GENERIC_STENCIL_LEG_EXT(UUU,Tm,skew,Impl::multLinkAdd);
|
||||
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
out._odata[sF] = out._odata[sF] - Uchi;
|
||||
} else {
|
||||
out._odata[sF] = out._odata[sF] + Uchi;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Driving / wrapping routine to select right kernel
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out) {
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=1;
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
//FIXME; move the sign into the Asm routine
|
||||
case OptInlineAsm:
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||
for(int s=0;s<LLs;s++) {
|
||||
int sF=s+LLs*sU;
|
||||
out._odata[sF]=-out._odata[sF];
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
break;
|
||||
case OptGeneric:
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =-naive-naik;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout<<"Oops Opt = "<<Opt<<std::endl;
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,
|
||||
int interior,int exterior)
|
||||
{
|
||||
int dag=0;
|
||||
DhopSite(st,lo,U,UUU,buf,LLs,sU,in,out,dag,interior,exterior);
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
int sU, const FermionField &in, FermionField &out,
|
||||
int dag,int interior,int exterior)
|
||||
{
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int dag=0;
|
||||
switch(Opt) {
|
||||
#ifdef AVX512
|
||||
case OptInlineAsm:
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out);
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteAsm(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else {
|
||||
std::cout << GridLogError << "Cannot overlap comms and compute with Staggered assembly"<<std::endl;
|
||||
assert(0);
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
DhopSiteHand(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteHand (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteHandInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteHandExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=LLs*sU+s;
|
||||
// assert(sF<in._odata.size());
|
||||
// assert(sU< U._odata.size());
|
||||
// assert(sF>=0); assert(sU>=0);
|
||||
DhopSiteDepth(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepth(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =naive+naik;
|
||||
if ( interior && exterior ) {
|
||||
DhopSiteGeneric (st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( interior ) {
|
||||
DhopSiteGenericInt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
} else if ( exterior ) {
|
||||
DhopSiteGenericExt(st,lo,U,UUU,buf,LLs,sU,in,out,dag);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -38,8 +38,9 @@ namespace QCD {
|
||||
class StaggeredKernelsStatic {
|
||||
public:
|
||||
enum { OptGeneric, OptHandUnroll, OptInlineAsm };
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static int Opt; // these are a temporary hack
|
||||
enum { CommsAndCompute, CommsThenCompute };
|
||||
static int Opt;
|
||||
static int Comms;
|
||||
};
|
||||
|
||||
template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , public StaggeredKernelsStatic {
|
||||
@ -53,24 +54,62 @@ public:
|
||||
void DhopDir(StencilImpl &st, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dir,int disp);
|
||||
|
||||
void DhopSiteDepth(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, SiteSpinor &out,int threeLink);
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic Nc kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteGeneric(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteGenericInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteGenericExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
void DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
void DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, SiteSpinor&out,int threeLink);
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// Asm Nc=3 specific kernels
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag);
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Generic interface; fan out to right routine
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU,SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out, int dag);
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int interior=1,int exterior=1);
|
||||
|
||||
void DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf,
|
||||
int LLs, int sU, const FermionField &in, FermionField &out);
|
||||
void DhopSite(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor * buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out, int dag, int interior,int exterior);
|
||||
|
||||
public:
|
||||
|
||||
|
@ -560,16 +560,53 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
#define nREDUCE(out) \
|
||||
asm ( \
|
||||
VADD(UChi_00,UChi_10,UChi_00) \
|
||||
VADD(UChi_01,UChi_11,UChi_01) \
|
||||
VADD(UChi_02,UChi_12,UChi_02) \
|
||||
VADD(UChi_30,UChi_20,UChi_30) \
|
||||
VADD(UChi_31,UChi_21,UChi_31) \
|
||||
VADD(UChi_32,UChi_22,UChi_32) \
|
||||
VADD(UChi_00,UChi_30,UChi_00) \
|
||||
VADD(UChi_01,UChi_31,UChi_01) \
|
||||
VADD(UChi_02,UChi_32,UChi_02) ); \
|
||||
asm (VZERO(Chi_00) \
|
||||
VSUB(UChi_00,Chi_00,UChi_00) \
|
||||
VSUB(UChi_01,Chi_00,UChi_01) \
|
||||
VSUB(UChi_02,Chi_00,UChi_02) ); \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
#define REDUCEa(out) \
|
||||
asm ( \
|
||||
VADD(UChi_00,UChi_10,UChi_00) \
|
||||
VADD(UChi_01,UChi_11,UChi_01) \
|
||||
VADD(UChi_02,UChi_12,UChi_02) ); \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
// FIXME is sign right in the VSUB ?
|
||||
#define nREDUCEa(out) \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
VADD(UChi_00,UChi_10,UChi_00) \
|
||||
VADD(UChi_01,UChi_11,UChi_01) \
|
||||
VADD(UChi_02,UChi_12,UChi_02) ); \
|
||||
asm (VZERO(Chi_00) \
|
||||
VSUB(UChi_00,Chi_00,UChi_00) \
|
||||
VSUB(UChi_01,Chi_00,UChi_01) \
|
||||
VSUB(UChi_02,Chi_00,UChi_02) ); \
|
||||
asm ( \
|
||||
VSTORE(0,%0,pUChi_00) \
|
||||
VSTORE(1,%0,pUChi_01) \
|
||||
VSTORE(2,%0,pUChi_02) \
|
||||
: : "r" (out) : "memory" );
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_0,Chi_0);\
|
||||
@ -581,10 +618,9 @@ namespace QCD {
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
assert(0);
|
||||
};
|
||||
@ -645,10 +681,9 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
// This is the single precision 5th direction vectorised kernel
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -685,7 +720,11 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCE(addr0);
|
||||
if ( dag ) {
|
||||
nREDUCE(addr0);
|
||||
} else {
|
||||
REDUCE(addr0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
@ -695,10 +734,9 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -734,7 +772,11 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
MULT_ADD_LS(gauge0,gauge1,gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCE(addr0);
|
||||
if ( dag ) {
|
||||
nREDUCE(addr0);
|
||||
} else {
|
||||
REDUCE(addr0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
@ -776,10 +818,9 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -832,7 +873,11 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCEa(addr0);
|
||||
if ( dag ) {
|
||||
nREDUCEa(addr0);
|
||||
} else {
|
||||
REDUCEa(addr0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
@ -841,10 +886,9 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out)
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
#ifdef AVX512
|
||||
uint64_t gauge0,gauge1,gauge2,gauge3;
|
||||
@ -897,7 +941,11 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
||||
MULT_ADD_XYZT(gauge2,gauge3);
|
||||
|
||||
addr0 = (uint64_t) &out._odata[sF];
|
||||
REDUCEa(addr0);
|
||||
if ( dag ) {
|
||||
nREDUCEa(addr0);
|
||||
} else {
|
||||
REDUCEa(addr0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(0);
|
||||
@ -909,7 +957,7 @@ template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st,
|
||||
DoubledGaugeField &U, \
|
||||
DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, \
|
||||
int sU, const FermionField &in, FermionField &out);
|
||||
int sU, const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplD);
|
||||
KERNEL_INSTANTIATE(StaggeredKernels,DhopSiteAsm,StaggeredImplF);
|
||||
|
@ -28,7 +28,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#include <Grid.h>
|
||||
|
||||
#define REGISTER
|
||||
|
||||
#define LOAD_CHI(b) \
|
||||
const SiteSpinor & ref (b[offset]); \
|
||||
@ -59,7 +58,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
UChi ## _1 += U_12*Chi_2;\
|
||||
UChi ## _2 += U_22*Chi_2;
|
||||
|
||||
#define MULT_ADD(A,UChi) \
|
||||
#define MULT_ADD(U,A,UChi) \
|
||||
auto & ref(U._odata[sU](A)); \
|
||||
Impl::loadLinkElement(U_00,ref()(0,0)); \
|
||||
Impl::loadLinkElement(U_10,ref()(1,0)); \
|
||||
@ -82,241 +81,319 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
|
||||
#define PERMUTE_DIR(dir) \
|
||||
permute##dir(Chi_0,Chi_0);\
|
||||
permute##dir(Chi_1,Chi_1);\
|
||||
permute##dir(Chi_2,Chi_2);
|
||||
permute##dir(Chi_0,Chi_0); \
|
||||
permute##dir(Chi_1,Chi_1); \
|
||||
permute##dir(Chi_2,Chi_2);
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHI(in._odata); \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(Perm); \
|
||||
} \
|
||||
} else { \
|
||||
LOAD_CHI(buf); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_BEGIN(Dir,Perm,skew,even) \
|
||||
HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
{ \
|
||||
MULT(Dir,even); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG(U,Dir,Perm,skew,even) \
|
||||
HAND_STENCIL_LEG_BASE(Dir,Perm,skew) \
|
||||
{ \
|
||||
MULT_ADD(U,Dir,even); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define HAND_STENCIL_LEG_INT(U,Dir,Perm,skew,even) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ( local ) { \
|
||||
LOAD_CHI(in._odata); \
|
||||
if ( perm) { \
|
||||
PERMUTE_DIR(Perm); \
|
||||
} \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
LOAD_CHI(buf); \
|
||||
} \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
MULT_ADD(U,Dir,even); \
|
||||
}
|
||||
|
||||
#define HAND_STENCIL_LEG_EXT(U,Dir,Perm,skew,even) \
|
||||
SE=st.GetEntry(ptype,Dir+skew,sF); \
|
||||
offset = SE->_offset; \
|
||||
local = SE->_is_local; \
|
||||
perm = SE->_permute; \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
nmu++; \
|
||||
{ LOAD_CHI(buf); } \
|
||||
{ MULT_ADD(U,Dir,even); } \
|
||||
}
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs,
|
||||
int sU, const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
SiteSpinor naik;
|
||||
SiteSpinor naive;
|
||||
int oneLink =0;
|
||||
int threeLink=1;
|
||||
int skew(0);
|
||||
Real scale(1.0);
|
||||
|
||||
if(dag) scale = -1.0;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
DhopSiteDepthHand(st,lo,U,buf,sF,sU,in,naive,oneLink);
|
||||
DhopSiteDepthHand(st,lo,UUU,buf,sF,sU,in,naik,threeLink);
|
||||
out._odata[sF] =scale*(naive+naik);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink)
|
||||
void StaggeredKernels<Impl>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
REGISTER Simd even_0; // 12 regs on knc
|
||||
REGISTER Simd even_1;
|
||||
REGISTER Simd even_2;
|
||||
REGISTER Simd odd_0; // 12 regs on knc
|
||||
REGISTER Simd odd_1;
|
||||
REGISTER Simd odd_2;
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
REGISTER Simd Chi_0; // two spinor; 6 regs
|
||||
REGISTER Simd Chi_1;
|
||||
REGISTER Simd Chi_2;
|
||||
|
||||
REGISTER Simd U_00; // two rows of U matrix
|
||||
REGISTER Simd U_10;
|
||||
REGISTER Simd U_20;
|
||||
REGISTER Simd U_01;
|
||||
REGISTER Simd U_11;
|
||||
REGISTER Simd U_21; // 2 reg left.
|
||||
REGISTER Simd U_02;
|
||||
REGISTER Simd U_12;
|
||||
REGISTER Simd U_22;
|
||||
|
||||
int skew = 0;
|
||||
if (threeLink) skew=8;
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
// Xp
|
||||
SE=st.GetEntry(ptype,Xp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT(Xp,even);
|
||||
}
|
||||
|
||||
// Yp
|
||||
SE=st.GetEntry(ptype,Yp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT(Yp,odd);
|
||||
}
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
|
||||
// Zp
|
||||
SE=st.GetEntry(ptype,Zp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_BEGIN(Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_BEGIN(Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG (U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG (U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG (U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG (U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG(UUU,Tm,0,skew,odd);
|
||||
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
vstream(out._odata[sF],result);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Zp,even);
|
||||
}
|
||||
|
||||
// Tp
|
||||
SE=st.GetEntry(ptype,Tp+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Tp,odd);
|
||||
}
|
||||
|
||||
// Xm
|
||||
SE=st.GetEntry(ptype,Xm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Xm,even);
|
||||
}
|
||||
|
||||
|
||||
// Ym
|
||||
SE=st.GetEntry(ptype,Ym+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Ym,odd);
|
||||
}
|
||||
|
||||
// Zm
|
||||
SE=st.GetEntry(ptype,Zm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Zm,even);
|
||||
}
|
||||
|
||||
// Tm
|
||||
SE=st.GetEntry(ptype,Tm+skew,sF);
|
||||
offset = SE->_offset;
|
||||
local = SE->_is_local;
|
||||
perm = SE->_permute;
|
||||
|
||||
if ( local ) {
|
||||
LOAD_CHI(in._odata);
|
||||
if ( perm) {
|
||||
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
||||
}
|
||||
} else {
|
||||
LOAD_CHI(buf);
|
||||
}
|
||||
{
|
||||
MULT_ADD(Tm,odd);
|
||||
}
|
||||
|
||||
vstream(out()()(0),even_0+odd_0);
|
||||
vstream(out()()(1),even_1+odd_1);
|
||||
vstream(out()()(2),even_2+odd_2);
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
even_0 = zero; even_1 = zero; even_2 = zero;
|
||||
odd_0 = zero; odd_1 = zero; odd_2 = zero;
|
||||
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_INT(U,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_INT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_INT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_INT(UUU,Tm,0,skew,odd);
|
||||
|
||||
// Assume every site must be connected to at least one interior point. No 1^4 subvols.
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
vstream(out._odata[sF],result);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U, DoubledGaugeField &UUU,
|
||||
SiteSpinor *buf, int LLs, int sU,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
|
||||
Simd even_0; // 12 regs on knc
|
||||
Simd even_1;
|
||||
Simd even_2;
|
||||
Simd odd_0; // 12 regs on knc
|
||||
Simd odd_1;
|
||||
Simd odd_2;
|
||||
|
||||
Simd Chi_0; // two spinor; 6 regs
|
||||
Simd Chi_1;
|
||||
Simd Chi_2;
|
||||
|
||||
Simd U_00; // two rows of U matrix
|
||||
Simd U_10;
|
||||
Simd U_20;
|
||||
Simd U_01;
|
||||
Simd U_11;
|
||||
Simd U_21; // 2 reg left.
|
||||
Simd U_02;
|
||||
Simd U_12;
|
||||
Simd U_22;
|
||||
|
||||
SiteSpinor result;
|
||||
int offset,local,perm, ptype;
|
||||
|
||||
StencilEntry *SE;
|
||||
int skew;
|
||||
|
||||
for(int s=0;s<LLs;s++){
|
||||
int sF=s+LLs*sU;
|
||||
|
||||
even_0 = zero; even_1 = zero; even_2 = zero;
|
||||
odd_0 = zero; odd_1 = zero; odd_2 = zero;
|
||||
int nmu=0;
|
||||
skew = 0;
|
||||
HAND_STENCIL_LEG_EXT(U,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(U,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(U,Tm,0,skew,odd);
|
||||
skew = 8;
|
||||
HAND_STENCIL_LEG_EXT(UUU,Xp,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Yp,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Zp,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Tp,0,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Xm,3,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Ym,2,skew,odd);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Zm,1,skew,even);
|
||||
HAND_STENCIL_LEG_EXT(UUU,Tm,0,skew,odd);
|
||||
|
||||
// Add sum of all exterior connected stencil legs
|
||||
if ( nmu ) {
|
||||
if ( dag ) {
|
||||
result()()(0) = - even_0 - odd_0;
|
||||
result()()(1) = - even_1 - odd_1;
|
||||
result()()(2) = - even_2 - odd_2;
|
||||
} else {
|
||||
result()()(0) = even_0 + odd_0;
|
||||
result()()(1) = even_1 + odd_1;
|
||||
result()()(2) = even_2 + odd_2;
|
||||
}
|
||||
out._odata[sF] = out._odata[sF] + result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define DHOP_SITE_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHand(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, \
|
||||
int sU, const FermionField &in, FermionField &out, int dag);
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
\
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHandInt(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
\
|
||||
template void StaggeredKernels<IMPL>::DhopSiteHandExt(StencilImpl &st, LebesgueOrder &lo, \
|
||||
DoubledGaugeField &U,DoubledGaugeField &UUU, \
|
||||
SiteSpinor *buf, int LLs, int sU, \
|
||||
const FermionField &in, FermionField &out, int dag); \
|
||||
|
||||
#define DHOP_SITE_DEPTH_HAND_INSTANTIATE(IMPL) \
|
||||
template void StaggeredKernels<IMPL>::DhopSiteDepthHand(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, \
|
||||
SiteSpinor *buf, int sF, \
|
||||
int sU, const FermionField &in, SiteSpinor &out,int threeLink) ;
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplD);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredImplF);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
||||
DHOP_SITE_HAND_INSTANTIATE(StaggeredVec5dImplF);
|
||||
|
||||
DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplD);
|
||||
DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredImplF);
|
||||
DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplD);
|
||||
DHOP_SITE_DEPTH_HAND_INSTANTIATE(StaggeredVec5dImplF);
|
||||
|
||||
}}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -274,41 +274,16 @@ public:
|
||||
if ( timer4 ) std::cout << GridLogMessage << " timer4 " <<timer4 <<std::endl;
|
||||
}
|
||||
|
||||
std::vector<int> same_node;
|
||||
std::vector<int> surface_list;
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
int npoints,
|
||||
int checkerboard,
|
||||
const std::vector<int> &directions,
|
||||
const std::vector<int> &distances)
|
||||
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances) ,
|
||||
same_node(npoints)
|
||||
: CartesianStencil<vobj,cobj> (grid,npoints,checkerboard,directions,distances)
|
||||
{
|
||||
ZeroCountersi();
|
||||
surface_list.resize(0);
|
||||
};
|
||||
|
||||
void BuildSurfaceList(int Ls,int vol4){
|
||||
|
||||
// find same node for SHM
|
||||
// Here we know the distance is 1 for WilsonStencil
|
||||
for(int point=0;point<this->_npoints;point++){
|
||||
same_node[point] = this->SameNode(point);
|
||||
}
|
||||
|
||||
for(int site = 0 ;site< vol4;site++){
|
||||
int local = 1;
|
||||
for(int point=0;point<this->_npoints;point++){
|
||||
if( (!this->GetNodeLocal(site*Ls,point)) && (!same_node[point]) ){
|
||||
local = 0;
|
||||
}
|
||||
}
|
||||
if(local == 0) {
|
||||
surface_list.push_back(site);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template < class compressor>
|
||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||
@ -369,23 +344,23 @@ public:
|
||||
int dag = compress.dag;
|
||||
int face_idx=0;
|
||||
if ( dag ) {
|
||||
assert(same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
|
||||
assert(same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
|
||||
assert(same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
|
||||
assert(same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
|
||||
assert(same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
|
||||
assert(same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
|
||||
assert(same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
|
||||
assert(same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XpCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YpCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZpCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TpCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XmCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YmCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZmCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TmCompress,Tm,face_idx));
|
||||
} else {
|
||||
assert(same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
|
||||
assert(same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
|
||||
assert(same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
|
||||
assert(same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
|
||||
assert(same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
|
||||
assert(same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
|
||||
assert(same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
|
||||
assert(same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
|
||||
assert(this->same_node[Xp]==this->HaloGatherDir(source,XmCompress,Xp,face_idx));
|
||||
assert(this->same_node[Yp]==this->HaloGatherDir(source,YmCompress,Yp,face_idx));
|
||||
assert(this->same_node[Zp]==this->HaloGatherDir(source,ZmCompress,Zp,face_idx));
|
||||
assert(this->same_node[Tp]==this->HaloGatherDir(source,TmCompress,Tp,face_idx));
|
||||
assert(this->same_node[Xm]==this->HaloGatherDir(source,XpCompress,Xm,face_idx));
|
||||
assert(this->same_node[Ym]==this->HaloGatherDir(source,YpCompress,Ym,face_idx));
|
||||
assert(this->same_node[Zm]==this->HaloGatherDir(source,ZpCompress,Zm,face_idx));
|
||||
assert(this->same_node[Tm]==this->HaloGatherDir(source,TpCompress,Tm,face_idx));
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
|
@ -162,7 +162,7 @@ void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
|
||||
MooeeInv(in,out);
|
||||
}
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m)
|
||||
void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m,std::vector<double> twist)
|
||||
{
|
||||
typedef typename FermionField::vector_type vector_type;
|
||||
typedef typename FermionField::scalar_type ScalComplex;
|
||||
@ -195,6 +195,7 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
|
||||
kmu = TwoPiL * kmu;
|
||||
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||
|
||||
wilson = wilson + 2.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
||||
|
||||
@ -348,15 +349,98 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, dirdisp, gamma);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
/*Change starts*/
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerial(st,lo,U,in,out,dag);
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
#ifdef GRID_OMP
|
||||
Compressor compressor;
|
||||
int len = U._grid->oSites();
|
||||
const int LLs = 1;
|
||||
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
st.CommsMergeSHM(compressor);
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int ncomms = CartesianCommunicator::nCommThreads;
|
||||
if (ncomms == -1) ncomms = 1;
|
||||
assert(nthreads > ncomms);
|
||||
if (tid >= ncomms) {
|
||||
nthreads -= ncomms;
|
||||
int ttid = tid - ncomms;
|
||||
int n = len;
|
||||
int chunk = n / nthreads;
|
||||
int rem = n % nthreads;
|
||||
int myblock, myn;
|
||||
if (ttid < rem) {
|
||||
myblock = ttid * chunk + ttid;
|
||||
myn = chunk+1;
|
||||
} else {
|
||||
myblock = ttid*chunk + rem;
|
||||
myn = chunk;
|
||||
}
|
||||
// do the compute
|
||||
if (dag == DaggerYes) {
|
||||
|
||||
for (int sss = myblock; sss < myblock+myn; ++sss) {
|
||||
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} else {
|
||||
for (int sss = myblock; sss < myblock+myn; ++sss) {
|
||||
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} //else
|
||||
|
||||
} else {
|
||||
st.CommunicateThreaded();
|
||||
}
|
||||
|
||||
Compressor compressor(dag);
|
||||
|
||||
if (dag == DaggerYes) {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
} else {
|
||||
parallel_for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, out);
|
||||
}
|
||||
}
|
||||
|
||||
} //pragma
|
||||
#else
|
||||
assert(0);
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag) {
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
Compressor compressor(dag);
|
||||
st.HaloExchange(in, compressor);
|
||||
|
||||
@ -370,6 +454,7 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
}
|
||||
}
|
||||
};
|
||||
/*Change ends */
|
||||
|
||||
/*******************************************************************************
|
||||
* Conserved current utilities for Wilson fermions, for contracting propagators
|
||||
|
@ -96,7 +96,7 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||
virtual void MooeeInv(const FermionField &in, FermionField &out);
|
||||
virtual void MooeeInvDag(const FermionField &in, FermionField &out);
|
||||
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass) ;
|
||||
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass,std::vector<double> twist) ;
|
||||
|
||||
////////////////////////
|
||||
// Derivative interface
|
||||
@ -130,6 +130,12 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
void DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
void DhopInternalOverlappedComms(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
const FermionField &in, FermionField &out, int dag);
|
||||
|
||||
// Constructor
|
||||
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
GridRedBlackCartesian &Hgrid, RealD _mass,
|
||||
@ -145,6 +151,8 @@ class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
|
||||
|
||||
// protected:
|
||||
public:
|
||||
virtual RealD Mass(void) { return mass; }
|
||||
virtual int isTrivialEE(void) { return 1; };
|
||||
RealD mass;
|
||||
RealD diag_mass;
|
||||
|
||||
|
@ -13,6 +13,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
|
||||
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -445,8 +446,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
}
|
||||
}
|
||||
ptime = usecond() - start;
|
||||
}
|
||||
{
|
||||
} else {
|
||||
double start = usecond();
|
||||
st.CommunicateThreaded();
|
||||
ctime = usecond() - start;
|
||||
@ -563,7 +563,221 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const FermionField &in, RealD mass)
|
||||
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)
|
||||
{
|
||||
// what type LatticeComplex
|
||||
GridBase *_grid = _FourDimGrid;
|
||||
GridBase *_5dgrid = _FiveDimGrid;
|
||||
|
||||
conformable(_5dgrid,out._grid);
|
||||
|
||||
FermionField PRsource(_5dgrid);
|
||||
FermionField PLsource(_5dgrid);
|
||||
FermionField buf1_4d(_grid);
|
||||
FermionField buf2_4d(_grid);
|
||||
FermionField GR(_5dgrid);
|
||||
FermionField GL(_5dgrid);
|
||||
FermionField bufL_4d(_grid);
|
||||
FermionField bufR_4d(_grid);
|
||||
|
||||
unsigned int Ls = in._grid->_rdimensions[0];
|
||||
|
||||
typedef typename FermionField::vector_type vector_type;
|
||||
typedef typename FermionField::scalar_type ScalComplex;
|
||||
typedef iSinglet<ScalComplex> Tcomplex;
|
||||
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ,
|
||||
Gamma::Algebra::GammaT
|
||||
};
|
||||
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
|
||||
std::vector<int> latt_size = _grid->_fdimensions;
|
||||
|
||||
LatComplex sk(_grid); sk = zero;
|
||||
LatComplex sk2(_grid); sk2= zero;
|
||||
LatComplex W(_grid); W= zero;
|
||||
LatComplex a(_grid); a= zero;
|
||||
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||
LatComplex cosha(_grid);
|
||||
LatComplex kmu(_grid);
|
||||
LatComplex Wea(_grid);
|
||||
LatComplex Wema(_grid);
|
||||
LatComplex sinha(_grid);
|
||||
LatComplex sinhaLs(_grid);
|
||||
LatComplex coshaLs(_grid);
|
||||
LatComplex A(_grid);
|
||||
LatComplex F(_grid);
|
||||
LatComplex App(_grid);
|
||||
LatComplex Amm(_grid);
|
||||
LatComplex Bpp(_grid);
|
||||
LatComplex Bmm(_grid);
|
||||
LatComplex ABpm(_grid); //Apm=Amp=Bpm=Bmp
|
||||
LatComplex signW(_grid);
|
||||
|
||||
ScalComplex ci(0.0,1.0);
|
||||
|
||||
for(int mu=0;mu<Nd;mu++) {
|
||||
|
||||
LatticeCoordinate(kmu,mu);
|
||||
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
|
||||
kmu = TwoPiL * kmu;
|
||||
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||
|
||||
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||
sk = sk + sin(kmu) *sin(kmu);
|
||||
}
|
||||
|
||||
W = one - M5 + sk2;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Cosh alpha -> alpha
|
||||
////////////////////////////////////////////
|
||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||
|
||||
// FIXME Need a Lattice acosh
|
||||
for(int idx=0;idx<_grid->lSites();idx++){
|
||||
std::vector<int> lcoor(Nd);
|
||||
Tcomplex cc;
|
||||
RealD sgn;
|
||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
peekLocalSite(cc,cosha,lcoor);
|
||||
assert((double)real(cc)>=1.0);
|
||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||
pokeLocalSite(cc,a,lcoor);
|
||||
}
|
||||
|
||||
Wea = ( exp( a) * abs(W) );
|
||||
Wema= ( exp(-a) * abs(W) );
|
||||
sinha = 0.5*(exp( a) - exp(-a));
|
||||
sinhaLs = 0.5*(exp( a*Ls) - exp(-a*Ls));
|
||||
coshaLs = 0.5*(exp( a*Ls) + exp(-a*Ls));
|
||||
|
||||
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
||||
F = exp( a*Ls) * (one - Wea + (Wema - one) * mass*mass);
|
||||
F = F + exp(-a*Ls) * (Wema - one + (one - Wea) * mass*mass);
|
||||
F = F - abs(W) * sinha * 4.0 * mass;
|
||||
|
||||
Bpp = (A/F) * (exp(-a*Ls*2.0) - one) * (one - Wema) * (one - mass*mass * one);
|
||||
Bmm = (A/F) * (one - exp(a*Ls*2.0)) * (one - Wea) * (one - mass*mass * one);
|
||||
App = (A/F) * (exp(-a*Ls*2.0) - one) * exp(-a) * (exp(-a) - abs(W)) * (one - mass*mass * one);
|
||||
Amm = (A/F) * (one - exp(a*Ls*2.0)) * exp(a) * (exp(a) - abs(W)) * (one - mass*mass * one);
|
||||
ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one);
|
||||
|
||||
//P+ source, P- source
|
||||
PRsource = (in + g5 * in) * 0.5;
|
||||
PLsource = (in - g5 * in) * 0.5;
|
||||
|
||||
//calculate GR, GL
|
||||
for(unsigned int ss=1;ss<=Ls;ss++)
|
||||
{
|
||||
bufR_4d = zero;
|
||||
bufL_4d = zero;
|
||||
for(unsigned int tt=1;tt<=Ls;tt++)
|
||||
{
|
||||
//possible sign if W<0
|
||||
if((ss+tt)%2==1) signW = abs(W)/W;
|
||||
else signW = one;
|
||||
|
||||
unsigned int f = (ss > tt) ? ss-tt : tt-ss; //f = abs(ss-tt)
|
||||
//GR
|
||||
buf1_4d = zero;
|
||||
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
|
||||
//G(s,t)
|
||||
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
|
||||
//A++*exp(a(s+t))
|
||||
bufR_4d = bufR_4d + App * exp(a*ss) * exp(a*tt) * signW * buf1_4d ;
|
||||
//A+-*exp(a(s-t))
|
||||
bufR_4d = bufR_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
||||
//A-+*exp(a(-s+t))
|
||||
bufR_4d = bufR_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf1_4d ;
|
||||
//A--*exp(a(-s-t))
|
||||
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
||||
|
||||
//GL
|
||||
buf2_4d = zero;
|
||||
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
|
||||
//G(s,t)
|
||||
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
|
||||
//B++*exp(a(s+t))
|
||||
bufL_4d = bufL_4d + Bpp * exp(a*ss) * exp(a*tt) * signW * buf2_4d ;
|
||||
//B+-*exp(a(s-t))
|
||||
bufL_4d = bufL_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
||||
//B-+*exp(a(-s+t))
|
||||
bufL_4d = bufL_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf2_4d ;
|
||||
//B--*exp(a(-s-t))
|
||||
bufL_4d = bufL_4d + Bmm * exp(-a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
||||
}
|
||||
InsertSlice(bufR_4d, GR, (ss-1), 0);
|
||||
InsertSlice(bufL_4d, GL, (ss-1), 0);
|
||||
}
|
||||
|
||||
//calculate propagator
|
||||
for(unsigned int ss=1;ss<=Ls;ss++)
|
||||
{
|
||||
bufR_4d = zero;
|
||||
bufL_4d = zero;
|
||||
|
||||
//(i*gamma_mu*sin(p_mu) - W)*(GL*P- source)
|
||||
buf1_4d = zero;
|
||||
ExtractSlice(buf1_4d, GL, (ss-1), 0);
|
||||
buf2_4d = zero;
|
||||
for(int mu=0;mu<Nd;mu++) {
|
||||
LatticeCoordinate(kmu,mu);
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
kmu = TwoPiL * kmu + TwoPiL * one * twist[mu];//twisted boundary
|
||||
buf2_4d = buf2_4d + sin(kmu)*ci*(Gamma(Gmu[mu])*buf1_4d);
|
||||
}
|
||||
bufL_4d = buf2_4d - W * buf1_4d;
|
||||
|
||||
//(i*gamma_mu*sin(p_mu) - W)*(GR*P+ source)
|
||||
buf1_4d = zero;
|
||||
ExtractSlice(buf1_4d, GR, (ss-1), 0);
|
||||
buf2_4d = zero;
|
||||
for(int mu=0;mu<Nd;mu++) {
|
||||
LatticeCoordinate(kmu,mu);
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
kmu = TwoPiL * kmu + TwoPiL * one * twist[mu];//twisted boundary
|
||||
buf2_4d = buf2_4d + sin(kmu)*ci*(Gamma(Gmu[mu])*buf1_4d);
|
||||
}
|
||||
bufR_4d = buf2_4d - W * buf1_4d;
|
||||
|
||||
//(delta(s-1,u) - m*delta(s,1)*delta(u,Ls))*GL
|
||||
if(ss==1){
|
||||
ExtractSlice(buf1_4d, GL, (Ls-1), 0);
|
||||
bufL_4d = bufL_4d - mass*buf1_4d;
|
||||
}
|
||||
else {
|
||||
ExtractSlice(buf1_4d, GL, (ss-2), 0);
|
||||
bufL_4d = bufL_4d + buf1_4d;
|
||||
}
|
||||
|
||||
//(delta(s+1,u) - m*delta(s,Ls)*delta(u,1))*GR
|
||||
if(ss==Ls){
|
||||
ExtractSlice(buf1_4d, GR, 0, 0);
|
||||
bufR_4d = bufR_4d - mass*buf1_4d;
|
||||
}
|
||||
else {
|
||||
ExtractSlice(buf1_4d, GR, ss, 0);
|
||||
bufR_4d = bufR_4d + buf1_4d;
|
||||
}
|
||||
buf1_4d = bufL_4d + bufR_4d;
|
||||
InsertSlice(buf1_4d, out, (ss-1), 0);
|
||||
}
|
||||
|
||||
|
||||
out = out * (-1.0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)
|
||||
{
|
||||
// what type LatticeComplex
|
||||
GridBase *_grid = _FourDimGrid;
|
||||
@ -606,6 +820,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
|
||||
kmu = TwoPiL * kmu;
|
||||
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||
|
||||
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||
sk = sk + sin(kmu) *sin(kmu);
|
||||
@ -619,7 +834,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
||||
////////////////////////////////////////////
|
||||
// Cosh alpha -> alpha
|
||||
////////////////////////////////////////////
|
||||
cosha = (one + W*W + sk) / (W*2.0);
|
||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||
|
||||
// FIXME Need a Lattice acosh
|
||||
for(int idx=0;idx<_grid->lSites();idx++){
|
||||
@ -634,8 +849,8 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
||||
pokeLocalSite(cc,a,lcoor);
|
||||
}
|
||||
|
||||
Wea = ( exp( a) * W );
|
||||
Wema= ( exp(-a) * W );
|
||||
Wea = ( exp( a) * abs(W) );
|
||||
Wema= ( exp(-a) * abs(W) );
|
||||
|
||||
num = num + ( one - Wema ) * mass * in;
|
||||
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
||||
@ -643,7 +858,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass)
|
||||
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist)
|
||||
{
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
@ -683,6 +898,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
|
||||
kmu = TwoPiL * kmu;
|
||||
kmu = kmu + TwoPiL * one * twist[mu];//momentum for twisted boundary conditions
|
||||
|
||||
sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
|
||||
sk = sk + sin(kmu)*sin(kmu);
|
||||
|
@ -118,8 +118,9 @@ namespace QCD {
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
|
||||
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass) ;
|
||||
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass) ;
|
||||
void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
|
||||
|
||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||
// Implement s-diagonal DW
|
||||
|
@ -53,7 +53,7 @@ template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
public:
|
||||
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::isFundamental==true && Nc == 3 &&EnableBool, void>::type
|
||||
DhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
@ -70,27 +70,27 @@ public:
|
||||
break;
|
||||
#endif
|
||||
case OptHandUnroll:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::HandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::HandDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::HandDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
case OptGeneric:
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if(interior&&exterior) WilsonKernels<Impl>::GenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (interior) WilsonKernels<Impl>::GenericDhopSiteInt(st,lo,U,buf,sF,sU,in,out);
|
||||
else if (exterior) WilsonKernels<Impl>::GenericDhopSiteExt(st,lo,U,buf,sF,sU,in,out);
|
||||
else assert(0);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@ -232,6 +232,7 @@ private:
|
||||
void GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
|
||||
void AsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
|
Reference in New Issue
Block a user