mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Do the force term on the accelerator too. Needed particularly because comms buffers
are device memory.
This commit is contained in:
parent
275c1c920f
commit
1282e1067f
@ -60,9 +60,9 @@ public:
|
|||||||
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
int Ls, int Nsite, const FermionField &in, FermionField &out,
|
||||||
int interior=1,int exterior=1) ;
|
int interior=1,int exterior=1) ;
|
||||||
|
|
||||||
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Utilities for inserting Wilson conserved current.
|
// Utilities for inserting Wilson conserved current.
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
@ -99,6 +99,10 @@ public:
|
|||||||
bool switch_sign = false);
|
bool switch_sign = false);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
static accelerator void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor * buf,
|
||||||
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
|
||||||
|
|
||||||
// Specialised variants
|
// Specialised variants
|
||||||
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
static accelerator void GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, SiteHalfSpinor * buf,
|
||||||
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
int sF, int sU, const FermionFieldView &in, FermionFieldView &out);
|
||||||
|
@ -230,38 +230,25 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
|
|||||||
// assert( (disp==1)||(disp==-1) );
|
// assert( (disp==1)||(disp==-1) );
|
||||||
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
// assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
|
||||||
|
|
||||||
Compressor compressor(DaggerNo);
|
|
||||||
Stencil.HaloExchange(in,compressor);
|
|
||||||
|
|
||||||
int skip = (disp==1) ? 0 : 1;
|
int skip = (disp==1) ? 0 : 1;
|
||||||
|
|
||||||
int dirdisp = dir+skip*4;
|
int dirdisp = dir+skip*4;
|
||||||
int gamma = dir+(1-skip)*4;
|
int gamma = dir+(1-skip)*4;
|
||||||
|
|
||||||
assert(dirdisp<=7);
|
Compressor compressor(DaggerNo);
|
||||||
assert(dirdisp>=0);
|
Stencil.HaloExchange(in,compressor);
|
||||||
|
|
||||||
|
uint64_t Nsite = Umu.Grid()->oSites();
|
||||||
|
Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
|
||||||
|
|
||||||
auto Umu_v = Umu.View();
|
|
||||||
auto in_v = in.View();
|
|
||||||
auto out_v = out.View();
|
|
||||||
thread_for(ss,Umu.Grid()->oSites(),{
|
|
||||||
// parallel_for(int ss=0;ss<Umu.Grid()->oSites();ss++){
|
|
||||||
for(int s=0;s<Ls;s++){
|
|
||||||
int sU=ss;
|
|
||||||
int sF = s+Ls*sU;
|
|
||||||
Kernels::DhopDirK(Stencil,Umu_v,Stencil.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
|
|
||||||
// Kernels::DhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
GaugeField &mat,
|
GaugeField &mat,
|
||||||
const FermionField &A,
|
const FermionField &A,
|
||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
DerivCalls++;
|
DerivCalls++;
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
@ -296,27 +283,10 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
|
|
||||||
DerivDhopComputeTime -= usecond();
|
DerivDhopComputeTime -= usecond();
|
||||||
|
|
||||||
auto U_v = U.View();
|
|
||||||
auto Btilde_v = Btilde.View();
|
|
||||||
auto B_v = B.View();
|
|
||||||
int Bsites = B.Grid()->oSites();
|
|
||||||
int Usites = U.Grid()->oSites();
|
int Usites = U.Grid()->oSites();
|
||||||
thread_for(sss, U.Grid()->oSites(),{
|
|
||||||
// parallel_for (int sss = 0; sss < U.Grid()->oSites(); sss++) {
|
|
||||||
for (int s = 0; s < Ls; s++) {
|
|
||||||
int sU = sss;
|
|
||||||
int sF = s + Ls * sU;
|
|
||||||
|
|
||||||
assert(sF < Bsites);
|
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
|
||||||
assert(sU < Usites);
|
|
||||||
|
|
||||||
Kernels::DhopDirK(st, U_v, st.CommBuf(), sF, sU, B_v, Btilde_v, mu, gamma);
|
|
||||||
// Kernels::DhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
|
||||||
////////////////////////////
|
|
||||||
// spin trace outer product
|
|
||||||
////////////////////////////
|
|
||||||
}
|
|
||||||
});
|
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
|
@ -238,16 +238,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
int gamma = mu;
|
int gamma = mu;
|
||||||
if (!dag) gamma += Nd;
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
////////////////////////
|
int Ls=1;
|
||||||
// Call the single hop
|
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
||||||
////////////////////////
|
|
||||||
auto U_v = U.View();
|
|
||||||
auto B_v = B.View();
|
|
||||||
auto Btilde_v = Btilde.View();
|
|
||||||
auto st_v = st.View();
|
|
||||||
thread_for( sss, B.Grid()->oSites(), {
|
|
||||||
Kernels::DhopDirK(st_v, U_v, st.CommBuf(), sss, sss, B_v, Btilde_v, mu, gamma);
|
|
||||||
});
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
@ -332,7 +324,8 @@ void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out, int di
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp) {
|
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int dir, int disp)
|
||||||
|
{
|
||||||
int skip = (disp == 1) ? 0 : 1;
|
int skip = (disp == 1) ? 0 : 1;
|
||||||
int dirdisp = dir + skip * 4;
|
int dirdisp = dir + skip * 4;
|
||||||
int gamma = dir + (1 - skip) * 4;
|
int gamma = dir + (1 - skip) * 4;
|
||||||
@ -346,13 +339,9 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
|||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
Stencil.HaloExchange(in, compressor);
|
Stencil.HaloExchange(in, compressor);
|
||||||
auto in_v = in.View();
|
int Ls=1;
|
||||||
auto out_v = in.View();
|
int Nsite=in.oSites();
|
||||||
auto Umu_v = Umu.View();
|
Kernels::DhopDirKernel(Stencil, Umu, Stencil.CommBuf(), Ls, Nsite, in, out, dirdisp, gamma);
|
||||||
auto Stencil_v = Stencil.View();
|
|
||||||
thread_for(sss, in.Grid()->oSites(),{
|
|
||||||
Kernels::DhopDirK(Stencil_v, Umu_v, Stencil.CommBuf(), sss, sss, in_v, out_v, dirdisp, gamma);
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -320,7 +320,26 @@ void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,Si
|
|||||||
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
GENERIC_DHOPDIR_LEG(Tm,spProjTm,spReconTm);
|
||||||
coalescedWrite(out[sF], result,lane);
|
coalescedWrite(out[sF], result,lane);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int Ls,
|
||||||
|
int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
|
||||||
|
{
|
||||||
|
assert(dirdisp<=7);
|
||||||
|
assert(dirdisp>=0);
|
||||||
|
|
||||||
|
auto U_v = U.View();
|
||||||
|
auto in_v = in.View();
|
||||||
|
auto out_v = out.View();
|
||||||
|
auto st_v = st.View();
|
||||||
|
accelerator_for(ss,Nsite,1,{
|
||||||
|
for(int s=0;s<Ls;s++){
|
||||||
|
int sU=ss;
|
||||||
|
int sF = s+Ls*sU;
|
||||||
|
DhopDirK(st_v,U_v,st.CommBuf(),sF,sU,in_v,out_v,dirdisp,gamma);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
#define KERNEL_CALLNB(A) \
|
#define KERNEL_CALLNB(A) \
|
||||||
const uint64_t NN = Nsite*Ls; \
|
const uint64_t NN = Nsite*Ls; \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user