1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 13:57:07 +01:00

Zero changes, accelerator on kernels and some thread loop changes

This commit is contained in:
paboyle
2018-01-27 23:47:38 +00:00
parent 45df59720e
commit 2d0bcc2606
40 changed files with 174 additions and 175 deletions

View File

@ -96,15 +96,14 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
// For the non-vectorised s-direction this is simple
for(auto site=0;site<vol;site++){
thread_loop( (auto site=0;site<vol;site++), {
SiteSpinor SiteChi;
SiteHalfSpinor SitePplus;
SiteHalfSpinor SitePminus;
for(int s1=0;s1<Ls;s1++){
SiteChi =zero;
SiteChi =Zero();
for(int s2=0;s2<Ls;s2++){
int lex2 = s2+Ls*site;
@ -120,7 +119,7 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
}
chi[s1+Ls*site] = SiteChi*0.5;
}
}
});
}
#ifdef CAYLEY_DPERP_DENSE

View File

@ -360,8 +360,8 @@ void CayleyFermion5D<Impl>::MooeeInternalAsm(const FermionField &psi, FermionFie
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){
@ -532,8 +532,8 @@ void CayleyFermion5D<Impl>::MooeeInternalZAsm(const FermionField &psi, FermionFi
int lex=s2+LLs*site;
if ( s2==0 && l==0) {
SiteChiP=zero;
SiteChiM=zero;
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0;sp<2;sp++){

View File

@ -69,7 +69,7 @@ void DomainWallEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& D
{
int Ls = this->Ls;
Din = zero;
Din = Zero();
if((sign == 1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, Ls-1, 0); }
else if((sign == -1) && (dag == 0)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, 0); }
else if((sign == 1 ) && (dag == 1)){ axpby_ssp(Din, 0.0, psi, 1.0, psi, 0, Ls-1); }

View File

@ -106,7 +106,7 @@ void DomainWallEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, Fermion
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){

View File

@ -362,8 +362,8 @@ void DomainWallEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, Ferm
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0; sp<2; sp++){

View File

@ -81,8 +81,8 @@ public:
virtual void MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDeriv(mat,U,V,dag);};
virtual void MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivOE(mat,U,V,dag);};
virtual void MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){DhopDerivEO(mat,U,V,dag);};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=zero;};
virtual void MooDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();}; // Clover can override these
virtual void MeeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag){mat=Zero();};
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)=0;

View File

@ -266,7 +266,7 @@ public:
int Ls=Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = zero;
tmp = Zero();
parallel_for(int sss=0;sss<tmp.Grid()->oSites();sss++){
int sU=sss;
@ -406,7 +406,7 @@ public:
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = zero;
tmp = Zero();
// FIXME
// Current implementation works, thread safe, probably suboptimal
@ -417,7 +417,7 @@ public:
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp.Grid()->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = zero;
typename result_type::scalar_object scalar_object; scalar_object = Zero();
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
@ -639,7 +639,7 @@ public:
int Ls = Btilde.Grid()->_fdimensions[0];
GaugeLinkField tmp(mat.Grid());
tmp = zero;
tmp = Zero();
parallel_for(int ss = 0; ss < tmp.Grid()->oSites(); ss++) {
for (int s = 0; s < Ls; s++) {
int sF = s + Ls * ss;

View File

@ -87,7 +87,7 @@ void MobiusEOFAFermion<Impl>::Omega(const FermionField& psi, FermionField& Din,
int Ls = this->Ls;
RealD alpha = this->alpha;
Din = zero;
Din = Zero();
if((sign == 1) && (dag == 0)) { // \Omega_{+}
for(int s=0; s<Ls; ++s){
axpby_ssp(Din, 0.0, psi, 2.0*std::pow(1.0-alpha,Ls-s-1)/std::pow(1.0+alpha,Ls-s), psi, s, 0);

View File

@ -175,7 +175,7 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi, const Fermio
this->M5Dtime -= usecond();
parallel_for(int ss=0; ss<grid->oSites(); ss+=Ls){
chi[ss+Ls-1] = zero;
chi[ss+Ls-1] = Zero();
auto tmp = psi[0];
for(int s=0; s<Ls; s++){
if(s==0) {

View File

@ -131,7 +131,7 @@ void MobiusEOFAFermion<Impl>::MooeeInternal(const FermionField& psi, FermionFiel
SiteHalfSpinor SitePminus;
for(int s1=0; s1<Ls; s1++){
SiteChi = zero;
SiteChi = Zero();
for(int s2=0; s2<Ls; s2++){
int lex2 = s2 + Ls*site;
if(PplusMat(s1,s2) != 0.0){

View File

@ -737,8 +737,8 @@ void MobiusEOFAFermion<Impl>::MooeeInternalAsm(const FermionField& psi, FermionF
int lex = s2 + LLs*site;
if( s2==0 && l==0 ){
SiteChiP=zero;
SiteChiM=zero;
SiteChiP=Zero();
SiteChiM=Zero();
}
for(int sp=0; sp<2; sp++){

View File

@ -152,11 +152,11 @@ void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const Fermi
std::vector<int> latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
LatComplex wilson(_grid); wilson= zero;
FermionField num (_grid); num = Zero();
LatComplex wilson(_grid); wilson= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
// momphase = n * 2pi / L
@ -360,7 +360,7 @@ void WilsonFermion<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
conformable(_grid, q_in_2.Grid());
conformable(_grid, q_out.Grid());
PropagatorField tmp1(_grid), tmp2(_grid);
q_out = zero;
q_out = Zero();
// Forward, need q1(x + mu), q2(x). Backward, need q1(x), q2(x + mu).
// Inefficient comms method but not performance critical.
@ -397,7 +397,7 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int LLt = GridDefaultLatt()[Tp];
// Momentum projection
ph = zero;
ph = Zero();
for(unsigned int mu = 0; mu < Nd - 1; mu++)
{
LatticeCoordinate(coor, mu);
@ -405,7 +405,7 @@ void WilsonFermion<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
ph = exp((Real)(2*M_PI)*i*ph);
q_out = zero;
q_out = Zero();
LatticeInteger coords(_grid);
LatticeCoordinate(coords, Tp);

View File

@ -583,14 +583,14 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
std::vector<int> latt_size = _grid->_fdimensions;
FermionField num (_grid); num = zero;
FermionField num (_grid); num = Zero();
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex W(_grid); W= zero;
LatComplex a(_grid); a= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex W(_grid); W= Zero();
LatComplex a(_grid); a= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
LatComplex denom(_grid); denom= zero;
LatComplex denom(_grid); denom= Zero();
LatComplex cosha(_grid);
LatComplex kmu(_grid);
LatComplex Wea(_grid);
@ -661,16 +661,16 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHw(FermionField &out,const Fe
std::vector<int> latt_size = _grid->_fdimensions;
LatComplex sk(_grid); sk = zero;
LatComplex sk2(_grid); sk2= zero;
LatComplex sk(_grid); sk = Zero();
LatComplex sk2(_grid); sk2= Zero();
LatComplex w_k(_grid); w_k= zero;
LatComplex b_k(_grid); b_k= zero;
LatComplex w_k(_grid); w_k= Zero();
LatComplex b_k(_grid); b_k= Zero();
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
FermionField num (_grid); num = zero;
LatComplex denom(_grid); denom= zero;
FermionField num (_grid); num = Zero();
LatComplex denom(_grid); denom= Zero();
LatComplex kmu(_grid);
ScalComplex ci(0.0,1.0);
@ -733,7 +733,7 @@ void WilsonFermion5D<Impl>::ContractConservedCurrent(PropagatorField &q_in_1,
conformable(_FourDimGrid, q_out.Grid());
PropagatorField tmp1(FermionGrid()), tmp2(FermionGrid());
unsigned int LLs = q_in_1.Grid()->_rdimensions[0];
q_out = zero;
q_out = Zero();
// Forward, need q1(x + mu, s), q2(x, Ls - 1 - s). Backward, need q1(x, s),
// q2(x + mu, Ls - 1 - s). 5D lattice so shift 4D coordinate mu by one.
@ -797,7 +797,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
unsigned int LLt = GridDefaultLatt()[Tp];
// Momentum projection.
ph = zero;
ph = Zero();
for(unsigned int nu = 0; nu < Nd - 1; nu++)
{
// Shift coordinate lattice index by 1 to account for 5th dimension.
@ -806,7 +806,7 @@ void WilsonFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
ph = exp((Real)(2*M_PI)*i*ph);
q_out = zero;
q_out = Zero();
LatticeInteger coords(_FourDimGrid);
LatticeCoordinate(coords, Tp);

View File

@ -36,7 +36,7 @@ int WilsonKernelsStatic::Opt = WilsonKernelsStatic::OptGeneric;
int WilsonKernelsStatic::Comms = WilsonKernelsStatic::CommsAndCompute;
template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
accelerator WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
////////////////////////////////////////////
// Generic implementation; move to different file?
@ -103,9 +103,9 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
// All legs kernels ; comms then compute
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
accelerator void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -127,9 +127,9 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo,
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
accelerator void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -153,7 +153,7 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilImpl &st, LebesgueOrder &lo, Do
// Interior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
@ -165,7 +165,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &
StencilEntry *SE;
int ptype;
result=zero;
result=Zero();
GENERIC_STENCIL_LEG_INT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zp,spProjZp,accumReconZp);
@ -178,9 +178,9 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilImpl &st, LebesgueOrder &
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
accelerator void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -189,7 +189,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo,
SiteSpinor result;
StencilEntry *SE;
int ptype;
result=zero;
result=Zero();
GENERIC_STENCIL_LEG_INT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_INT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_INT(Zm,spProjZp,accumReconZp);
@ -204,7 +204,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilImpl &st, LebesgueOrder &lo,
// Exterior kernels
////////////////////////////////////////////////////////////////////
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
@ -216,7 +216,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xp,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Yp,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zp,spProjZp,accumReconZp);
@ -231,7 +231,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilImpl &st, LebesgueOrder &
};
template <class Impl>
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
accelerator void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out)
{
@ -243,7 +243,7 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo,
StencilEntry *SE;
int ptype;
int nmu=0;
result=zero;
result=Zero();
GENERIC_STENCIL_LEG_EXT(Xm,spProjXp,accumReconXp);
GENERIC_STENCIL_LEG_EXT(Ym,spProjYp,accumReconYp);
GENERIC_STENCIL_LEG_EXT(Zm,spProjZp,accumReconZp);
@ -258,8 +258,8 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilImpl &st, LebesgueOrder &lo,
};
template <class Impl>
void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
accelerator void WilsonKernels<Impl>::DhopDirK( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp;
SiteHalfSpinor chi;

View File

@ -544,18 +544,18 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Simd U_21;
#define ZERO_RESULT \
result_00=zero; \
result_01=zero; \
result_02=zero; \
result_10=zero; \
result_11=zero; \
result_12=zero; \
result_20=zero; \
result_21=zero; \
result_22=zero; \
result_30=zero; \
result_31=zero; \
result_32=zero;
result_00=Zero(); \
result_01=Zero(); \
result_02=Zero(); \
result_10=Zero(); \
result_11=Zero(); \
result_12=Zero(); \
result_20=Zero(); \
result_21=Zero(); \
result_22=Zero(); \
result_30=Zero(); \
result_31=Zero(); \
result_32=Zero();
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01