1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-17 07:17:06 +01:00

Added overlap comms compute mode

This commit is contained in:
paboyle
2016-01-03 01:38:11 +00:00
parent 4aac345bea
commit 331768dcff
13 changed files with 704 additions and 270 deletions

View File

@ -34,10 +34,11 @@ namespace QCD {
template<class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {};
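// Interior / exterior control: `local` enables the stencil legs whose entry has _is_local set
// (neighbour read from `in`), `nonlocal` enables the remaining legs (neighbour read from `buf`).
// When `local` is set the result overwrites out[sF]; otherwise it is added to out[sF], and only
// if at least one leg contributed. Pass both flags to compute interior and exterior together.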
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out)
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
@ -46,231 +47,425 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField
StencilEntry *SE;
int ptype;
int num = 0;
result=zero;
///////////////////////////
// Xp
///////////////////////////
SE=st.GetEntry(ptype,Xp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjXp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
spReconXp(result,Uchi);
// Yp
SE=st.GetEntry(ptype,Yp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjYp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
accumReconYp(result,Uchi);
// Zp
SE=st.GetEntry(ptype,Zp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjZp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjXp(chi,in._odata[SE->_offset]);
}
}
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
accumReconZp(result,Uchi);
// Tp
SE=st.GetEntry(ptype,Tp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjTp(chi,in._odata[SE->_offset]);
} else {
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
accumReconTp(result,Uchi);
// Xm
SE=st.GetEntry(ptype,Xm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjXm(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
accumReconXm(result,Uchi);
// Ym
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjYm(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
accumReconYm(result,Uchi);
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
accumReconXp(result,Uchi);
num++;
}
///////////////////////////
// Yp
///////////////////////////
SE=st.GetEntry(ptype,Yp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjYp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
accumReconYp(result,Uchi);
num++;
}
///////////////////////////
// Zp
///////////////////////////
SE=st.GetEntry(ptype,Zp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjZp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
accumReconZp(result,Uchi);
num++;
}
///////////////////////////
// Tp
///////////////////////////
SE=st.GetEntry(ptype,Tp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjTp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
accumReconTp(result,Uchi);
num++;
}
///////////////////////////
// Xm
///////////////////////////
SE=st.GetEntry(ptype,Xm,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjXm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
accumReconXm(result,Uchi);
num++;
}
///////////////////////////
// Ym
///////////////////////////
SE=st.GetEntry(ptype,Ym,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjYm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
accumReconYm(result,Uchi);
num++;
}
///////////////////////////
// Zm
///////////////////////////
SE=st.GetEntry(ptype,Zm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjZm(chi,in._odata[SE->_offset]);
} else {
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjZm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
accumReconZm(result,Uchi);
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
accumReconZm(result,Uchi);
num++;
}
///////////////////////////
// Tm
///////////////////////////
SE=st.GetEntry(ptype,Tm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjTm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
accumReconTm(result,Uchi);
vstream(out._odata[sF],result*(-0.5));
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
accumReconTm(result,Uchi);
num++;
}
if ( local ) {
vstream(out._odata[sF],result*(-0.5));
} else if ( num ) {
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
}
};
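// Interior / exterior control: `local` enables the stencil legs whose entry has _is_local set
// (neighbour read from `in`), `nonlocal` enables the remaining legs (neighbour read from `buf`).
// When `local` is set the result overwrites out[sF]; otherwise it is added to out[sF], and only
// if at least one leg contributed. Pass both flags to compute interior and exterior together.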
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out)
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
SiteSpinor result;
StencilEntry *SE;
int ptype;
int num = 0;
result=zero;
///////////////////////////
// Xp
///////////////////////////
SE=st.GetEntry(ptype,Xm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjXp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
spReconXp(result,Uchi);
// Yp
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjYp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjXp(chi,in._odata[SE->_offset]);
}
}
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
accumReconYp(result,Uchi);
// Zp
SE=st.GetEntry(ptype,Zm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjZp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
accumReconZp(result,Uchi);
// Tp
SE=st.GetEntry(ptype,Tm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjTp(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
accumReconTp(result,Uchi);
// Xm
SE=st.GetEntry(ptype,Xp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjXm(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
accumReconXm(result,Uchi);
// Ym
SE=st.GetEntry(ptype,Yp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjYm(chi,in._odata[SE->_offset]);
} else {
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
accumReconYm(result,Uchi);
// Zm
SE=st.GetEntry(ptype,Zp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjZm(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE,st);
accumReconXp(result,Uchi);
num++;
}
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
accumReconZm(result,Uchi);
// Tm
SE=st.GetEntry(ptype,Tp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
///////////////////////////
// Yp
///////////////////////////
SE=st.GetEntry(ptype,Ym,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjYp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
accumReconTm(result,Uchi);
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE,st);
accumReconYp(result,Uchi);
num++;
}
///////////////////////////
// Zp
///////////////////////////
SE=st.GetEntry(ptype,Zm,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjZp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE,st);
accumReconZp(result,Uchi);
num++;
}
///////////////////////////
// Tp
///////////////////////////
SE=st.GetEntry(ptype,Tm,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjTp(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if ( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE,st);
accumReconTp(result,Uchi);
num++;
}
///////////////////////////
// Xm
///////////////////////////
SE=st.GetEntry(ptype,Xp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjXm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE,st);
accumReconXm(result,Uchi);
num++;
}
///////////////////////////
// Ym
///////////////////////////
SE=st.GetEntry(ptype,Yp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjYm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE,st);
accumReconYm(result,Uchi);
num++;
}
vstream(out._odata[sF],result*(-0.5));
}
///////////////////////////
// Zm
///////////////////////////
SE=st.GetEntry(ptype,Zp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjZm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE,st);
accumReconZm(result,Uchi);
num++;
}
///////////////////////////
// Tm
///////////////////////////
SE=st.GetEntry(ptype,Tp,sF);
if (local && SE->_is_local ) {
if ( SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else {
spProjTm(chi,in._odata[SE->_offset]);
}
}
if ( nonlocal && (!SE->_is_local) ) {
chi=buf[SE->_offset];
}
if( (local && SE->_is_local) || ( nonlocal && (!SE->_is_local)) ) {
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE,st);
accumReconTm(result,Uchi);
num++;
}
if ( local ) {
vstream(out._odata[sF],result*(-0.5));
} else if ( num ) {
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
}
};
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
@ -405,7 +600,7 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
// Site kernel entry point for the "Asm" variant: this definition simply hands the site
// (sF, sU) to DiracOptDhopSite with the same stencil, gauge field, comms buffer and fields.
// NOTE(review): two parameter-list tails appear below (one ending in `uint64_t *p`, one ending
// in `bool local, bool nonlocal`); presumably these are the pre- and post-change lines of the
// diff shown without +/- markers -- confirm against the repository before editing.
template<class Impl>
void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,uint64_t *p)
int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
{
// NOTE(review): `local` / `nonlocal` are not forwarded here, while the DiracOptDhopSite
// definition in this file takes both. This call presumably relies on default arguments in
// the declaration (not visible here); if so it always uses those defaults regardless of
// what the caller passed -- confirm this is intended.
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
}