1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Gparity valence test now working.

Interface in FermionOperator will change a lot in future
This commit is contained in:
Peter Boyle
2015-08-14 00:01:04 +01:00
parent 2c216a42f9
commit 028e2061e0
14 changed files with 572 additions and 371 deletions

View File

@ -41,6 +41,12 @@
namespace Grid {
struct StencilEntry {
int _offset;
int _is_local;
int _permute;
int _around_the_world;
};
class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
public:
@ -58,9 +64,9 @@ namespace Grid {
std::vector<int> _permute_type;
// npoints x Osites() of these
std::vector<std::vector<int> > _offsets;
std::vector<std::vector<int> > _is_local;
std::vector<std::vector<int> > _permute;
std::vector<std::vector<StencilEntry> > _entries;
inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point][osite]; }
int _unified_buffer_size;
int _request_count;
@ -77,8 +83,8 @@ namespace Grid {
// Can this be avoided with simpler coding of comms?
void Local (int point, int dimension,int shift,int cbmask);
void Comms (int point, int dimension,int shift,int cbmask);
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute);
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset);
void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap);
void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset,int wrap);
// Could allow a functional munging of the halo to another type during the comms.
// this could implement the 16bit/32bit/64bit compression.

View File

@ -193,20 +193,18 @@ PARALLEL_FOR_LOOP
for(int ss=0;ss<Grid()->oSites();ss++){
siteVector res = zero;
siteVector nbr;
int offset,local,perm,ptype;
int ptype;
StencilEntry *SE;
for(int point=0;point<geom.npoint;point++){
offset = Stencil._offsets [point][ss];
local = Stencil._is_local[point][ss];
perm = Stencil._permute [point][ss];
ptype = Stencil._permute_type[point];
SE=Stencil.GetEntry(ptype,point,ss);
if(local&&perm) {
permute(nbr,in._odata[offset],ptype);
} else if(local) {
nbr = in._odata[offset];
if(SE->_is_local&&SE->_permute) {
permute(nbr,in._odata[SE->_offset],ptype);
} else if(SE->_is_local) {
nbr = in._odata[SE->_offset];
} else {
nbr = comm_buf[offset];
nbr = comm_buf[SE->_offset];
}
res = res + A[point]._odata[ss]*nbr;
}

View File

@ -14,6 +14,7 @@ namespace Grid {
auto PeekIndex(const Lattice<vobj> &lhs,int i) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
@ -24,6 +25,7 @@ PARALLEL_FOR_LOOP
auto PeekIndex(const Lattice<vobj> &lhs,int i,int j) -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
ret.checkerboard=lhs.checkerboard;
PARALLEL_FOR_LOOP
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);

View File

@ -36,7 +36,7 @@ namespace QCD {
template<typename T> struct isSpinor {
static const bool value = (SpinorIndex==T::TensorLevel);
};
template <typename T> using IfSpinor = Invoke<std::enable_if<isSpinor<T>::value,int> > ;
template <typename T> using IfSpinor = Invoke<std::enable_if< isSpinor<T>::value,int> > ;
template <typename T> using IfNotSpinor = Invoke<std::enable_if<!isSpinor<T>::value,int> > ;
// ChrisK very keen to add extra space for Gparity doubling.

View File

@ -46,6 +46,8 @@
////////////////////////////////////////////////////////////////////////////////////////////////////
#define FermOpTemplateInstantiate(A) \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; \
template class A<WilsonImplF>; \
template class A<WilsonImplD>;
@ -131,6 +133,15 @@ typedef OverlapWilsonPartialFractionTanhFermion<WilsonImplD> OverlapWilsonPartia
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplR> OverlapWilsonPartialFractionZolotarevFermionR;
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplF> OverlapWilsonPartialFractionZolotarevFermionF;
typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonPartialFractionZolotarevFermionD;
// Gparity cases; partial list until tested
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
}}
///////////////////////////////////////////////////////////////////////////////
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code

View File

@ -37,12 +37,10 @@ namespace Grid {
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
// provide the multiply by link that is differentiated between Gparity (with flavour index) and
// non-Gparity
static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu){
// provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity
static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE){
mult(&phi(),&U(mu),&chi());
}
static inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
@ -55,6 +53,11 @@ namespace Grid {
PokeIndex<LorentzIndex>(Uds,U,mu+4);
}
}
static inline void InsertForce(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu){
GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu);
}
};
@ -62,8 +65,6 @@ namespace Grid {
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
template<class S,int Nrepresentation=Nc>
class GparityWilsonImpl {
public:
@ -74,7 +75,7 @@ namespace Grid {
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iScalar<iMatrix<vtype, Nrepresentation> > > >;
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
typedef iImplSpinor <Simd> SiteSpinor;
@ -88,16 +89,26 @@ namespace Grid {
typedef Lattice<SiteGaugeField> GaugeField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef GparityWilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
// typedef GparityWilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
// provide the multiply by link that is differentiated between Gparity (with flavour index) and
// non-Gparity
static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu){
for(int f=0;f<Ngp;f++){
mult(&phi(f),&U(f)(mu),&chi(f));
}
static inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE){
// FIXME; need to be more careful. If this is a simd direction we are still stuffed
if ( SE->_around_the_world && ((mu==Xp)||(mu==Xm)) ) {
mult(&phi(0),&U(0)(mu),&chi(1));
mult(&phi(1),&U(1)(mu),&chi(0));
} else {
mult(&phi(0),&U(0)(mu),&chi(0));
mult(&phi(1),&U(1)(mu),&chi(1));
}
}
static inline void InsertForce(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu){
// Fixme
return;
}
static inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
@ -116,44 +127,51 @@ namespace Grid {
LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conj(U);
Uconj = conjugate(U);
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( gpdirs[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss]();
}
U = adj(Cshift(U,mu,-1)); // correct except for spanning the boundary
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1));
Utmp = where(coor==0,U,Uconj);
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu) = Utmp[ss]();
Utmp = U;
if ( gpdirs[mu] ) {
Utmp = where(coor==0,Uconj,Utmp);
}
Utmp = where(coor==0,Uconj,U);
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu) = Utmp[ss]();
Uds[ss](0)(mu+4) = Utmp[ss]();
}
Utmp = Uconj;
if ( gpdirs[mu] ) {
Utmp = where(coor==0,U,Utmp);
}
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss]();
}
}
}
};
typedef WilsonImpl<vComplex,Nc> WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
typedef GparityWilsonImpl<vComplex,Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double

View File

@ -63,6 +63,7 @@ namespace QCD {
}
};
/*
template<class SiteHalfSpinor,class SiteSpinor>
class GparityWilsonCompressor : public WilsonCompressor<SiteHalfSpinor,SiteSpinor>{
@ -77,55 +78,65 @@ namespace QCD {
const int Nsimd = grid->Nsimd();
int ocb=grid->CheckerBoardFromOindex(osite);
int checkered=grid->CheckerBoarded(dim);
int ocb=grid->CheckerBoardFromOindex(osite);
SiteHalfSpinor tmp = spinproject(in); // spin projected
SiteHalfSpinor tmp = this->spinproject(in); // spin projected
//////////////////////////////////////////////////////////////
// Check whether we must flavour flip
// do this if source is plane 0 on processor 0 in dimension dim
//////////////////////////////////////////////////////////////
if ( (this->mu==Xp)||(this->mu==Yp)||(this->mu==Zp)||(this->mu==Tp) ) {
int do_flip = 0;
int flipicoor= 0;
if(Gbcs[this->mu]){
if ( Gbcs[this->mu] && (grid->_processor_coor[dim] == 0) && (plane==0) && (ocb==0) ) {
std::cout << "Applying Gparity BC's in direction "<<this->mu<<std::endl;
std::vector<scalar_object> flat(Nsimd);
extract(tmp,flat);
for(int i=0;i<Nsimd;i++) {
std::vector<int> coor;
grid->iCoorFromIindex(coor,i);
if ( coor[dim]==0 ) {
scalar_object stmp;
stmp(0) = flat[i](1);
stmp(1) = flat[i](0);
flat[i] = stmp;
}
if ( (this->mu==Xp)||(this->mu==Yp)||(this->mu==Zp)||(this->mu==Tp) ) {
if ( (grid->_processor_coor[dim] == 0)
&& (plane==0)
&& ( (!checkered)||(ocb==0) ) ) {
do_flip=1;
flipicoor=0;
}
}
if ( (this->mu==Xm)||(this->mu==Ym)||(this->mu==Zm)||(this->mu==Tm) ) {
if ( (grid->_processor_coor[dim] == (grid->_processors[dim]-1) )
&& (plane==grid->_rdimensions[dim]-1)
&& ( (!checkered)||(ocb==1) ) ) {
do_flip=1;
flipicoor=grid->_simd_layout[dim]-1;
}
}
}
if ( do_flip ) {
merge(tmp,flat);
std::cout << "Applying Gparity BC's in direction "<<this->mu<< " osite " << osite << " plane "<<plane <<std::endl;
} // coor & bc guard
} // shift guard
std::vector<scalar_object> flat(Nsimd);
std::vector<int> coor;
extract(tmp,flat);
for(int i=0;i<Nsimd;i++) {
grid->iCoorFromIindex(coor,i);
if ( coor[dim]==flipicoor ) {
scalar_object stmp;
stmp(0) = flat[i](1);
stmp(1) = flat[i](0);
flat[i] = stmp;
}
}
merge(tmp,flat);
}
return tmp;
}
SiteHalfSpinor flavourflip(const SiteHalfSpinor &in) {
SiteHalfSpinor ret;
for(int f=0;f<Ngp;f++){
ret(0) = in(1);
ret(1) = in(0);
}
return ret;
}
};
*/
}} // namespace close
#endif

View File

@ -117,7 +117,6 @@ namespace QCD {
Compressor compressor(dag);
GaugeLinkField tmp(B._grid);
FermionField Btilde(B._grid);
st.HaloExchange<SiteSpinor,SiteHalfSpinor,Compressor>(B,comm_buf,compressor);
@ -141,9 +140,8 @@ PARALLEL_FOR_LOOP
//////////////////////////////////////////////////
// spin trace outer product
//////////////////////////////////////////////////
tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,tmp,mu);
Impl::InsertForce(mat,Btilde,A,mu);
}
}

View File

@ -137,7 +137,6 @@ void WilsonFermion5D<Impl>::DerivInternal(CartesianStencil & st,
////////////////////////
// Call the single hop
////////////////////////
tmp = zero;
PARALLEL_FOR_LOOP
for(int sss=0;sss<U._grid->oSites();sss++){
@ -154,14 +153,18 @@ PARALLEL_FOR_LOOP
// spin trace outer product
////////////////////////////
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(
outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
}
Impl::InsertForce(mat,Btilde,A,mu);
/*
tmp = zero;
for(int sss=0;sss<U._grid->oSites();sss++){
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
PokeIndex<LorentzIndex>(mat,tmp,mu);
*/
}
}

View File

@ -9,143 +9,116 @@ void WilsonKernels<Impl>::DiracOptDhopSite(CartesianStencil &st,DoubledGaugeFiel
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
int offset,local,perm, ptype;
SiteSpinor result;
StencilEntry *SE;
int ptype;
// Xp
int ss = sF;
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = st._permute_type[Xp];
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Xp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xp);
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE);
spReconXp(result,Uchi);
// Yp
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Yp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Yp);
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE);
accumReconYp(result,Uchi);
// Zp
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Zp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zp);
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE);
accumReconZp(result,Uchi);
// Tp
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Tp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tp);
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE);
accumReconTp(result,Uchi);
// Xm
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = st._permute_type[Xm];
if ( local && perm ) {
spProjXm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Xm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm);
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE);
accumReconXm(result,Uchi);
// Ym
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Ym);
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE);
accumReconYm(result,Uchi);
// Zm
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Zm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zm);
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE);
accumReconZm(result,Uchi);
// Tm
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Tm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tm);
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE);
accumReconTm(result,Uchi);
vstream(out._odata[ss],result*(-0.5));
vstream(out._odata[sF],result*(-0.5));
};
template<class Impl>
@ -157,141 +130,114 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(CartesianStencil &st,DoubledGaugeF
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
int offset,local,perm, ptype;
StencilEntry *SE;
int ptype;
// Xp
int ss=sF;
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = st._permute_type[Xm];
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Xm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xm);
Impl::multLink(Uchi,U._odata[sU],chi,Xm,SE);
spReconXp(result,Uchi);
// Yp
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Ym,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Ym);
Impl::multLink(Uchi,U._odata[sU],chi,Ym,SE);
accumReconYp(result,Uchi);
// Zp
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Zm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zm);
Impl::multLink(Uchi,U._odata[sU],chi,Zm,SE);
accumReconZp(result,Uchi);
// Tp
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Tm,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tm);
Impl::multLink(Uchi,U._odata[sU],chi,Tm,SE);
accumReconTp(result,Uchi);
// Xm
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = st._permute_type[Xp];
if ( local && perm ) {
spProjXm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Xp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Xp);
Impl::multLink(Uchi,U._odata[sU],chi,Xp,SE);
accumReconXm(result,Uchi);
// Ym
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Yp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Yp);
Impl::multLink(Uchi,U._odata[sU],chi,Yp,SE);
accumReconYm(result,Uchi);
// Zm
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Zp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Zp);
Impl::multLink(Uchi,U._odata[sU],chi,Zp,SE);
accumReconZm(result,Uchi);
// Tm
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
SE=st.GetEntry(ptype,Tp,sF);
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,Tp);
Impl::multLink(Uchi,U._odata[sU],chi,Tp,SE);
accumReconTm(result,Uchi);
vstream(out._odata[ss],result*(-0.5));
vstream(out._odata[sF],result*(-0.5));
}
template<class Impl>
@ -303,127 +249,124 @@ void WilsonKernels<Impl>::DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField
SiteHalfSpinor chi;
SiteSpinor result;
SiteHalfSpinor Uchi;
int offset,local,perm, ptype;
int ss=sF;
StencilEntry *SE;
int ptype;
offset = st._offsets [dir][ss];
local = st._is_local[dir][ss];
perm = st._permute[dir][ss];
ptype = st._permute_type[dir];
SE=st.GetEntry(ptype,dir,sF);
// Xp
if(gamma==Xp){
if ( local && perm ) {
spProjXp(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjXp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconXp(result,Uchi);
}
// Yp
if ( gamma==Yp ){
if ( local && perm ) {
spProjYp(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjYp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconYp(result,Uchi);
}
// Zp
if ( gamma ==Zp ){
if ( local && perm ) {
spProjZp(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjZp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconZp(result,Uchi);
}
// Tp
if ( gamma ==Tp ){
if ( local && perm ) {
spProjTp(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjTp(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTp(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTp(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconTp(result,Uchi);
}
// Xm
if ( gamma==Xm ){
if ( local && perm ) {
spProjXm(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjXm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjXm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjXm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconXm(result,Uchi);
}
// Ym
if ( gamma == Ym ){
if ( local && perm ) {
spProjYm(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjYm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjYm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjYm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconYm(result,Uchi);
}
// Zm
if ( gamma == Zm ){
if ( local && perm ) {
spProjZm(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjZm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjZm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjZm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconZm(result,Uchi);
}
// Tm
if ( gamma==Tm ) {
if ( local && perm ) {
spProjTm(tmp,in._odata[offset]);
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( local ) {
spProjTm(chi,in._odata[offset]);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
chi=buf[offset];
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir);
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE);
spReconTm(result,Uchi);
}
vstream(out._odata[ss],result*(-0.5));
vstream(out._odata[sF],result*(-0.5));
}
FermOpTemplateInstantiate(WilsonKernels);

View File

@ -8,11 +8,7 @@ namespace Grid {
int checkerboard,
const std::vector<int> &directions,
const std::vector<int> &distances)
: _offsets(npoints),
_is_local(npoints),
_comm_buf_size(npoints),
_permute_type(npoints),
_permute(npoints)
: _entries(npoints), _permute_type(npoints)
{
_npoints = npoints;
_grid = grid;
@ -27,9 +23,7 @@ namespace Grid {
int point = i;
_offsets[i].resize( osites);
_is_local[i].resize(osites);
_permute[i].resize( osites);
_entries[i].resize( osites);
int dimension = directions[i];
int displacement = distances[i];
@ -76,7 +70,7 @@ namespace Grid {
}
void CartesianStencil::Local (int point, int dimension,int shift,int cbmask)
void CartesianStencil::Local (int point, int dimension,int shiftpm,int cbmask)
{
int fd = _grid->_fdimensions[dimension];
int rd = _grid->_rdimensions[dimension];
@ -84,7 +78,7 @@ namespace Grid {
int gd = _grid->_gdimensions[dimension];
// Map to always positive shift modulo global full dimension.
shift = (shift+fd)%fd;
int shift = (shiftpm+fd)%fd;
// the permute type
int permute_dim =_grid->PermuteDim(dimension);
@ -98,6 +92,15 @@ namespace Grid {
int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) ) {
wraparound = 1;
}
if ( (shiftpm== 1) && (sx<x) ) {
wraparound = 1;
}
int permute_slice=0;
if(permute_dim){
@ -107,12 +110,12 @@ namespace Grid {
else permute_slice = 1-wrap;
}
CopyPlane(point,dimension,x,sx,cbmask,permute_slice);
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
}
}
void CartesianStencil::Comms (int point,int dimension,int shift,int cbmask)
void CartesianStencil::Comms (int point,int dimension,int shiftpm,int cbmask)
{
GridBase *grid=_grid;
@ -125,7 +128,7 @@ namespace Grid {
// assert(simd_layout==1); // Why?
assert(comm_dim==1);
shift = (shift + fd) %fd;
int shift = (shiftpm + fd) %fd;
assert(shift>=0);
assert(shift<fd);
@ -143,10 +146,17 @@ namespace Grid {
// int offnode = (comm_proc!=0);
int sx = (x+sshift)%rd;
int wraparound=0;
if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) {
wraparound = 1;
}
if ( (shiftpm== 1) && (sx<x) && (grid->_processor_coor[dimension]==grid->_processors[dimension]-1) ) {
wraparound = 1;
}
if (!offnode) {
int permute_slice=0;
CopyPlane(point,dimension,x,sx,cbmask,permute_slice);
CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound);
} else {
@ -161,13 +171,13 @@ namespace Grid {
int unified_buffer_offset = _unified_buffer_size;
_unified_buffer_size += words;
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset); // permute/extract/merge is done in comms phase
ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase
}
}
}
// Routine builds up integer table for each site in _offsets, _is_local, _permute
void CartesianStencil::CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute)
void CartesianStencil::CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap)
{
int rd = _grid->_rdimensions[dimension];
@ -180,9 +190,10 @@ namespace Grid {
// Simple block stride gather of SIMD objects
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
_offsets [point][lo+o+b]=ro+o+b;
_is_local[point][lo+o+b]=1;
_permute [point][lo+o+b]=permute;
_entries[point][lo+o+b]._offset =ro+o+b;
_entries[point][lo+o+b]._is_local=1;
_entries[point][lo+o+b]._permute=permute;
_entries[point][lo+o+b]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
@ -199,9 +210,10 @@ namespace Grid {
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) {
_offsets [point][lo+o+b]=ro+o+b;
_is_local[point][lo+o+b]=1;
_permute [point][lo+o+b]=permute;
_entries[point][lo+o+b]._offset =ro+o+b;
_entries[point][lo+o+b]._is_local=1;
_entries[point][lo+o+b]._permute=permute;
_entries[point][lo+o+b]._around_the_world=wrap;
}
}
@ -211,7 +223,7 @@ namespace Grid {
}
}
// Routine builds up integer table for each site in _offsets, _is_local, _permute
void CartesianStencil::ScatterPlane (int point,int dimension,int plane,int cbmask,int offset)
void CartesianStencil::ScatterPlane (int point,int dimension,int plane,int cbmask,int offset, int wrap)
{
int rd = _grid->_rdimensions[dimension];
@ -224,9 +236,10 @@ namespace Grid {
// Simple block stride gather of SIMD objects
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
_offsets [point][so+o+b]=offset+(bo++);
_is_local[point][so+o+b]=0;
_permute [point][so+o+b]=0;
_entries[point][so+o+b]._offset =offset+(bo++);
_entries[point][so+o+b]._is_local=0;
_entries[point][so+o+b]._permute=0;
_entries[point][so+o+b]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
@ -242,9 +255,10 @@ namespace Grid {
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
_offsets [point][so+o+b]=offset+(bo++);
_is_local[point][so+o+b]=0;
_permute [point][so+o+b]=0;
_entries[point][so+o+b]._offset =offset+(bo++);
_entries[point][so+o+b]._is_local=0;
_entries[point][so+o+b]._permute =0;
_entries[point][so+o+b]._around_the_world=wrap;
}
}
o +=_grid->_slice_stride[dimension];