mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 07:17:06 +01:00
Added single threaded version of the derivative for the Ls vectorised DWF
This commit is contained in:
@ -44,14 +44,14 @@ namespace QCD {
|
||||
// Ultimately need Impl to always define types where XXX is opaque
|
||||
//
|
||||
// typedef typename XXX Simd;
|
||||
// typedef typename XXX GaugeLinkField;
|
||||
// typedef typename XXX GaugeLinkField;
|
||||
// typedef typename XXX GaugeField;
|
||||
// typedef typename XXX GaugeActField;
|
||||
// typedef typename XXX FermionField;
|
||||
// typedef typename XXX DoubledGaugeField;
|
||||
// typedef typename XXX SiteSpinor;
|
||||
// typedef typename XXX SiteHalfSpinor;
|
||||
// typedef typename XXX Compressor;
|
||||
// typedef typename XXX SiteHalfSpinor;
|
||||
// typedef typename XXX Compressor;
|
||||
//
|
||||
// and Methods:
|
||||
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
@ -94,17 +94,17 @@ namespace QCD {
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define INHERIT_FIMPL_TYPES(Impl)\
|
||||
typedef typename Impl::FermionField FermionField; \
|
||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::FermionField FermionField; \
|
||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::Coeff_t Coeff_t;
|
||||
|
||||
#define INHERIT_IMPL_TYPES(Base) \
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
INHERIT_FIMPL_TYPES(Base)
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -148,11 +148,11 @@ namespace QCD {
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi,
|
||||
int mu,
|
||||
StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi,
|
||||
int mu,
|
||||
StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
mult(&phi(), &U(mu), &chi());
|
||||
}
|
||||
|
||||
@ -162,16 +162,16 @@ namespace QCD {
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu) {
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu) {
|
||||
conformable(Uds._grid, GaugeGrid);
|
||||
conformable(Umu._grid, GaugeGrid);
|
||||
GaugeLinkField U(GaugeGrid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -189,11 +189,11 @@ namespace QCD {
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||
}
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||
|
||||
@ -248,12 +248,12 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
||||
}
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Nrepresentation; i++) {
|
||||
for (int j = 0; j < Nrepresentation; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mult(&phi(), &UU(), &chi());
|
||||
@ -290,10 +290,40 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField Ã, int mu)
|
||||
{
|
||||
assert(0);
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
|
||||
int LLs = Btilde._grid->_rdimensions[0];
|
||||
conformable(Atilde._grid,Btilde._grid);
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
|
||||
std::vector<typename result_type::scalar_object> v_scalar_object(Btilde._grid->Nsimd());
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < tmp._grid->oSites(); sss++) {
|
||||
std::vector<int> ocoor;
|
||||
tmp._grid->oCoorFromOindex(ocoor,sss);
|
||||
for (int si = 0; si < tmp._grid->iSites(); si++){
|
||||
typename result_type::scalar_object scalar_object;
|
||||
scalar_object = zero;
|
||||
std::vector<int> local_coor(tmp._grid->Nd());
|
||||
std::vector<int> icoor;
|
||||
tmp._grid->iCoorFromIindex(icoor,si);
|
||||
for (int i = 0; i < tmp._grid->Nd(); i++) local_coor[i] = ocoor[i] + tmp._grid->_rdimensions[i]*icoor[i];
|
||||
|
||||
for (int s = 0; s < LLs; s++) {
|
||||
std::vector<int> slocal_coor(Btilde._grid->Nd());
|
||||
slocal_coor[0] = s;
|
||||
for (int s4d = 1; s4d< Btilde._grid->Nd(); s4d++) slocal_coor[s4d] = local_coor[s4d-1];
|
||||
int sF = Btilde._grid->oIndexReduced(slocal_coor);
|
||||
assert(sF < Btilde._grid->oSites());
|
||||
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), v_scalar_object);
|
||||
for (int sv = 0; sv < v_scalar_object.size(); sv++) scalar_object += v_scalar_object[sv]; // sum across the 5d dimension
|
||||
}
|
||||
tmp._odata[sss].putlane(scalar_object, si);
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
}
|
||||
};
|
||||
|
||||
@ -339,19 +369,19 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
// provide the multiply by link that is differentiated between Gparity (with
|
||||
// flavour index) and non-Gparity
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
|
||||
typedef SiteHalfSpinor vobj;
|
||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||
|
||||
|
||||
vobj vtmp;
|
||||
sobj stmp;
|
||||
|
||||
|
||||
GridBase *grid = St._grid;
|
||||
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
|
||||
int direction = St._directions[mu];
|
||||
int distance = St._distances[mu];
|
||||
int ptype = St._permute_type[mu];
|
||||
@ -359,13 +389,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
|
||||
// Fixme X.Y.Z.T hardcode in stencil
|
||||
int mmu = mu % Nd;
|
||||
|
||||
|
||||
// assert our assumptions
|
||||
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||
assert((sl == 1) || (sl == 2));
|
||||
|
||||
std::vector<int> icoor;
|
||||
|
||||
|
||||
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||
|
||||
if ( sl == 2 ) {
|
||||
@ -375,25 +405,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
extract(chi,vals);
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
|
||||
int permute_lane;
|
||||
if ( distance == 1) {
|
||||
permute_lane = icoor[direction]?1:0;
|
||||
} else {
|
||||
permute_lane = icoor[direction]?0:1;
|
||||
}
|
||||
|
||||
if ( permute_lane ) {
|
||||
stmp(0) = vals[s](1);
|
||||
stmp(1) = vals[s](0);
|
||||
vals[s] = stmp;
|
||||
}
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
|
||||
int permute_lane;
|
||||
if ( distance == 1) {
|
||||
permute_lane = icoor[direction]?1:0;
|
||||
} else {
|
||||
permute_lane = icoor[direction]?0:1;
|
||||
}
|
||||
|
||||
if ( permute_lane ) {
|
||||
stmp(0) = vals[s](1);
|
||||
stmp(1) = vals[s](0);
|
||||
vals[s] = stmp;
|
||||
}
|
||||
}
|
||||
merge(vtmp,vals);
|
||||
|
||||
|
||||
} else {
|
||||
vtmp(0) = chi(1);
|
||||
vtmp(1) = chi(0);
|
||||
@ -418,11 +448,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
GaugeLinkField Uconj(GaugeGrid);
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
|
||||
LatticeCoordinate(coor,mu);
|
||||
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
Uconj = conjugate(U);
|
||||
|
||||
@ -431,13 +461,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
|
||||
if ( Params.twists[mu] ) {
|
||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||
}
|
||||
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu) = U[ss]();
|
||||
Uds[ss](1)(mu) = Uconj[ss]();
|
||||
}
|
||||
|
||||
|
||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||
|
||||
@ -445,22 +475,22 @@ PARALLEL_FOR_LOOP
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,Uconj,Utmp);
|
||||
}
|
||||
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
|
||||
|
||||
Utmp = Uconj;
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,U,Utmp);
|
||||
}
|
||||
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -482,7 +512,7 @@ PARALLEL_FOR_LOOP
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
|
||||
|
||||
int Ls = Btilde._grid->_fdimensions[0];
|
||||
|
||||
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
|
Reference in New Issue
Block a user