mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-25 13:15:55 +01:00
Reimplemented GparityWilsonImpl::InsertForce5D to run efficiently on GPUs
Swapped order of templated tensor code and c-number specializations in Tensor_outer.h to fix compile issue with type deduction on Summit
This commit is contained in:
parent
a0ca362690
commit
ba5dc670a5
@@ -30,7 +30,6 @@ directory
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Policy implementation for G-parity boundary conditions
|
Policy implementation for G-parity boundary conditions
|
||||||
|
|
||||||
@@ -360,27 +359,40 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
|
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
|
||||||
|
int Ls=Btilde.Grid()->_fdimensions[0];
|
||||||
|
|
||||||
int Ls = Btilde.Grid()->_fdimensions[0];
|
|
||||||
|
|
||||||
GaugeLinkField tmp(mat.Grid());
|
|
||||||
tmp = Zero();
|
|
||||||
{
|
{
|
||||||
autoView( tmp_v , tmp, CpuWrite);
|
autoView( mat_v , mat, AcceleratorWrite);
|
||||||
autoView( Atilde_v , Atilde, CpuRead);
|
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||||
autoView( Btilde_v , Btilde, CpuRead);
|
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||||
thread_for(ss,tmp.Grid()->oSites(),{
|
accelerator_for(sss,mat.Grid()->oSites(), FermionField::vector_type::Nsimd(),{
|
||||||
for (int s = 0; s < Ls; s++) {
|
int sU=sss;
|
||||||
int sF = s + Ls * ss;
|
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde_v[sF], Atilde_v[sF]));
|
ColorMatrixType sum;
|
||||||
tmp_v[ss]() = tmp_v[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
zeroit(sum);
|
||||||
}
|
for(int s=0;s<Ls;s++){
|
||||||
});
|
int sF = s+Ls*sU;
|
||||||
|
for(int spn=0;spn<Ns;spn++){ //sum over spin
|
||||||
|
//Flavor 0
|
||||||
|
auto bb = coalescedRead(Btilde_v[sF](0)(spn) ); //color vector
|
||||||
|
auto aa = coalescedRead(Atilde_v[sF](0)(spn) );
|
||||||
|
sum = sum + outerProduct(bb,aa);
|
||||||
|
|
||||||
|
//Flavor 1
|
||||||
|
bb = coalescedRead(Btilde_v[sF](1)(spn) );
|
||||||
|
aa = coalescedRead(Atilde_v[sF](1)(spn) );
|
||||||
|
sum = sum + conjugate(outerProduct(bb,aa));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
coalescedWrite(mat_v[sU](mu)(), sum);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
typedef GparityWilsonImpl<vComplex , FundamentalRepresentation,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
|
||||||
|
@@ -35,6 +35,17 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
// Vector x Vector -> Matrix
|
// Vector x Vector -> Matrix
|
||||||
///////////////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
template<class CC,IfComplex<CC> = 0>
|
||||||
|
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
||||||
|
{
|
||||||
|
return l*conj(r);
|
||||||
|
}
|
||||||
|
template<class RR,IfReal<RR> = 0>
|
||||||
|
accelerator_inline RR outerProduct(const RR &l, const RR& r)
|
||||||
|
{
|
||||||
|
return l*r;
|
||||||
|
}
|
||||||
|
|
||||||
template<class l,class r,int N> accelerator_inline
|
template<class l,class r,int N> accelerator_inline
|
||||||
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
|
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
|
||||||
{
|
{
|
||||||
@@ -57,16 +68,6 @@ auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<declt
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class CC,IfComplex<CC> = 0>
|
|
||||||
accelerator_inline CC outerProduct(const CC &l, const CC& r)
|
|
||||||
{
|
|
||||||
return l*conj(r);
|
|
||||||
}
|
|
||||||
template<class RR,IfReal<RR> = 0>
|
|
||||||
accelerator_inline RR outerProduct(const RR &l, const RR& r)
|
|
||||||
{
|
|
||||||
return l*r;
|
|
||||||
}
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user