mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-15 02:05:37 +00:00
Fix the partial fraction physical import/export, and
make the GPU happier on deprecate-uvm -- don't use static vectors; make them members of the class
This commit is contained in:
parent
655c79f39e
commit
5603464f39
@ -55,6 +55,11 @@ public:
|
|||||||
RealD alpha; // Mobius scale
|
RealD alpha; // Mobius scale
|
||||||
RealD k; // EOFA normalization constant
|
RealD k; // EOFA normalization constant
|
||||||
|
|
||||||
|
// Device resident
|
||||||
|
deviceVector<Coeff_t> d_shift_coefficients;
|
||||||
|
deviceVector<Coeff_t> d_MooeeInv_shift_lc;
|
||||||
|
deviceVector<Coeff_t> d_MooeeInv_shift_norm;
|
||||||
|
|
||||||
virtual void Instantiatable(void) = 0;
|
virtual void Instantiatable(void) = 0;
|
||||||
|
|
||||||
// EOFA-specific operations
|
// EOFA-specific operations
|
||||||
@ -92,6 +97,11 @@ public:
|
|||||||
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
|
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
|
||||||
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
|
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
|
||||||
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
|
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
|
||||||
|
|
||||||
|
d_shift_coefficients.resize(Ls);
|
||||||
|
d_MooeeInv_shift_lc.resize(Ls);
|
||||||
|
d_MooeeInv_shift_norm.resize(Ls);
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -143,6 +143,17 @@ public:
|
|||||||
std::vector<Coeff_t> ueem;
|
std::vector<Coeff_t> ueem;
|
||||||
std::vector<Coeff_t> dee;
|
std::vector<Coeff_t> dee;
|
||||||
|
|
||||||
|
// Device memory
|
||||||
|
deviceVector<Coeff_t> d_diag;
|
||||||
|
deviceVector<Coeff_t> d_upper;
|
||||||
|
deviceVector<Coeff_t> d_lower;
|
||||||
|
|
||||||
|
deviceVector<Coeff_t> d_lee;
|
||||||
|
deviceVector<Coeff_t> d_dee;
|
||||||
|
deviceVector<Coeff_t> d_uee;
|
||||||
|
deviceVector<Coeff_t> d_leem;
|
||||||
|
deviceVector<Coeff_t> d_ueem;
|
||||||
|
|
||||||
// Matrices of 5d ee inverse params
|
// Matrices of 5d ee inverse params
|
||||||
// std::vector<iSinglet<Simd> > MatpInv;
|
// std::vector<iSinglet<Simd> > MatpInv;
|
||||||
// std::vector<iSinglet<Simd> > MatmInv;
|
// std::vector<iSinglet<Simd> > MatmInv;
|
||||||
|
@ -41,6 +41,10 @@ public:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
|
virtual void Instantiatable(void){};
|
||||||
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||||
|
};
|
||||||
|
|
||||||
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
|
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -41,6 +41,9 @@ public:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||||
|
};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
|
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -40,6 +40,9 @@ public:
|
|||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||||
|
};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
|
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -41,6 +41,9 @@ public:
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||||
|
};
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
|
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -40,6 +40,11 @@ public:
|
|||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
virtual void Instantiatable(void){};
|
virtual void Instantiatable(void){};
|
||||||
|
|
||||||
|
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
|
||||||
|
this->MomentumSpacePropagatorHw(out,in,_m,twist);
|
||||||
|
};
|
||||||
|
|
||||||
// Constructors
|
// Constructors
|
||||||
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
|
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
|
||||||
GridCartesian &FiveDimGrid,
|
GridCartesian &FiveDimGrid,
|
||||||
|
@ -39,7 +39,7 @@ class PartialFractionFermion5D : public WilsonFermion5D<Impl>
|
|||||||
public:
|
public:
|
||||||
INHERIT_IMPL_TYPES(Impl);
|
INHERIT_IMPL_TYPES(Impl);
|
||||||
|
|
||||||
const int part_frac_chroma_convention=1;
|
const int part_frac_chroma_convention=0;
|
||||||
|
|
||||||
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
|
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
|
void Mooee_internal(const FermionField &in, FermionField &out,int dag);
|
||||||
|
@ -415,29 +415,6 @@ public:
|
|||||||
this->same_node.resize(npoints);
|
this->same_node.resize(npoints);
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
void BuildSurfaceList(int Ls,int vol4){
|
|
||||||
|
|
||||||
// find same node for SHM
|
|
||||||
// Here we know the distance is 1 for WilsonStencil
|
|
||||||
for(int point=0;point<this->_npoints;point++){
|
|
||||||
this->same_node[point] = this->SameNode(point);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int site = 0 ;site< vol4;site++){
|
|
||||||
int local = 1;
|
|
||||||
for(int point=0;point<this->_npoints;point++){
|
|
||||||
if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){
|
|
||||||
local = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(local == 0) {
|
|
||||||
surface_list.push_back(site);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
template < class compressor>
|
template < class compressor>
|
||||||
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)
|
||||||
{
|
{
|
||||||
|
@ -529,6 +529,18 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
|
|||||||
dee[Ls-1] += delta_d;
|
dee[Ls-1] += delta_d;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////
|
||||||
|
// Device buffers
|
||||||
|
//////////////////////////////////////////
|
||||||
|
d_diag.resize(Ls);
|
||||||
|
d_upper.resize(Ls);
|
||||||
|
d_lower.resize(Ls);
|
||||||
|
|
||||||
|
d_dee.resize(Ls);
|
||||||
|
d_lee.resize(Ls);
|
||||||
|
d_uee.resize(Ls);
|
||||||
|
d_leem.resize(Ls);
|
||||||
|
d_ueem.resize(Ls);
|
||||||
// int inv=1;
|
// int inv=1;
|
||||||
// this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
|
// this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
|
||||||
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
|
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
|
||||||
|
@ -57,9 +57,9 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
|
|
||||||
int Ls =this->Ls;
|
int Ls =this->Ls;
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
auto pdiag = &d_diag[0];
|
||||||
auto pupper = &d_upper[0];
|
auto pupper = &d_upper[0];
|
||||||
@ -99,9 +99,9 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
|||||||
|
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
auto pdiag = &d_diag[0];
|
||||||
auto pupper = &d_upper[0];
|
auto pupper = &d_upper[0];
|
||||||
@ -134,11 +134,11 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
|||||||
|
|
||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
auto plee = & d_lee [0];
|
auto plee = & d_lee [0];
|
||||||
auto pdee = & d_dee [0];
|
auto pdee = & d_dee [0];
|
||||||
@ -196,11 +196,11 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
|||||||
autoView(psi , psi_i,AcceleratorRead);
|
autoView(psi , psi_i,AcceleratorRead);
|
||||||
autoView(chi , chi_i,AcceleratorWrite);
|
autoView(chi , chi_i,AcceleratorWrite);
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
auto plee = & d_lee [0];
|
auto plee = & d_lee [0];
|
||||||
auto pdee = & d_dee [0];
|
auto pdee = & d_dee [0];
|
||||||
|
@ -51,13 +51,13 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
|||||||
autoView( chi , chi_i, AcceleratorWrite);
|
autoView( chi , chi_i, AcceleratorWrite);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
|
|
||||||
@ -90,13 +90,13 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
|||||||
autoView( chi , chi_i, AcceleratorWrite);
|
autoView( chi , chi_i, AcceleratorWrite);
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0] ,&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
|
|
||||||
@ -125,17 +125,17 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
|||||||
autoView( chi, chi_i, AcceleratorWrite);
|
autoView( chi, chi_i, AcceleratorWrite);
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
auto plee = & this->d_lee [0];
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
auto pdee = & this->d_dee [0];
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
auto puee = & this->d_uee [0];
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
auto pleem = & this->d_leem[0];
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
auto pueem = & this->d_ueem[0];
|
||||||
|
|
||||||
auto plee = & d_lee [0];
|
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pdee = & d_dee [0];
|
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
|
||||||
auto puee = & d_uee [0];
|
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pleem = & d_leem[0];
|
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
|
||||||
auto pueem = & d_ueem[0];
|
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
uint64_t nloop=grid->oSites()/Ls;
|
uint64_t nloop=grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
|
@ -50,13 +50,13 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
|||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
@ -93,15 +93,15 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
|||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
static deviceVector<Coeff_t> d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t));
|
auto pshift_coeffs = &this->d_shift_coefficients[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
auto pshift_coeffs = &d_shift_coeffs[0];
|
acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
@ -139,13 +139,13 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
|||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
@ -180,15 +180,15 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
|||||||
|
|
||||||
assert(phi.Checkerboard() == psi.Checkerboard());
|
assert(phi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
|
auto pdiag = &this->d_diag[0];
|
||||||
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
|
auto pupper = &this->d_upper[0];
|
||||||
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
|
auto plower = &this->d_lower[0];
|
||||||
static deviceVector<Coeff_t> d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t));
|
auto pshift_coeffs = &this->d_shift_coefficients[0];
|
||||||
|
|
||||||
auto pdiag = &d_diag[0];
|
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
|
||||||
auto pupper = &d_upper[0];
|
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
|
||||||
auto plower = &d_lower[0];
|
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
|
||||||
auto pshift_coeffs = &d_shift_coeffs[0];
|
acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
auto pm = this->pm;
|
auto pm = this->pm;
|
||||||
@ -230,17 +230,17 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
|||||||
autoView(psi , psi_i, AcceleratorRead);
|
autoView(psi , psi_i, AcceleratorRead);
|
||||||
autoView(chi , chi_i, AcceleratorWrite);
|
autoView(chi , chi_i, AcceleratorWrite);
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
auto plee = & this->d_lee [0];
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
auto pdee = & this->d_dee [0];
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
auto puee = & this->d_uee [0];
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
auto pleem = & this->d_leem[0];
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
auto pueem = & this->d_ueem[0];
|
||||||
|
|
||||||
auto plee = & d_lee [0];
|
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pdee = & d_dee [0];
|
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
|
||||||
auto puee = & d_uee [0];
|
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pleem = & d_leem[0];
|
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
|
||||||
auto pueem = & d_ueem[0];
|
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
||||||
|
|
||||||
@ -293,23 +293,22 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
|||||||
autoView(chi , chi_i, AcceleratorWrite);
|
autoView(chi , chi_i, AcceleratorWrite);
|
||||||
|
|
||||||
// Move into object and constructor
|
// Move into object and constructor
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
|
||||||
|
|
||||||
auto pm = this->pm;
|
auto pm = this->pm;
|
||||||
auto plee = & d_lee [0];
|
auto plee = & this->d_lee [0];
|
||||||
auto pdee = & d_dee [0];
|
auto pdee = & this->d_dee [0];
|
||||||
auto puee = & d_uee [0];
|
auto puee = & this->d_uee [0];
|
||||||
auto pleem = & d_leem[0];
|
auto pleem = & this->d_leem[0];
|
||||||
auto pueem = & d_ueem[0];
|
auto pueem = & this->d_ueem[0];
|
||||||
|
auto pMooeeInv_shift_lc = &this->d_MooeeInv_shift_lc[0];
|
||||||
|
auto pMooeeInv_shift_norm = &this->d_MooeeInv_shift_norm[0];
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_MooeeInv_shift_lc(Ls); acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&d_MooeeInv_shift_lc[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
|
||||||
static deviceVector<Coeff_t> d_MooeeInv_shift_norm(Ls); acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&d_MooeeInv_shift_norm[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pMooeeInv_shift_lc = &d_MooeeInv_shift_lc[0];
|
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pMooeeInv_shift_norm = &d_MooeeInv_shift_norm[0];
|
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&pMooeeInv_shift_lc[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&pMooeeInv_shift_norm[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
@ -367,17 +366,17 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
|||||||
autoView(psi , psi_i, AcceleratorRead);
|
autoView(psi , psi_i, AcceleratorRead);
|
||||||
autoView(chi , chi_i, AcceleratorWrite);
|
autoView(chi , chi_i, AcceleratorWrite);
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
auto plee = &this->d_lee [0];
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
auto pdee = &this->d_dee [0];
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
auto puee = &this->d_uee [0];
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
auto pleem = &this->d_leem[0];
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
auto pueem = &this->d_ueem[0];
|
||||||
|
|
||||||
auto plee = & d_lee [0];
|
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pdee = & d_dee [0];
|
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
|
||||||
auto puee = & d_uee [0];
|
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pleem = & d_leem[0];
|
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
|
||||||
auto pueem = & d_ueem[0];
|
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
@ -426,25 +425,23 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
|||||||
autoView(chi , chi_i, AcceleratorWrite);
|
autoView(chi , chi_i, AcceleratorWrite);
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
|
|
||||||
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
|
|
||||||
|
|
||||||
auto pm = this->pm;
|
auto pm = this->pm;
|
||||||
auto plee = & d_lee [0];
|
auto plee = & this->d_lee [0];
|
||||||
auto pdee = & d_dee [0];
|
auto pdee = & this->d_dee [0];
|
||||||
auto puee = & d_uee [0];
|
auto puee = & this->d_uee [0];
|
||||||
auto pleem = & d_leem[0];
|
auto pleem = & this->d_leem[0];
|
||||||
auto pueem = & d_ueem[0];
|
auto pueem = & this->d_ueem[0];
|
||||||
|
|
||||||
static deviceVector<Coeff_t> d_MooeeInvDag_shift_lc(Ls);
|
auto pMooeeInvDag_shift_lc = &this->d_MooeeInv_shift_lc[0];
|
||||||
static deviceVector<Coeff_t> d_MooeeInvDag_shift_norm(Ls);
|
auto pMooeeInvDag_shift_norm = &this->d_MooeeInv_shift_norm[0];
|
||||||
acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&d_MooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t));
|
|
||||||
acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&d_MooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t));
|
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pMooeeInvDag_shift_lc = &d_MooeeInvDag_shift_lc[0];
|
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
|
||||||
auto pMooeeInvDag_shift_norm = &d_MooeeInvDag_shift_norm[0];
|
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&pMooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t));
|
||||||
|
acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&pMooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t));
|
||||||
|
|
||||||
// auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
|
// auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
|
||||||
// auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
|
// auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
|
||||||
|
@ -411,17 +411,18 @@ void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,App
|
|||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
conformable(solution5d.Grid(),this->FermionGrid());
|
conformable(solution5d.Grid(),this->FermionGrid());
|
||||||
conformable(exported4d.Grid(),this->GaugeGrid());
|
conformable(exported4d.Grid(),this->GaugeGrid());
|
||||||
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
|
ExtractSlice(exported4d, solution5d, Ls-1, 0);
|
||||||
}
|
}
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
|
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
|
||||||
{
|
{
|
||||||
|
//void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
conformable(imported5d.Grid(),this->FermionGrid());
|
conformable(imported5d.Grid(),this->FermionGrid());
|
||||||
conformable(input4d.Grid() ,this->GaugeGrid());
|
conformable(input4d.Grid() ,this->GaugeGrid());
|
||||||
FermionField tmp(this->FermionGrid());
|
FermionField tmp(this->FermionGrid());
|
||||||
tmp=Zero();
|
tmp=Zero();
|
||||||
InsertSlice(input4d, tmp, Ls-1, Ls-1);
|
InsertSlice(input4d, tmp, Ls-1, 0);
|
||||||
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
|
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
|
||||||
this->Dminus(tmp,imported5d);
|
this->Dminus(tmp,imported5d);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user