From 5603464f39f50ea8f0f620600189031c7ca99cc7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 23 Oct 2024 14:45:58 -0400 Subject: [PATCH] Fix in partial fraction import/export physical and make the GPU happier on the deprecate-uvm -- don't use static vectors, make member of class --- Grid/qcd/action/fermion/AbstractEOFAFermion.h | 10 ++ Grid/qcd/action/fermion/CayleyFermion5D.h | 11 ++ .../fermion/OverlapWilsonCayleyTanhFermion.h | 2 +- .../OverlapWilsonCayleyZolotarevFermion.h | 4 + .../OverlapWilsonContfracTanhFermion.h | 3 + .../OverlapWilsonContfracZolotarevFermion.h | 3 + .../OverlapWilsonPartialFractionTanhFermion.h | 3 + ...lapWilsonPartialFractionZolotarevFermion.h | 5 + .../action/fermion/PartialFractionFermion5D.h | 2 +- Grid/qcd/action/fermion/WilsonCompressor.h | 23 --- .../CayleyFermion5DImplementation.h | 14 +- .../implementation/CayleyFermion5Dcache.h | 32 ++-- .../DomainWallEOFAFermionCache.h | 50 +++--- .../implementation/MobiusEOFAFermionCache.h | 165 +++++++++--------- .../PartialFractionFermion5DImplementation.h | 5 +- 15 files changed, 179 insertions(+), 153 deletions(-) diff --git a/Grid/qcd/action/fermion/AbstractEOFAFermion.h b/Grid/qcd/action/fermion/AbstractEOFAFermion.h index 18bcb394..3c203d17 100644 --- a/Grid/qcd/action/fermion/AbstractEOFAFermion.h +++ b/Grid/qcd/action/fermion/AbstractEOFAFermion.h @@ -55,6 +55,11 @@ public: RealD alpha; // Mobius scale RealD k; // EOFA normalization constant + // Device resident + deviceVector d_shift_coefficients; + deviceVector d_MooeeInv_shift_lc; + deviceVector d_MooeeInv_shift_norm; + virtual void Instantiatable(void) = 0; // EOFA-specific operations @@ -92,6 +97,11 @@ public: this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) / ( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) / ( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) ); + + d_shift_coefficients.resize(Ls); + d_MooeeInv_shift_lc.resize(Ls); + d_MooeeInv_shift_norm.resize(Ls); + }; }; diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 2c56c7ed..c8fbe5a8 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -143,6 +143,17 @@ public: std::vector ueem; std::vector dee; + // Device memory + deviceVector d_diag; + deviceVector d_upper; + deviceVector d_lower; + + deviceVector d_lee; + deviceVector d_dee; + deviceVector d_uee; + deviceVector d_leem; + deviceVector d_ueem; + // Matrices of 5d ee inverse params // std::vector > MatpInv; // std::vector > MatmInv; diff --git a/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h index 350e89e2..8f0c91eb 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h @@ -42,7 +42,7 @@ public: void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { this->MomentumSpacePropagatorHw(out,in,_m,twist); - }; + }; // Constructors OverlapWilsonCayleyTanhFermion(GaugeField &_Umu, diff --git a/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h index d15690fa..33e59b88 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h @@ -41,6 +41,10 @@ public: public: // Constructors + virtual void Instantiatable(void){}; + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { + this->MomentumSpacePropagatorHw(out,in,_m,twist); + }; OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h index 9d1a9a86..5b603017 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -41,6 +41,9 @@ public: public: virtual void Instantiatable(void){}; + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { + this->MomentumSpacePropagatorHw(out,in,_m,twist); + }; // Constructors OverlapWilsonContFracTanhFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h index ce796d4a..747cb508 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -40,6 +40,9 @@ public: INHERIT_IMPL_TYPES(Impl); virtual void Instantiatable(void){}; + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { + this->MomentumSpacePropagatorHw(out,in,_m,twist); + }; // Constructors OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h index f2fb46cd..7210d6af 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h @@ -41,6 +41,9 @@ public: public: virtual void Instantiatable(void){}; + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { + this->MomentumSpacePropagatorHw(out,in,_m,twist); + }; // Constructors OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h index f98b64a9..f0be4388 100644 --- a/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h +++ b/Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h @@ -40,6 +40,11 @@ public: INHERIT_IMPL_TYPES(Impl); virtual void Instantiatable(void){}; + + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector twist) { + this->MomentumSpacePropagatorHw(out,in,_m,twist); + }; + // Constructors OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index e50a9922..47406730 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -39,7 +39,7 @@ class PartialFractionFermion5D : public WilsonFermion5D public: INHERIT_IMPL_TYPES(Impl); - const int part_frac_chroma_convention=1; + const int part_frac_chroma_convention=0; void Meooe_internal(const FermionField &in, FermionField &out,int dag); void Mooee_internal(const FermionField &in, FermionField &out,int dag); diff --git a/Grid/qcd/action/fermion/WilsonCompressor.h b/Grid/qcd/action/fermion/WilsonCompressor.h index baa1f684..605bdcec 100644 --- a/Grid/qcd/action/fermion/WilsonCompressor.h +++ b/Grid/qcd/action/fermion/WilsonCompressor.h @@ -414,29 +414,6 @@ public: // surface_list.resize(0); this->same_node.resize(npoints); }; - - /* - void BuildSurfaceList(int Ls,int vol4){ - - // find same node for SHM - // Here we know the distance is 1 for WilsonStencil - for(int point=0;point_npoints;point++){ - this->same_node[point] = this->SameNode(point); - } - - for(int site = 0 ;site< vol4;site++){ - int local = 1; - for(int point=0;point_npoints;point++){ - if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){ - local = 0; - } - } - if(local == 0) { - surface_list.push_back(site); - } - } - } - */ template < class compressor> void HaloExchangeOpt(const Lattice &source,compressor &compress) diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h index 8dc4fbc8..69b5b02c 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5DImplementation.h @@ -488,7 +488,7 @@ void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector::SetCoefficientsInternal(RealD zolo_hi,std::vectorMooeeInternalCompute(0,inv,MatpInv,MatmInv); // this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); diff --git a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h index d3d88cbf..5fbc7612 100644 --- a/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h +++ b/Grid/qcd/action/fermion/implementation/CayleyFermion5Dcache.h @@ -57,9 +57,9 @@ CayleyFermion5D::M5D(const FermionField &psi_i, int Ls =this->Ls; - static deviceVector d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t)); auto pdiag = &d_diag[0]; auto pupper = &d_upper[0]; @@ -99,9 +99,9 @@ CayleyFermion5D::M5Ddag(const FermionField &psi_i, int Ls=this->Ls; - static deviceVector d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t)); auto pdiag = &d_diag[0]; auto pupper = &d_upper[0]; @@ -134,11 +134,11 @@ CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi int Ls=this->Ls; - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); auto plee = & d_lee [0]; auto pdee = & d_dee [0]; @@ -196,11 +196,11 @@ CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi autoView(psi , psi_i,AcceleratorRead); autoView(chi , chi_i,AcceleratorWrite); - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); auto plee = & d_lee [0]; auto pdee = & d_dee [0]; diff --git a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h index 8a9a0ffa..ae126bb5 100644 --- a/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/DomainWallEOFAFermionCache.h @@ -51,13 +51,13 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + + acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); // Flops = 6.0*(Nc*Ns) *Ls*vol @@ -89,14 +89,14 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const Fermio autoView( phi , phi_i, AcceleratorRead); autoView( chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); - - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + + acceleratorCopyToDevice(&diag[0] ,&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); // Flops = 6.0*(Nc*Ns) *Ls*vol @@ -125,18 +125,18 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi_i, FermionFie autoView( chi, chi_i, AcceleratorWrite); int Ls = this->Ls; - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); - - auto plee = & d_lee [0]; - auto pdee = & d_dee [0]; - auto puee = & d_uee [0]; - auto pleem = & d_leem[0]; - auto pueem = & d_ueem[0]; + auto plee = & this->d_lee [0]; + auto pdee = & this->d_dee [0]; + auto puee = & this->d_uee [0]; + auto pleem = & this->d_leem[0]; + auto pueem = & this->d_ueem[0]; + acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t)); + uint64_t nloop=grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ uint64_t ss=sss*Ls; diff --git a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h index 4827e516..b9165edb 100644 --- a/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h +++ b/Grid/qcd/action/fermion/implementation/MobiusEOFAFermionCache.h @@ -50,14 +50,14 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField assert(phi.Checkerboard() == psi.Checkerboard()); - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); + // Flops = 6.0*(Nc*Ns) *Ls*vol int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -93,15 +93,15 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion assert(phi.Checkerboard() == psi.Checkerboard()); - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - static deviceVector d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t)); - - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; - auto pshift_coeffs = &d_shift_coeffs[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + auto pshift_coeffs = &this->d_shift_coefficients[0]; + + acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t)); // Flops = 6.0*(Nc*Ns) *Ls*vol int nloop = grid->oSites()/Ls; @@ -138,14 +138,14 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie autoView(chi , chi_i, AcceleratorWrite); assert(phi.Checkerboard() == psi.Checkerboard()); - - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + + acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); // Flops = 6.0*(Nc*Ns) *Ls*vol int nloop = grid->oSites()/Ls; @@ -180,16 +180,16 @@ void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const Ferm assert(phi.Checkerboard() == psi.Checkerboard()); - static deviceVector d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t)); - static deviceVector d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t)); - static deviceVector d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t)); - static deviceVector d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t)); - - auto pdiag = &d_diag[0]; - auto pupper = &d_upper[0]; - auto plower = &d_lower[0]; - auto pshift_coeffs = &d_shift_coeffs[0]; + auto pdiag = &this->d_diag[0]; + auto pupper = &this->d_upper[0]; + auto plower = &this->d_lower[0]; + auto pshift_coeffs = &this->d_shift_coefficients[0]; + acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t)); + // Flops = 6.0*(Nc*Ns) *Ls*vol auto pm = this->pm; @@ -230,17 +230,17 @@ void MobiusEOFAFermion::MooeeInv(const FermionField &psi_i, FermionField & autoView(psi , psi_i, AcceleratorRead); autoView(chi , chi_i, AcceleratorWrite); - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); + auto plee = & this->d_lee [0]; + auto pdee = & this->d_dee [0]; + auto puee = & this->d_uee [0]; + auto pleem = & this->d_leem[0]; + auto pueem = & this->d_ueem[0]; - auto plee = & d_lee [0]; - auto pdee = & d_dee [0]; - auto puee = & d_uee [0]; - auto pleem = & d_leem[0]; - auto pueem = & d_ueem[0]; + acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t)); if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; } @@ -293,23 +293,22 @@ void MobiusEOFAFermion::MooeeInv_shift(const FermionField &psi_i, FermionF autoView(chi , chi_i, AcceleratorWrite); // Move into object and constructor - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); - auto pm = this->pm; - auto plee = & d_lee [0]; - auto pdee = & d_dee [0]; - auto puee = & d_uee [0]; - auto pleem = & d_leem[0]; - auto pueem = & d_ueem[0]; + auto plee = & this->d_lee [0]; + auto pdee = & this->d_dee [0]; + auto puee = & this->d_uee [0]; + auto pleem = & this->d_leem[0]; + auto pueem = & this->d_ueem[0]; + auto pMooeeInv_shift_lc = &this->d_MooeeInv_shift_lc[0]; + auto pMooeeInv_shift_norm = &this->d_MooeeInv_shift_norm[0]; - static deviceVector d_MooeeInv_shift_lc(Ls); acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&d_MooeeInv_shift_lc[0],Ls*sizeof(Coeff_t)); - static deviceVector d_MooeeInv_shift_norm(Ls); acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&d_MooeeInv_shift_norm[0],Ls*sizeof(Coeff_t)); - auto pMooeeInv_shift_lc = &d_MooeeInv_shift_lc[0]; - auto pMooeeInv_shift_norm = &d_MooeeInv_shift_norm[0]; + acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&pMooeeInv_shift_lc[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&pMooeeInv_shift_norm[0],Ls*sizeof(Coeff_t)); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -367,17 +366,17 @@ void MobiusEOFAFermion::MooeeInvDag(const FermionField &psi_i, FermionFiel autoView(psi , psi_i, AcceleratorRead); autoView(chi , chi_i, AcceleratorWrite); - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); + auto plee = &this->d_lee [0]; + auto pdee = &this->d_dee [0]; + auto puee = &this->d_uee [0]; + auto pleem = &this->d_leem[0]; + auto pueem = &this->d_ueem[0]; - auto plee = & d_lee [0]; - auto pdee = & d_dee [0]; - auto puee = & d_uee [0]; - auto pleem = & d_leem[0]; - auto pueem = & d_ueem[0]; + acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t)); int nloop = grid->oSites()/Ls; accelerator_for(sss,nloop,Simd::Nsimd(),{ @@ -426,25 +425,23 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField &psi_i, Fermi autoView(chi , chi_i, AcceleratorWrite); int Ls = this->Ls; - static deviceVector d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t)); - static deviceVector d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t)); - static deviceVector d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t)); - auto pm = this->pm; - auto plee = & d_lee [0]; - auto pdee = & d_dee [0]; - auto puee = & d_uee [0]; - auto pleem = & d_leem[0]; - auto pueem = & d_ueem[0]; + auto plee = & this->d_lee [0]; + auto pdee = & this->d_dee [0]; + auto puee = & this->d_uee [0]; + auto pleem = & this->d_leem[0]; + auto pueem = & this->d_ueem[0]; - static deviceVector d_MooeeInvDag_shift_lc(Ls); - static deviceVector d_MooeeInvDag_shift_norm(Ls); - acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&d_MooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t)); - acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&d_MooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t)); - auto pMooeeInvDag_shift_lc = &d_MooeeInvDag_shift_lc[0]; - auto pMooeeInvDag_shift_norm = &d_MooeeInvDag_shift_norm[0]; + auto pMooeeInvDag_shift_lc = &this->d_MooeeInv_shift_lc[0]; + auto pMooeeInvDag_shift_norm = &this->d_MooeeInv_shift_norm[0]; + + acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&pMooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t)); + acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&pMooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t)); // auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0]; // auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0]; diff --git a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h index 0206828b..93684929 100644 --- a/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h +++ b/Grid/qcd/action/fermion/implementation/PartialFractionFermion5DImplementation.h @@ -411,17 +411,18 @@ void PartialFractionFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,App int Ls = this->Ls; conformable(solution5d.Grid(),this->FermionGrid()); conformable(exported4d.Grid(),this->GaugeGrid()); - ExtractSlice(exported4d, solution5d, Ls-1, Ls-1); + ExtractSlice(exported4d, solution5d, Ls-1, 0); } template void PartialFractionFermion5D::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d) { + //void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice, int orthog) int Ls = this->Ls; conformable(imported5d.Grid(),this->FermionGrid()); conformable(input4d.Grid() ,this->GaugeGrid()); FermionField tmp(this->FermionGrid()); tmp=Zero(); - InsertSlice(input4d, tmp, Ls-1, Ls-1); + InsertSlice(input4d, tmp, Ls-1, 0); tmp=Gamma(Gamma::Algebra::Gamma5)*tmp; this->Dminus(tmp,imported5d); }