diff --git a/Grid/allocator/AlignedAllocator.h b/Grid/allocator/AlignedAllocator.h index ed1fbec2..644fc3c7 100644 --- a/Grid/allocator/AlignedAllocator.h +++ b/Grid/allocator/AlignedAllocator.h @@ -220,7 +220,10 @@ public: #endif #endif } - void construct(pointer __p, const _Tp& __val) { }; + + // FIXME: hack for the copy constructor, eventually it must be avoided + void construct(pointer __p, const _Tp& __val) { new((void *)__p) _Tp(__val); }; + //void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; void destroy(pointer __p) { }; }; diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index 2d92ead8..ba871d1f 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -23,12 +23,7 @@ Author: paboyle #include #ifdef GRID_NVCC -#include -#include -#include -#include -#include -#include +#include #endif NAMESPACE_BEGIN(Grid); @@ -41,23 +36,12 @@ template inline RealD norm2(const Lattice &arg){ return real(nrm); } -#if 0 -//#warning "ThrustReduce compiled" -//#include -template -vobj ThrustNorm(const Lattice &lat) +#ifdef GRID_NVCC +template +struct innerProductFunctor : public thrust::binary_function { - typedef typename vobj::scalar_type scalar_type; - auto lat_v=lat.View(); - Integer s0=0; - Integer sN=lat_v.end(); - scalar_type sum = 0; - scalar_type * begin = (scalar_type *)&lat_v[s0]; - scalar_type * end = (scalar_type *)&lat_v[sN]; - thrust::reduce(begin,end,sum); - std::cout <<" thrust::reduce sum "<< sum << std::endl; - return sum; -} + accelerator R operator()(T x, T y) { return innerProduct(x,y); } +}; #endif // Double inner product @@ -75,24 +59,17 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ auto left_v = left.View(); auto right_v=right.View(); -#if 0 +#ifdef GRID_NVCC - typedef decltype(TensorRemove(innerProduct(left_v[0],right_v[0]))) inner_t; + typedef decltype(innerProduct(left_v[0],right_v[0])) inner_t; + thrust::plus binary_sum; + innerProductFunctor binary_inner_p; + Integer sN = left_v.end(); + inner_t zero = Zero(); + // is there a way of using the efficient thrust reduction while maintaining memory coalescing? + inner_t vnrm = thrust::inner_product(thrust::device, &left_v[0], &left_v[sN], &right_v[0], zero, binary_sum, binary_inner_p); + nrm = Reduce(TensorRemove(vnrm));// sum across simd - Lattice inner_tmp(grid); - - ///////////////////////// - // localInnerProduct - ///////////////////////// - auto inner_tmp_v = inner_tmp.View(); - accelerator_loop(ss,left_v,{ - inner_tmp_v[ss] = TensorRemove(innerProduct(left_v[ss],right_v[ss])); - }); - ///////////////////////// - // and site sum the scalars - ///////////////////////// - inner_t vnrm = ThrustNorm(inner_tmp); - auto vvnrm = vnrm; #else thread_loop( (int thr=0;thrSumArraySize();thr++),{ int mywork, myoff; @@ -108,9 +85,9 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ vector_type vvnrm; vvnrm=Zero(); // sum across threads for(int i=0;iSumArraySize();i++){ vvnrm = vvnrm+sumarray[i]; - } -#endif + } nrm = Reduce(vvnrm);// sum across simd +#endif right.Grid()->GlobalSum(nrm); return nrm; } diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.cc b/Grid/qcd/action/fermion/CayleyFermion5D.cc index 4a6c4c91..be4f9127 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.cc +++ b/Grid/qcd/action/fermion/CayleyFermion5D.cc @@ -163,10 +163,16 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl; - // Flops = 6.0*(Nc*Ns) *Ls*vol - RealD mflops = 6.0*12*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting + // Flops = 10.0*(Nc*Ns) *Ls*vol + RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + + // Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting) + // read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 ) + // write = 1 + RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9; + std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl; } if ( MooeeInvCalls > 0 ) { @@ -174,11 +180,16 @@ template void CayleyFermion5D::CayleyReport(void) std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl; std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl; std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl; - +#ifdef GRID_NVCC + RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; +#else // Flops = MADD * Ls *Ls *4dvol * spin/colour/complex RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; +#endif } } @@ -197,18 +208,18 @@ template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag (Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1]=mass; - std::vector lower(Ls,-1.0); lower[0] =mass; + Vector diag (Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1]=mass; + Vector lower(Ls,-1.0); lower[0] =mass; M5D(psi,chi,chi,lower,diag,upper); } template void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag = bs; - std::vector upper= cs; - std::vector lower= cs; + Vector diag = bs; + Vector upper= cs; + Vector lower= cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5D(psi,psi,Din,lower,diag,upper); @@ -217,9 +228,9 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = beo; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = beo; + Vector upper(Ls); + Vector lower(Ls); for(int i=0;i void CayleyFermion5D::Mooee (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = bee; + Vector upper(Ls); + Vector lower(Ls); for(int i=0;i void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = bee; + Vector upper(Ls); + Vector lower(Ls); for (int s=0;s void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); - std::vector lower(Ls,-1.0); + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); + Vector lower(Ls,-1.0); upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5Ddag(psi,chi,chi,lower,diag,upper); @@ -289,9 +300,9 @@ template void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag =bs; - std::vector upper=cs; - std::vector lower=cs; + Vector diag =bs; + Vector upper=cs; + Vector lower=cs; for (int s=0;s::MeoDeriv(GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) { - std::vector gamma(this->Ls); + Vector gamma(this->Ls); for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; SetCoefficientsInternal(1.0,gamma,b,c); } @@ -436,13 +447,13 @@ void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,Re template void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) { - std::vector gamma(this->Ls); + Vector gamma(this->Ls); for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; SetCoefficientsInternal(zolo_hi,gamma,b,c); } //Zolo template -void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c) +void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,Vector & gamma,RealD b,RealD c) { int Ls=this->Ls; diff --git a/Grid/qcd/action/fermion/CayleyFermion5D.h b/Grid/qcd/action/fermion/CayleyFermion5D.h index 6dbc630e..916bd0c0 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5D.h +++ b/Grid/qcd/action/fermion/CayleyFermion5D.h @@ -108,16 +108,16 @@ public: void M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + Vector &lower, + Vector &diag, + Vector &upper); void M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + Vector &lower, + Vector &diag, + Vector &upper); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); void MooeeInternalCompute(int dag, int inv, Vector > & Matp, Vector > & Matm); @@ -149,29 +149,29 @@ public: RealD mass; // Save arguments to SetCoefficientsInternal - std::vector _gamma; + Vector _gamma; RealD _zolo_hi; RealD _b; RealD _c; // Cayley form Moebius (tanh and zolotarev) - std::vector omega; - std::vector bs; // S dependent coeffs - std::vector cs; - std::vector as; + Vector omega; + Vector bs; // S dependent coeffs + Vector cs; + Vector as; // For preconditioning Cayley form - std::vector bee; - std::vector cee; - std::vector aee; - std::vector beo; - std::vector ceo; - std::vector aeo; + Vector bee; + Vector cee; + Vector aee; + Vector beo; + Vector ceo; + Vector aeo; // LDU factorisation of the eeoo matrix - std::vector lee; - std::vector leem; - std::vector uee; - std::vector ueem; - std::vector dee; + Vector lee; + Vector leem; + Vector uee; + Vector ueem; + Vector dee; // Matrices of 5d ee inverse params Vector > MatpInv; @@ -203,22 +203,26 @@ public: protected: virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); virtual void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); - virtual void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); + virtual void SetCoefficientsInternal(RealD zolo_hi,Vector & gamma,RealD b,RealD c); }; NAMESPACE_END(Grid); #define INSTANTIATE_DPERP(A) \ template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi, \ - std::vector &lower,std::vector &diag,std::vector &upper); \ + Vector &lower,Vector &diag,Vector &upper); \ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi, \ - std::vector &lower,std::vector &diag,std::vector &upper); \ + Vector &lower,Vector &diag,Vector &upper); \ template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); +#ifdef GRID_NVCC +#define CAYLEY_DPERP_GPU +#else #undef CAYLEY_DPERP_DENSE #define CAYLEY_DPERP_CACHE #undef CAYLEY_DPERP_LINALG +#endif #define CAYLEY_DPERP_VEC #endif diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc b/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc index 560f5dcb..c84b7f8d 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc +++ b/Grid/qcd/action/fermion/CayleyFermion5Dcache.cc @@ -41,9 +41,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); @@ -52,7 +52,8 @@ void CayleyFermion5D::M5D(const FermionField &psi_i, auto chi = chi_i.View(); int Ls =this->Ls; assert(phi.Checkerboard() == psi.Checkerboard()); - // Flops = 6.0*(Nc*Ns) *Ls*vol + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) M5Dcalls++; M5Dtime-=usecond(); @@ -87,9 +88,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc new file mode 100644 index 00000000..367c5ff1 --- /dev/null +++ b/Grid/qcd/action/fermion/CayleyFermion5Dgpu.cc @@ -0,0 +1,284 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include + + +NAMESPACE_BEGIN(Grid); + +// Pminus fowards +// Pplus backwards.. +template +void CayleyFermion5D::M5D(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + Coeff_t *lower_v = &lower[0]; + Coeff_t *diag_v = &diag[0]; + Coeff_t *upper_v = &upper[0]; + int Ls =this->Ls; + assert(phi.Checkerboard() == psi.Checkerboard()); + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites4d = nsimd * grid->oSites() / Ls; + + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) + M5Dcalls++; + M5Dtime-=usecond(); + + accelerator_loopN( sss, sites4d ,{ + uint64_t lane = sss % nsimd; + uint64_t ss = Ls * (sss / nsimd); + + for(int s=0;s +void CayleyFermion5D::M5Ddag(const FermionField &psi_i, + const FermionField &phi_i, + FermionField &chi_i, + Vector &lower, + Vector &diag, + Vector &upper) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + auto psi = psi_i.View(); + auto phi = phi_i.View(); + auto chi = chi_i.View(); + Coeff_t *lower_v = &lower[0]; + Coeff_t *diag_v = &diag[0]; + Coeff_t *upper_v = &upper[0]; + int Ls =this->Ls; + assert(phi.Checkerboard() == psi.Checkerboard()); + + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites4d = nsimd * grid->oSites() / Ls; + + // 10 = 3 complex mult + 2 complex add + // Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting) + M5Dcalls++; + M5Dtime-=usecond(); + + accelerator_loopN( sss, sites4d ,{ + uint64_t lane = sss % nsimd; + uint64_t ss = Ls * (sss / nsimd); + + for(int s=0;s +void CayleyFermion5D::MooeeInv (const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + Coeff_t *lee_v = &lee[0]; + Coeff_t *leem_v = &leem[0]; + Coeff_t *uee_v = &uee[0]; + Coeff_t *ueem_v = &ueem[0]; + Coeff_t *dee_v = &dee[0]; + + int Ls=this->Ls; + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites4d = nsimd * grid->oSites() / Ls; + + typedef typename SiteSpinor::scalar_object ScalarSiteSpinor; + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + accelerator_loopN( sss, sites4d ,{ + uint64_t lane = sss % nsimd; + uint64_t ss = Ls * (sss / nsimd); + ScalarSiteSpinor res, tmp, acc; + + // X = Nc*Ns + // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops + // Apply (L^{\prime})^{-1} L_m^{-1} + res = extractLane(lane,psi[ss]); + spProj5m(tmp,res); + acc = leem_v[0]*tmp; + spProj5p(tmp,res); + insertLane(lane,chi[ss],res); + + for(int s=1;s=0;s--){ + res = extractLane(lane,chi[ss+s]); + res = (1.0/dee_v[s])*res - uee_v[s]*tmp - ueem_v[s]*acc; + spProj5m(tmp,res); + insertLane(lane,chi[ss+s],res); + } + }); + + MooeeInvTime+=usecond(); + +} + +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi_i, FermionField &chi_i) +{ + chi_i.Checkerboard()=psi_i.Checkerboard(); + GridBase *grid=psi_i.Grid(); + + auto psi = psi_i.View(); + auto chi = chi_i.View(); + Coeff_t *lee_v = &lee[0]; + Coeff_t *leem_v = &leem[0]; + Coeff_t *uee_v = &uee[0]; + Coeff_t *ueem_v = &ueem[0]; + Coeff_t *dee_v = &dee[0]; + + int Ls=this->Ls; + const uint64_t nsimd = grid->Nsimd(); + const uint64_t sites4d = nsimd * grid->oSites() / Ls; + + typedef typename SiteSpinor::scalar_object ScalarSiteSpinor; + + MooeeInvCalls++; + MooeeInvTime-=usecond(); + + accelerator_loopN( sss, sites4d ,{ + uint64_t lane = sss % nsimd; + uint64_t ss = Ls * (sss / nsimd); + ScalarSiteSpinor res, tmp, acc; + + // X = Nc*Ns + // flops = 2X + (Ls-2)(4X + 4X) + 6X + 1 + 2X + (Ls-1)(10X + 1) = -16X + Ls(1+18X) = -192 + 217*Ls flops + // Apply (U^{\prime})^{-dagger} U_m^{-\dagger} + res = extractLane(lane,psi[ss]); + spProj5p(tmp,res); + acc = conjugate(ueem_v[0])*tmp; + spProj5m(tmp,res); + insertLane(lane,chi[ss],res); + + for(int s=1;s=0;s--){ + res = extractLane(lane,chi[ss+s]); + res = conjugate(1.0/dee_v[s])*res - conjugate(lee_v[s])*tmp - conjugate(leem_v[s])*acc; + spProj5p(tmp,res); + insertLane(lane,chi[ss+s],res); + } + }); + + MooeeInvTime+=usecond(); + +} + +#ifdef CAYLEY_DPERP_GPU +INSTANTIATE_DPERP(WilsonImplF); +INSTANTIATE_DPERP(WilsonImplD); +INSTANTIATE_DPERP(GparityWilsonImplF); +INSTANTIATE_DPERP(GparityWilsonImplD); +INSTANTIATE_DPERP(ZWilsonImplF); +INSTANTIATE_DPERP(ZWilsonImplD); + +INSTANTIATE_DPERP(WilsonImplFH); +INSTANTIATE_DPERP(WilsonImplDF); +INSTANTIATE_DPERP(GparityWilsonImplFH); +INSTANTIATE_DPERP(GparityWilsonImplDF); +INSTANTIATE_DPERP(ZWilsonImplFH); +INSTANTIATE_DPERP(ZWilsonImplDF); +#endif + +NAMESPACE_END(Grid); diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc b/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc index 80cc4688..650c391c 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc +++ b/Grid/qcd/action/fermion/CayleyFermion5Dssp.cc @@ -41,9 +41,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { Coeff_t one(1.0); int Ls=this->Ls; @@ -64,9 +64,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { Coeff_t one(1.0); int Ls=this->Ls; diff --git a/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc b/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc index 95bd31bd..5482c384 100644 --- a/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/Grid/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -54,9 +54,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); @@ -200,9 +200,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, - std::vector &diag, - std::vector &upper) + Vector &lower, + Vector &diag, + Vector &upper) { chi_i.Checkerboard()=psi_i.Checkerboard(); GridBase *grid=psi_i.Grid(); diff --git a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h index 0e0c1d75..379c5f8f 100644 --- a/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h +++ b/Grid/qcd/action/fermion/ContinuedFractionFermion5D.h @@ -89,12 +89,12 @@ protected: RealD mass; RealD R; RealD ZoloHiInv; - std::vector Beta; - std::vector cc;; - std::vector cc_d;; - std::vector sqrt_cc; - std::vector See; - std::vector Aee; + Vector Beta; + Vector cc;; + Vector cc_d;; + Vector sqrt_cc; + Vector See; + Vector Aee; }; diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc index 6aa848cc..89de7315 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermion.cc @@ -131,9 +131,9 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi, FermionField& chi else{ shiftm = -shift*(mq3-mq2); } } - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm; - std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftp; + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftm; + Vector lower(Ls,-1.0); lower[0] = mq1 + shiftp; #if(0) std::cout << GridLogMessage << "DomainWallEOFAFermion::M5D(FF&,FF&):" << std::endl; @@ -168,9 +168,9 @@ void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, FermionField& else{ shiftm = -shift*(mq3-mq2); } } - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp; - std::vector lower(Ls,-1.0); lower[0] = mq1 + shiftm; + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = mq1 + shiftp; + Vector lower(Ls,-1.0); lower[0] = mq1 + shiftm; #if(0) std::cout << GridLogMessage << "DomainWallEOFAFermion::M5Ddag(FF&,FF&):" << std::endl; @@ -194,9 +194,9 @@ void DomainWallEOFAFermion::Mooee(const FermionField& psi, FermionField& c { int Ls = this->Ls; - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); for(int s=0; scee[s]; @@ -213,9 +213,9 @@ void DomainWallEOFAFermion::MooeeDag(const FermionField& psi, FermionField { int Ls = this->Ls; - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); for(int s=0; scee[s]; @@ -231,7 +231,7 @@ void DomainWallEOFAFermion::MooeeDag(const FermionField& psi, FermionField //Zolo template -void DomainWallEOFAFermion::SetCoefficientsInternal(RealD zolo_hi, std::vector& gamma, RealD b, RealD c) +void DomainWallEOFAFermion::SetCoefficientsInternal(RealD zolo_hi, Vector& gamma, RealD b, RealD c) { int Ls = this->Ls; int pm = this->pm; diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h index a3344ec4..eab56346 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermion.h +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermion.h @@ -70,10 +70,10 @@ public: // Instantiate different versions depending on Impl ///////////////////////////////////////////////////// void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + Vector& lower, Vector& diag, Vector& upper); void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + Vector& lower, Vector& diag, Vector& upper); void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); @@ -94,16 +94,16 @@ public: RealD _M5, const ImplParams& p=ImplParams()); protected: - void SetCoefficientsInternal(RealD zolo_hi, std::vector& gamma, RealD b, RealD c); + void SetCoefficientsInternal(RealD zolo_hi, Vector& gamma, RealD b, RealD c); }; NAMESPACE_END(Grid); #define INSTANTIATE_DPERP_DWF_EOFA(A) \ template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ + Vector& lower, Vector& diag, Vector& upper); \ template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ + Vector& lower, Vector& diag, Vector& upper); \ template void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ template void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc index 58aee4ff..74e9a62f 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermioncache.cc @@ -41,7 +41,7 @@ NAMESPACE_BEGIN(Grid); // Pplus backwards.. template void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); int Ls = this->Ls; @@ -81,7 +81,7 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); @@ -180,11 +180,11 @@ void DomainWallEOFAFermion::MooeeInvDag(const FermionField& psi_i, Fermion assert(psi.Checkerboard() == psi.Checkerboard()); - std::vector ueec(Ls); - std::vector deec(Ls+1); - std::vector leec(Ls); - std::vector ueemc(Ls); - std::vector leemc(Ls); + Vector ueec(Ls); + Vector deec(Ls+1); + Vector leec(Ls); + Vector ueemc(Ls); + Vector leemc(Ls); for(int s=0; suee[s]); diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc index 1825d07e..c9e638e5 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermionssp.cc @@ -40,7 +40,7 @@ NAMESPACE_BEGIN(Grid); // Pplus backwards template void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) { Coeff_t one(1.0); int Ls = this->Ls; @@ -60,7 +60,7 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi, const FermionFiel template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) { Coeff_t one(1.0); int Ls = this->Ls; diff --git a/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc b/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc index 43fa16ec..5ad8be27 100644 --- a/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc +++ b/Grid/qcd/action/fermion/DomainWallEOFAFermionvec.cc @@ -53,7 +53,7 @@ void DomainWallEOFAFermion::MooeeInv(const FermionField& psi, FermionField template void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); @@ -201,7 +201,7 @@ void DomainWallEOFAFermion::M5D(const FermionField& psi_i, const FermionFi template void DomainWallEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermion.cc b/Grid/qcd/action/fermion/MobiusEOFAFermion.cc index d368e1da..86ce3e56 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermion.cc +++ b/Grid/qcd/action/fermion/MobiusEOFAFermion.cc @@ -197,9 +197,9 @@ void MobiusEOFAFermion::M5D(const FermionField& psi, FermionField& chi) { int Ls = this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; - std::vector lower(Ls,-1.0); lower[0] = this->mq1; + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + Vector lower(Ls,-1.0); lower[0] = this->mq1; // no shift term if(this->shift == 0.0){ this->M5D(psi, chi, chi, lower, diag, upper); } @@ -213,9 +213,9 @@ void MobiusEOFAFermion::M5Ddag(const FermionField& psi, FermionField& chi) { int Ls = this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; - std::vector lower(Ls,-1.0); lower[0] = this->mq1; + Vector diag(Ls,1.0); + Vector upper(Ls,-1.0); upper[Ls-1] = this->mq1; + Vector lower(Ls,-1.0); lower[0] = this->mq1; // no shift term if(this->shift == 0.0){ this->M5Ddag(psi, chi, chi, lower, diag, upper); } @@ -231,9 +231,9 @@ void MobiusEOFAFermion::Mooee(const FermionField& psi, FermionField& chi) int Ls = this->Ls; // coefficients of Mooee - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); for(int s=0; scee[s]; lower[s] = -this->cee[s]; @@ -254,9 +254,9 @@ void MobiusEOFAFermion::MooeeDag(const FermionField& psi, FermionField& ch int Ls = this->Ls; // coefficients of MooeeDag - std::vector diag = this->bee; - std::vector upper(Ls); - std::vector lower(Ls); + Vector diag = this->bee; + Vector upper(Ls); + Vector lower(Ls); for(int s=0; scee[s+1]; @@ -315,10 +315,10 @@ void MobiusEOFAFermion::SetCoefficientsPrecondShiftOps() // Tridiagonal solve for MooeeInvDag_shift_lc { Coeff_t m(0.0); - std::vector d = Mooee_shift; - std::vector u(Ls,0.0); - std::vector y(Ls,0.0); - std::vector q(Ls,0.0); + Vector d = Mooee_shift; + Vector u(Ls,0.0); + Vector y(Ls,0.0); + Vector q(Ls,0.0); if(pm == 1){ u[0] = 1.0; } else{ u[Ls-1] = 1.0; } diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermion.h b/Grid/qcd/action/fermion/MobiusEOFAFermion.h index e30bfb47..e7e4df39 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermion.h +++ b/Grid/qcd/action/fermion/MobiusEOFAFermion.h @@ -42,11 +42,11 @@ public: public: // Shift operator coefficients for red-black preconditioned Mobius EOFA - std::vector Mooee_shift; - std::vector MooeeInv_shift_lc; - std::vector MooeeInv_shift_norm; - std::vector MooeeInvDag_shift_lc; - std::vector MooeeInvDag_shift_norm; + Vector Mooee_shift; + Vector MooeeInv_shift_lc; + Vector MooeeInv_shift_norm; + Vector MooeeInvDag_shift_lc; + Vector MooeeInvDag_shift_norm; virtual void Instantiatable(void) {}; @@ -74,18 +74,18 @@ public: // Instantiate different versions depending on Impl ///////////////////////////////////////////////////// void M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + Vector& lower, Vector& diag, Vector& upper); void M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs); + Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs); void M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper); + Vector& lower, Vector& diag, Vector& upper); void M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, - std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs); + Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs); void MooeeInternal(const FermionField& in, FermionField& out, int dag, int inv); @@ -113,13 +113,13 @@ NAMESPACE_END(Grid); #define INSTANTIATE_DPERP_MOBIUS_EOFA(A) \ template void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ + Vector& lower, Vector& diag, Vector& upper); \ template void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ + Vector& lower, Vector& diag, Vector& upper, Vector& shift_coeffs); \ template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper); \ + Vector& lower, Vector& diag, Vector& upper); \ template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, FermionField& chi, \ - std::vector& lower, std::vector& diag, std::vector& upper, std::vector& shift_coeffs); \ + Vector& lower, Vector& diag, Vector& upper, Vector& shift_coeffs); \ template void MobiusEOFAFermion::MooeeInv(const FermionField& psi, FermionField& chi); \ template void MobiusEOFAFermion::MooeeInv_shift(const FermionField& psi, FermionField& chi); \ template void MobiusEOFAFermion::MooeeInvDag(const FermionField& psi, FermionField& chi); \ diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc b/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc index dc865b4f..8d0b0524 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc +++ b/Grid/qcd/action/fermion/MobiusEOFAFermioncache.cc @@ -37,7 +37,7 @@ NAMESPACE_BEGIN(Grid); template void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, std::vector &diag, std::vector &upper) + Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); @@ -79,8 +79,8 @@ void MobiusEOFAFermion::M5D(const FermionField &psi_i, const FermionField template void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, std::vector &diag, std::vector &upper, - std::vector &shift_coeffs) + Vector &lower, Vector &diag, Vector &upper, + Vector &shift_coeffs) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); @@ -127,7 +127,7 @@ void MobiusEOFAFermion::M5D_shift(const FermionField &psi_i, const Fermion template void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, std::vector &diag, std::vector &upper) + Vector &lower, Vector &diag, Vector &upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); @@ -169,8 +169,8 @@ void MobiusEOFAFermion::M5Ddag(const FermionField &psi_i, const FermionFie template void MobiusEOFAFermion::M5Ddag_shift(const FermionField &psi_i, const FermionField &phi_i, FermionField &chi_i, - std::vector &lower, std::vector &diag, std::vector &upper, - std::vector &shift_coeffs) + Vector &lower, Vector &diag, Vector &upper, + Vector &shift_coeffs) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase *grid = psi_i.Grid(); diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc b/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc index 937b5019..254cdb54 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc +++ b/Grid/qcd/action/fermion/MobiusEOFAFermionssp.cc @@ -40,7 +40,7 @@ NAMESPACE_BEGIN(Grid); // Pplus backwards template void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) { Coeff_t one(1.0); int Ls = this->Ls; @@ -60,8 +60,8 @@ void MobiusEOFAFermion::M5D(const FermionField& psi, const FermionField& p template void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) { Coeff_t one(1.0); int Ls = this->Ls; @@ -83,7 +83,7 @@ void MobiusEOFAFermion::M5D_shift(const FermionField& psi, const FermionFi template void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper) { Coeff_t one(1.0); int Ls = this->Ls; @@ -103,8 +103,8 @@ void MobiusEOFAFermion::M5Ddag(const FermionField& psi, const FermionField template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi, const FermionField& phi, - FermionField& chi, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) + FermionField& chi, Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) { Coeff_t one(1.0); int Ls = this->Ls; diff --git a/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc b/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc index ff8c5816..1cd99ab5 100644 --- a/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc +++ b/Grid/qcd/action/fermion/MobiusEOFAFermionvec.cc @@ -64,7 +64,7 @@ void MobiusEOFAFermion::MooeeInvDag_shift(const FermionField& psi, Fermion template void MobiusEOFAFermion::M5D(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); @@ -211,8 +211,8 @@ void MobiusEOFAFermion::M5D(const FermionField& psi_i, const FermionField& template void MobiusEOFAFermion::M5D_shift(const FermionField& psi_i, const FermionField& phi_i, - FermionField& chi_i, std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) + FermionField& chi_i, Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) { #if 0 auto & psi = psi_i; @@ -397,7 +397,7 @@ void MobiusEOFAFermion::M5D_shift(const FermionField& psi_i, const Fermion template void MobiusEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionField& phi_i,FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper) + Vector& lower, Vector& diag, Vector& upper) { chi_i.Checkerboard() = psi_i.Checkerboard(); GridBase* grid = psi_i.Grid(); @@ -542,8 +542,8 @@ void MobiusEOFAFermion::M5Ddag(const FermionField& psi_i, const FermionFie template void MobiusEOFAFermion::M5Ddag_shift(const FermionField& psi_i, const FermionField& phi_i, FermionField& chi_i, - std::vector& lower, std::vector& diag, std::vector& upper, - std::vector& shift_coeffs) + Vector& lower, Vector& diag, Vector& upper, + Vector& shift_coeffs) { #if 0 auto & psi = psi_i; diff --git a/Grid/qcd/action/fermion/PartialFractionFermion5D.h b/Grid/qcd/action/fermion/PartialFractionFermion5D.h index 7a3de997..d61515f0 100644 --- a/Grid/qcd/action/fermion/PartialFractionFermion5D.h +++ b/Grid/qcd/action/fermion/PartialFractionFermion5D.h @@ -93,8 +93,8 @@ protected: RealD R; RealD amax; RealD scale; - std::vector p; - std::vector q; + Vector p; + Vector q; }; diff --git a/Grid/qcd/action/fermion/SchurDiagTwoKappa.h b/Grid/qcd/action/fermion/SchurDiagTwoKappa.h index c6e5470b..b6ab8a55 100644 --- a/Grid/qcd/action/fermion/SchurDiagTwoKappa.h +++ b/Grid/qcd/action/fermion/SchurDiagTwoKappa.h @@ -36,7 +36,7 @@ template class KappaSimilarityTransform { public: INHERIT_IMPL_TYPES(Matrix); - std::vector kappa, kappaDag, kappaInv, kappaInvDag; + Vector kappa, kappaDag, kappaInv, kappaInvDag; KappaSimilarityTransform (Matrix &zmob) { for (int i=0;i<(int)zmob.bs.size();i++) { diff --git a/Grid/qcd/action/fermion/ZMobiusFermion.h b/Grid/qcd/action/fermion/ZMobiusFermion.h index 4c2390d6..bdea3271 100644 --- a/Grid/qcd/action/fermion/ZMobiusFermion.h +++ b/Grid/qcd/action/fermion/ZMobiusFermion.h @@ -48,7 +48,7 @@ public: GridCartesian &FourDimGrid, GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5, - std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : + Vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : CayleyFermion5D(_Umu, FiveDimGrid, @@ -59,7 +59,7 @@ public: { // RealD eps = 1.0; std::cout< zgamma(this->Ls); + Vector zgamma(this->Ls); for(int s=0;sLs;s++){ zgamma[s] = gamma[s]; } diff --git a/Grid/simd/Grid_gpu.h b/Grid/simd/Grid_gpu.h index 2f7d47ec..6dc5123a 100644 --- a/Grid/simd/Grid_gpu.h +++ b/Grid/simd/Grid_gpu.h @@ -245,18 +245,18 @@ namespace Optimization { struct MultRealPart{ accelerator_inline float4 operator()(float4 a, float4 b){ float4 ymm0; - ymm0.x = a.y; - ymm0.y = a.y; - ymm0.z = a.w; - ymm0.w = a.w; + ymm0.x = a.x; + ymm0.y = a.x; + ymm0.z = a.z; + ymm0.w = a.z; return ymm0*b; // ymm0 = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, // return _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br } accelerator_inline double2 operator()(double2 a, double2 b){ double2 ymm0; - ymm0.x = a.y; - ymm0.y = a.y; + ymm0.x = a.x; + ymm0.y = a.x; return ymm0*b; // ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00 // return _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br @@ -265,17 +265,17 @@ namespace Optimization { struct MaddRealPart{ accelerator_inline float4 operator()(float4 a, float4 b, float4 c){ float4 ymm0; // = _mm_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ymm0 <- ar ar, - ymm0.x = a.y; - ymm0.y = a.y; - ymm0.z = a.w; - ymm0.w = a.w; + ymm0.x = a.x; + ymm0.y = a.x; + ymm0.z = a.z; + ymm0.w = a.z; return c+ymm0*b; } accelerator_inline double2 operator()(double2 a, double2 b, double2 c){ // ymm0 = _mm_shuffle_pd( a, a, 0x0 ); double2 ymm0; - ymm0.x = a.y; - ymm0.y = a.y; + ymm0.x = a.x; + ymm0.y = a.x; return c+ymm0*b; } }; diff --git a/benchmarks/Benchmark_mooee.cc b/benchmarks/Benchmark_mooee.cc index dfaaae30..cbeb6c2f 100644 --- a/benchmarks/Benchmark_mooee.cc +++ b/benchmarks/Benchmark_mooee.cc @@ -76,13 +76,20 @@ int main (int argc, char ** argv) std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "< diag = Dw.bs; + Vector upper= Dw.cs; + Vector lower= Dw.cs; + upper[Ls-1]=-Dw.mass*upper[Ls-1]; + lower[0] =-Dw.mass*lower[0]; + LatticeFermion r_eo(FGrid); LatticeFermion src_e (FrbGrid); LatticeFermion src_o (FrbGrid); @@ -99,13 +106,13 @@ int main (int argc, char ** argv) r_o = Zero(); -#define BENCH_DW(A,in,out) \ - Dw.CayleyZeroCounters(); \ - Dw. A (in,out); \ +#define BENCH_DW(A,...) \ + Dw. A (__VA_ARGS__); \ FGrid->Barrier(); \ + Dw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ @@ -114,9 +121,9 @@ int main (int argc, char ** argv) std::cout<Barrier(); \ + zDw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ + Dw.CayleyZeroCounters(); \ t0=usecond(); \ for(int i=0;iBarrier(); \ - t0=usecond(); \ - for(int i=0;iBarrier(); \ - Dw.CayleyReport(); \ - std::cout< gamma(Ls,std::complex(1.0,0.0)); + Vector gamma(Ls,std::complex(1.0,0.0)); ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c); std::cout<