From 90e70790f312bff33a397d78a5b133c7f3d7a136 Mon Sep 17 00:00:00 2001 From: paboyle Date: Mon, 15 Aug 2016 22:31:29 +0100 Subject: [PATCH] Feature for z-Mobius prep --- configure.ac | 2 +- lib/qcd/action/Actions.h | 12 ++- lib/qcd/action/fermion/CayleyFermion5D.cc | 58 ++++++++------ lib/qcd/action/fermion/CayleyFermion5D.h | 49 ++++++------ .../action/fermion/CayleyFermion5Dcache.cc | 14 ++-- lib/qcd/action/fermion/CayleyFermion5Dssp.cc | 12 +-- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 24 +++--- lib/qcd/action/fermion/FermionOperatorImpl.h | 25 +++++- lib/qcd/action/fermion/WilsonKernels.cc | 23 +++--- lib/qcd/action/fermion/WilsonKernels.h | 4 + lib/qcd/action/fermion/WilsonKernelsAsm.cc | 20 +++++ lib/qcd/action/fermion/WilsonKernelsAsmBody.h | 65 ++++++++++++++- lib/qcd/action/fermion/ZMobiusFermion.h | 79 +++++++++++++++++++ tests/debug/Test_cayley_even_odd_vec.cc | 12 +++ 14 files changed, 312 insertions(+), 87 deletions(-) create mode 100644 lib/qcd/action/fermion/ZMobiusFermion.h diff --git a/configure.ac b/configure.ac index cdcd9d58..2e46a411 100644 --- a/configure.ac +++ b/configure.ac @@ -12,7 +12,7 @@ CXXFLAGS="-O3 $CXXFLAGS" AC_PROG_CXX AC_OPENMP AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" -LT_INIT([disable-shared]) +LT_INIT ############### Checks for header files AC_CHECK_HEADERS(stdint.h) diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index be08cdde..1ad3a584 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -111,12 +111,16 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #define FermOp4dVecTemplateInstantiate(A) \ template class A; \ template class A; \ + template class A; \ + template class A; \ template class A; \ template class A; #define FermOp5dVecTemplateInstantiate(A) \ template class A; \ - template class A; + template class A; \ + template class A; \ + template class A; #define FermOpTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \ @@ -138,6 +142,7 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #include #include #include +#include #include #include #include @@ -176,6 +181,11 @@ typedef DomainWallFermion DomainWallFermionD; typedef MobiusFermion MobiusFermionR; typedef MobiusFermion MobiusFermionF; typedef MobiusFermion MobiusFermionD; + +typedef ZMobiusFermion ZMobiusFermionR; +typedef ZMobiusFermion ZMobiusFermionF; +typedef ZMobiusFermion ZMobiusFermionD; + typedef ScaledShamirFermion ScaledShamirFermionR; typedef ScaledShamirFermion ScaledShamirFermionF; typedef ScaledShamirFermion ScaledShamirFermionD; diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 066cede4..c4196043 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -54,18 +54,18 @@ template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag (Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1]=mass; - std::vector lower(Ls,-1.0); lower[0] =mass; + std::vector diag (Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1]=mass; + std::vector lower(Ls,-1.0); lower[0] =mass; M5D(psi,chi,chi,lower,diag,upper); } template void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag = bs; - std::vector upper= cs; - std::vector lower= cs; + std::vector diag = bs; + std::vector upper= cs; + std::vector lower= cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5D(psi,psi,Din,lower,diag,upper); @@ -73,9 +73,9 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = beo; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = beo; + std::vector upper(Ls); + std::vector lower(Ls); for(int i=0;i void CayleyFermion5D::Mooee (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); for(int i=0;i void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); for (int s=0;s void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); - std::vector lower(Ls,-1.0); + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); + std::vector lower(Ls,-1.0); upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5Ddag(psi,chi,chi,lower,diag,upper); @@ -141,9 +141,9 @@ template void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag =bs; - std::vector upper=cs; - std::vector lower=cs; + std::vector diag =bs; + std::vector upper=cs; + std::vector lower=cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5Ddag(psi,psi,Din,lower,diag,upper); @@ -273,11 +273,21 @@ void CayleyFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) { - SetCoefficientsZolotarev(1.0,zdata,b,c); + std::vector gamma(this->Ls); + for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; + SetCoefficientsInternal(1.0,gamma,b,c); } //Zolo template void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) +{ + std::vector gamma(this->Ls); + for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; + SetCoefficientsInternal(zolo_hi,gamma,b,c); +} +//Zolo +template +void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c) { int Ls=this->Ls; @@ -315,7 +325,7 @@ void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot double bmc = b-c; for(int i=0; i < Ls; i++){ as[i] = 1.0; - omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code + omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code bs[i] = 0.5*(bpc/omega[i] + bmc); cs[i] = 0.5*(bpc/omega[i] - bmc); } @@ -377,7 +387,7 @@ void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot } { - double delta_d=mass*cee[Ls-1]; + Coeff_t delta_d=mass*cee[Ls-1]; for(int j=0;j &lower, - std::vector &diag, - std::vector &upper); + std::vector &lower, + std::vector &diag, + std::vector &upper); void M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + std::vector &lower, + std::vector &diag, + std::vector &upper); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); virtual void Instantiatable(void)=0; @@ -91,23 +91,23 @@ namespace Grid { RealD mass; // Cayley form Moebius (tanh and zolotarev) - std::vector omega; - std::vector bs; // S dependent coeffs - std::vector cs; - std::vector as; + std::vector omega; + std::vector bs; // S dependent coeffs + std::vector cs; + std::vector as; // For preconditioning Cayley form - std::vector bee; - std::vector cee; - std::vector aee; - std::vector beo; - std::vector ceo; - std::vector aeo; + std::vector bee; + std::vector cee; + std::vector aee; + std::vector beo; + std::vector ceo; + std::vector aeo; // LDU factorisation of the eeoo matrix - std::vector lee; - std::vector leem; - std::vector uee; - std::vector ueem; - std::vector dee; + std::vector lee; + std::vector leem; + std::vector uee; + std::vector ueem; + std::vector dee; // Constructors CayleyFermion5D(GaugeField &_Umu, @@ -117,20 +117,19 @@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - - protected: void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); + void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); }; } } #define INSTANTIATE_DPERP(A)\ template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ + std::vector &lower,std::vector &diag,std::vector &upper); \ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ + std::vector &lower,std::vector &diag,std::vector &upper); \ template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); diff --git a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc index 4791e7a2..62e91dd4 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc @@ -43,9 +43,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls =this->Ls; GridBase *grid=psi._grid; @@ -82,9 +82,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls =this->Ls; GridBase *grid=psi._grid; @@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP INSTANTIATE_DPERP(WilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplF); INSTANTIATE_DPERP(GparityWilsonImplD); + INSTANTIATE_DPERP(ZWilsonImplF); + INSTANTIATE_DPERP(ZWilsonImplD); #endif }} diff --git a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc index a2c96d87..ad7daddb 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc @@ -43,9 +43,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls=this->Ls; for(int s=0;s void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls=this->Ls; for(int s=0;s void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { GridBase *grid=psi._grid; int Ls = this->Ls; @@ -121,9 +121,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { GridBase *grid=psi._grid; int Ls = this->Ls; @@ -194,8 +194,8 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField chi.checkerboard=psi.checkerboard; - Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); - Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); + Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); for(int s=0;s::MooeeInternal(const FermionField &psi, FermionField Pplus (0,Ls-1) = mass*cee[0]; Pminus(Ls-1,0) = mass*cee[Ls-1]; - Eigen::MatrixXd PplusMat ; - Eigen::MatrixXd PminusMat; + Eigen::MatrixXcd PplusMat ; + Eigen::MatrixXcd PminusMat; if ( inv ) { PplusMat =Pplus.inverse(); @@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP INSTANTIATE_DPERP(DomainWallVec5dImplD); INSTANTIATE_DPERP(DomainWallVec5dImplF); +INSTANTIATE_DPERP(ZDomainWallVec5dImplD); +INSTANTIATE_DPERP(ZDomainWallVec5dImplF); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); }} diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 4126d185..b756936b 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -100,7 +100,8 @@ namespace Grid { typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::Compressor Compressor; \ typedef typename Impl::StencilImpl StencilImpl; \ - typedef typename Impl::ImplParams ImplParams; + typedef typename Impl::ImplParams ImplParams; \ + typedef typename Impl::Coeff_t Coeff_t; #define INHERIT_IMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base)\ @@ -109,12 +110,14 @@ namespace Grid { /////// // Single flavour four spinors with colour index /////// - template + template class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > { public: + const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); @@ -192,12 +195,13 @@ PARALLEL_FOR_LOOP /////// // Single flavour four spinors with colour index, 5d redblack /////// - template + template class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { public: const bool LsVectorised=true; + typedef _Coeff_t Coeff_t; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); @@ -287,12 +291,13 @@ PARALLEL_FOR_LOOP // Flavour doubled spinors; is Gparity the only? what about C*? //////////////////////////////////////////////////////////////////////////////////////// - template + template class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes >{ public: const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); @@ -483,6 +488,18 @@ PARALLEL_FOR_LOOP typedef WilsonImpl WilsonImplF; // Float typedef WilsonImpl WilsonImplD; // Double + typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec + typedef WilsonImpl ZWilsonImplF; // Float + typedef WilsonImpl ZWilsonImplD; // Double + + typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec + typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float + typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double + + typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. whichever prec + typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float + typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double + typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index d644f6ef..a12a2b16 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -68,16 +68,21 @@ void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo, std::vector > &buf, int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) { - // No asm implementation yet. - // if ( AsmOpt ) WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - // else - for(int site=0;site::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - sF++; +#ifdef AVX512 + if ( AsmOpt ) { + WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + } else { +#else + { +#endif + for(int site=0;site::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + else WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; } - sU++; } } diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 231fa293..b679d3f9 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -79,6 +79,10 @@ namespace Grid { std::vector > &buf, int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); + void DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, + std::vector > &buf, + int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); + void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, std::vector > &buf, diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 83871b7b..b443ccf9 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -73,12 +73,21 @@ static int signInit = setupSigns(); #define MAYBEPERM(A,perm) if (perm) { A ; } #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A + +#undef KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, std::vector > &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include +#define KERNEL_DAG +template<> +void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + #undef VMOVIDUP #undef VMOVRDUP #undef MAYBEPERM @@ -89,14 +98,25 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +#undef KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, std::vector > &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include +#define KERNEL_DAG +template<> +void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + #endif + + template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, std::vector > &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h index 4f3ef861..d236a774 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h +++ b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h @@ -30,7 +30,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_PROJMEM(base); +#else XM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -41,15 +45,22 @@ MULT_2SPIN_DIR_PFXP(Xp,basep); } LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_RECON; +#else XM_RECON; - +#endif //////////////////////////////// // Yp //////////////////////////////// basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_PROJMEM(base); +#else YM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -60,7 +71,11 @@ MULT_2SPIN_DIR_PFYP(Yp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_RECON_ACCUM; +#else YM_RECON_ACCUM; +#endif //////////////////////////////// // Zp @@ -68,7 +83,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_PROJMEM(base); +#else ZM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -79,7 +98,11 @@ MULT_2SPIN_DIR_PFZP(Zp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_RECON_ACCUM; +#else ZM_RECON_ACCUM; +#endif //////////////////////////////// // Tp @@ -87,7 +110,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_PROJMEM(base); +#else TM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); @@ -98,7 +125,11 @@ MULT_2SPIN_DIR_PFTP(Tp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_RECON_ACCUM; +#else TM_RECON_ACCUM; +#endif //////////////////////////////// // Xm @@ -107,7 +138,11 @@ // basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_PROJMEM(base); +#else XP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -118,7 +153,11 @@ MULT_2SPIN_DIR_PFXM(Xm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_RECON_ACCUM; +#else XP_RECON_ACCUM; +#endif //////////////////////////////// // Ym @@ -126,7 +165,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_PROJMEM(base); +#else YP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -137,7 +180,11 @@ MULT_2SPIN_DIR_PFYM(Ym,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_RECON_ACCUM; +#else YP_RECON_ACCUM; +#endif //////////////////////////////// // Zm @@ -145,7 +192,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_PROJMEM(base); +#else ZP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -156,7 +207,11 @@ MULT_2SPIN_DIR_PFZM(Zm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_RECON_ACCUM; +#else ZP_RECON_ACCUM; +#endif //////////////////////////////// // Tm @@ -164,7 +219,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_PROJMEM(base); +#else TP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); @@ -175,7 +234,11 @@ MULT_2SPIN_DIR_PFTM(Tm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_RECON_ACCUM; +#else TP_RECON_ACCUM; +#endif basep= st.GetPFInfo(nent,plocal); nent++; SAVE_RESULT(base,basep); diff --git a/lib/qcd/action/fermion/ZMobiusFermion.h b/lib/qcd/action/fermion/ZMobiusFermion.h new file mode 100644 index 00000000..d0e00657 --- /dev/null +++ b/lib/qcd/action/fermion/ZMobiusFermion.h @@ -0,0 +1,79 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/MobiusFermion.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_ZMOBIUS_FERMION_H +#define GRID_QCD_ZMOBIUS_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + template + class ZMobiusFermion : public CayleyFermion5D + { + public: + INHERIT_IMPL_TYPES(Impl); + public: + + virtual void Instantiatable(void) {}; + // Constructors + ZMobiusFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + + { + RealD eps = 1.0; + + std::cout< zgamma(this->Ls); + for(int s=0;sLs;s++){ + zgamma[s] = gamma[s]; + } + + // Call base setter + this->SetCoefficientsInternal(1.0,zgamma,b,c); + } + + }; + + } +} + +#endif diff --git a/tests/debug/Test_cayley_even_odd_vec.cc b/tests/debug/Test_cayley_even_odd_vec.cc index 68315b88..f8600782 100644 --- a/tests/debug/Test_cayley_even_odd_vec.cc +++ b/tests/debug/Test_cayley_even_odd_vec.cc @@ -44,6 +44,7 @@ struct scal { }; typedef DomainWallFermion DomainWallVecFermionR; +typedef ZMobiusFermion ZMobiusVecFermionR; typedef MobiusFermion MobiusVecFermionR; typedef MobiusZolotarevFermion MobiusZolotarevVecFermionR; typedef ScaledShamirFermion ScaledShamirVecFermionR; @@ -117,6 +118,17 @@ int main (int argc, char ** argv) TestWhat(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); TestWhat(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5); + + std::cout< gamma(Ls,std::complex(1.0,0.0)); + ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c); + ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c); + TestMoo(zDmob,szDmob); + TestWhat(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); + TestWhat(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5); + std::cout<