diff --git a/lib/algorithms/approx/Remez.cc b/lib/algorithms/approx/Remez.cc index 046fbd47..38d60088 100644 --- a/lib/algorithms/approx/Remez.cc +++ b/lib/algorithms/approx/Remez.cc @@ -11,8 +11,7 @@ Note this class requires the gnu multiprecision (GNU MP) library. */ -// fixing ::max_align_t error for some versions of g++/clang++ -#include + #include #include #include diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 8624eac2..e1062359 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -143,7 +143,7 @@ public: } } std::cout<::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, vstream(out._odata[sF],result*(-0.5)); } -//#if ( ! defined(AVX512) ) && ( ! defined(IMCI) ) -#if 1 +#if ( ! defined(AVX512) ) && ( ! defined(IMCI) ) template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, @@ -608,6 +607,5 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField #endif FermOpTemplateInstantiate(WilsonKernels); - GparityFermOpTemplateInstantiate(WilsonKernels); }} diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index d48d5f88..bec4aaa5 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -60,13 +60,8 @@ namespace Grid { void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, -#if 0 -//<<<<<<< HEAD - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); -// int sF,int sU,const FermionField &in, FermionField &out,uint64_t *); -// doesn't seem to work with Gparity at the moment -#undef HANDOPT -#if 1 + int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); + void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); @@ -74,19 +69,6 @@ namespace Grid { void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); -#endif -#else - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); - - int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); - - int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); -//>>>>>>> fc6ad657514c7966291c19f22af89de5d5a96f93 -#endif WilsonKernels(const ImplParams &p= ImplParams()); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index a2e0622c..613d332f 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -310,7 +310,7 @@ namespace QCD { template -int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, +void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) { @@ -318,21 +318,21 @@ int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; - REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc - REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc - REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc + REGISTER Simd result_00; // 12 regs on knc + REGISTER Simd result_01; + REGISTER Simd result_02; - REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc - REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc - REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc + REGISTER Simd result_10; + REGISTER Simd result_11; + REGISTER Simd result_12; - REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc - REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc - REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc + REGISTER Simd result_20; + REGISTER Simd result_21; + REGISTER Simd result_22; - REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc - REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc - REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc + REGISTER Simd result_30; + REGISTER Simd result_31; + REGISTER Simd result_32; // 20 left REGISTER Simd Chi_00; // two spinor; 6 regs REGISTER Simd Chi_01; @@ -372,178 +372,172 @@ int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi StencilEntry *SE; - int offset, ptype; - int num = 0; - + int offset,local,perm, ptype; + // Xp SE=st.GetEntry(ptype,Xp,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; XP_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - - } - - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Xp); - XP_RECON_ACCUM; - num++; } + XP_RECON; // Yp SE=st.GetEntry(ptype,Yp,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; YP_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Yp); - YP_RECON_ACCUM; - num++; } + YP_RECON_ACCUM; // Zp SE=st.GetEntry(ptype,Zp,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; ZP_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Zp); - ZP_RECON_ACCUM; - num++; } + ZP_RECON_ACCUM; // Tp SE=st.GetEntry(ptype,Tp,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; TP_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Tp); - TP_RECON_ACCUM; - num++; } + TP_RECON_ACCUM; // Xm SE=st.GetEntry(ptype,Xm,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; XM_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Xm); - XM_RECON_ACCUM; - num++; } + XM_RECON_ACCUM; // Ym SE=st.GetEntry(ptype,Ym,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; YM_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Ym); - YM_RECON_ACCUM; - num++; } + YM_RECON_ACCUM; // Zm SE=st.GetEntry(ptype,Zm,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; ZM_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Zm); - ZM_RECON_ACCUM; - num++; } + ZM_RECON_ACCUM; // Tm SE=st.GetEntry(ptype,Tm,ss); offset = SE->_offset; + local = SE->_is_local; + perm = SE->_permute; - if (Local && SE->_is_local ) { + if ( local ) { LOAD_CHIMU; TM_PROJ; - if ( SE->_permute ) { + if ( perm) { PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... } - } - if ( Nonlocal && (!SE->_is_local) ) { + } else { LOAD_CHI; } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { + { MULT_2SPIN(Tm); - TM_RECON_ACCUM; - num++; } + TM_RECON_ACCUM; - SiteSpinor & ref (out._odata[ss]); - if ( Local ) { + { + SiteSpinor & ref (out._odata[ss]); vstream(ref()(0)(0),result_00*(-0.5)); vstream(ref()(0)(1),result_01*(-0.5)); vstream(ref()(0)(2),result_02*(-0.5)); @@ -556,295 +550,9 @@ int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi vstream(ref()(3)(0),result_30*(-0.5)); vstream(ref()(3)(1),result_31*(-0.5)); vstream(ref()(3)(2),result_32*(-0.5)); - return 1; - } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); - vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); - vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); - vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); - vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); - vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); - vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); - vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); - vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); - vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); - vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); - vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); - return 1; } - return 0; } - - - -template -int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) -{ - // std::cout << "Hand op Dhop "<_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xp); - XM_RECON_ACCUM; - num++; - } - - - // Yp - SE=st.GetEntry(ptype,Yp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Yp); - YM_RECON_ACCUM; - num++; - } - - - // Zp - SE=st.GetEntry(ptype,Zp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zp); - ZM_RECON_ACCUM; - num++; - } - - // Tp - SE=st.GetEntry(ptype,Tp,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TM_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tp); - TM_RECON_ACCUM; - num++; - } - - // Xm - SE=st.GetEntry(ptype,Xm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - XP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Xm); - XP_RECON_ACCUM; - num++; - } - - // Ym - SE=st.GetEntry(ptype,Ym,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - YP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Ym); - YP_RECON_ACCUM; - num++; - } - - // Zm - SE=st.GetEntry(ptype,Zm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - ZP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Zm); - ZP_RECON_ACCUM; - num++; - } - - // Tm - SE=st.GetEntry(ptype,Tm,ss); - offset = SE->_offset; - - if (Local && SE->_is_local ) { - LOAD_CHIMU; - TP_PROJ; - if ( SE->_permute ) { - PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... - } - } - if ( Nonlocal && (!SE->_is_local) ) { - LOAD_CHI; - } - if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { - MULT_2SPIN(Tm); - TP_RECON_ACCUM; - num++; - } - - SiteSpinor & ref (out._odata[ss]); - if ( Local ) { - vstream(ref()(0)(0),result_00*(-0.5)); - vstream(ref()(0)(1),result_01*(-0.5)); - vstream(ref()(0)(2),result_02*(-0.5)); - vstream(ref()(1)(0),result_10*(-0.5)); - vstream(ref()(1)(1),result_11*(-0.5)); - vstream(ref()(1)(2),result_12*(-0.5)); - vstream(ref()(2)(0),result_20*(-0.5)); - vstream(ref()(2)(1),result_21*(-0.5)); - vstream(ref()(2)(2),result_22*(-0.5)); - vstream(ref()(3)(0),result_30*(-0.5)); - vstream(ref()(3)(1),result_31*(-0.5)); - vstream(ref()(3)(2),result_32*(-0.5)); - return 1; - } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); - vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); - vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); - vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); - vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); - vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); - vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); - vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); - vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); - vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); - vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); - vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); - return 1; - } - return 0; -} - - /* template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, @@ -1087,21 +795,16 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel vstream(ref()(3)(2),result_32*(-0.5)); } } -*/ + //////////////////////////////////////////////// // Specialise Gparity to simple implementation //////////////////////////////////////////////// -//template -//int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, -// std::vector > &buf, -// int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) -#if 0 -template +template<> void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) { - DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 + DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 } template<> @@ -1109,7 +812,7 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) { - DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 + DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 } template<> @@ -1117,7 +820,7 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,Dou std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) { - DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 + DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 } template<> @@ -1125,9 +828,8 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, std::vector > &buf, int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) { - DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 + DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 } -#endif