From 72acb0e48f1108a8f8f73289164ffe62bbe75e6a Mon Sep 17 00:00:00 2001 From: paboyle Date: Sun, 14 Jan 2018 23:41:59 +0000 Subject: [PATCH] Namespace, indent --- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 820 +++++++++---------- 1 file changed, 409 insertions(+), 411 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 653e6ab3..21800523 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,19 +26,19 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include #include -namespace Grid { -namespace QCD { - /* - * Dense matrix versions of routines - */ +NAMESPACE_BEGIN(Grid); + +/* + * Dense matrix versions of routines + */ template void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) { @@ -79,13 +79,13 @@ void CayleyFermion5D::M5D(const FermionField &psi, scalar_type * d_p = (scalar_type *)&d[0]; for(int o=0;o::M5D(const FermionField &psi, parallel_for(int ss=0;ssoSites();ss+=LLs){ // adds LLs #if 0 - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; - for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + if ( vp<=v ) rotate(hp,hp,1); + if ( vm>=v ) rotate(hm,hm,nsimd-1); - hp=0.5*hp; - hm=0.5*hm; + hp=0.5*hp; + hm=0.5*hm; - spRecon5m(fp,hp); - spRecon5p(fm,hm); + spRecon5m(fp,hp); + spRecon5p(fm,hm); - chi[ss+v] = d[v]*phi[ss+v]; - chi[ss+v] = chi[ss+v] +u[v]*fp; - chi[ss+v] = chi[ss+v] +l[v]*fm; + chi[ss+v] = d[v]*phi[ss+v]; + chi[ss+v] = chi[ss+v] +u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; - } + } #else - for(int v=0;v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - if ( vm>=v ) { - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - // Can force these to real arithmetic and save 2x. - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(),hm_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); - - vstream(chi[ss+v]()(0)(0),p_00); - vstream(chi[ss+v]()(0)(1),p_01); - vstream(chi[ss+v]()(0)(2),p_02); - vstream(chi[ss+v]()(1)(0),p_10); - vstream(chi[ss+v]()(1)(1),p_11); - vstream(chi[ss+v]()(1)(2),p_12); - vstream(chi[ss+v]()(2)(0),p_20); - vstream(chi[ss+v]()(2)(1),p_21); - vstream(chi[ss+v]()(2)(2),p_22); - vstream(chi[ss+v]()(3)(0),p_30); - vstream(chi[ss+v]()(3)(1),p_31); - vstream(chi[ss+v]()(3)(2),p_32); + Simd hm_00 = psi[ss+vm]()(0)(0); + Simd hm_01 = psi[ss+vm]()(0)(1); + Simd hm_02 = psi[ss+vm]()(0)(2); + Simd hm_10 = psi[ss+vm]()(1)(0); + Simd hm_11 = psi[ss+vm]()(1)(1); + Simd hm_12 = psi[ss+vm]()(1)(2); + if ( vp<=v ) { + hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + // Can force these to real arithmetic and save 2x. + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); + + vstream(chi[ss+v]()(0)(0),p_00); + vstream(chi[ss+v]()(0)(1),p_01); + vstream(chi[ss+v]()(0)(2),p_02); + vstream(chi[ss+v]()(1)(0),p_10); + vstream(chi[ss+v]()(1)(1),p_11); + vstream(chi[ss+v]()(1)(2),p_12); + vstream(chi[ss+v]()(2)(0),p_20); + vstream(chi[ss+v]()(2)(1),p_21); + vstream(chi[ss+v]()(2)(2),p_22); + vstream(chi[ss+v]()(3)(0),p_30); + vstream(chi[ss+v]()(3)(1),p_31); + vstream(chi[ss+v]()(3)(2),p_32); + + } #endif } M5Dtime+=usecond(); @@ -223,13 +223,13 @@ void CayleyFermion5D::M5Ddag(const FermionField &psi, scalar_type * d_p = (scalar_type *)&d[0]; for(int o=0;o::M5Ddag(const FermionField &psi, } #else - for(int v=0;v(hp_00.v); - hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); - hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); - hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); - hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); - hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); - } - if ( vm>=v ) { - hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); - hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); - hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); - hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); - hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); - hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); - } - - Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); - Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); - Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); - Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); - Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(),hp_11); - Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); - - Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); - Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); - Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); - Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); - Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); - Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); - - vstream(chi[ss+v]()(0)(0),p_00); - vstream(chi[ss+v]()(0)(1),p_01); - vstream(chi[ss+v]()(0)(2),p_02); - vstream(chi[ss+v]()(1)(0),p_10); - vstream(chi[ss+v]()(1)(1),p_11); - vstream(chi[ss+v]()(1)(2),p_12); - vstream(chi[ss+v]()(2)(0),p_20); - vstream(chi[ss+v]()(2)(1),p_21); - vstream(chi[ss+v]()(2)(2),p_22); - vstream(chi[ss+v]()(3)(0),p_30); - vstream(chi[ss+v]()(3)(1),p_31); - vstream(chi[ss+v]()(3)(2),p_32); + if ( vp<=v ) { + hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } + + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); + + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); + + vstream(chi[ss+v]()(0)(0),p_00); + vstream(chi[ss+v]()(0)(1),p_01); + vstream(chi[ss+v]()(0)(2),p_02); + vstream(chi[ss+v]()(1)(0),p_10); + vstream(chi[ss+v]()(1)(1),p_11); + vstream(chi[ss+v]()(1)(2),p_12); + vstream(chi[ss+v]()(2)(0),p_20); + vstream(chi[ss+v]()(2)(1),p_21); + vstream(chi[ss+v]()(2)(2),p_22); + vstream(chi[ss+v]()(3)(0),p_30); + vstream(chi[ss+v]()(3)(1),p_31); + vstream(chi[ss+v]()(3)(2),p_32); + } #endif } M5Dtime+=usecond(); @@ -346,54 +346,54 @@ void CayleyFermion5D::MooeeInternalAsm(const FermionField &psi, FermionFie { #ifndef AVX512 { - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0;s1::MooeeInternalAsm(const FermionField &psi, FermionFie #define BCAST10 %%zmm23 #define BCAST11 %%zmm24 - int incr=LLs*LLs*sizeof(iSinglet); - for(int s1=0;s1); + for(int s1=0;s1::MooeeInternalAsm(const FermionField &psi, FermionFie #endif }; - // Z-mobius version +// Z-mobius version template void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionField &chi, - int LLs, int site, Vector > &Matp, Vector > &Matm) + int LLs, int site, Vector > &Matp, Vector > &Matm) { #ifndef AVX512 { - SiteHalfSpinor BcastP; - SiteHalfSpinor BcastM; - SiteHalfSpinor SiteChiP; - SiteHalfSpinor SiteChiM; + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + SiteHalfSpinor SiteChiP; + SiteHalfSpinor SiteChiM; - // Ls*Ls * 2 * 12 * vol flops - for(int s1=0;s1::MooeeInternalZAsm(const FermionField &psi, FermionFi #define Mm %zmm26 #define Mms %zmm27 #define N 8 - int incr=LLs*LLs*sizeof(iSinglet); - for(int s1=0;s1); + for(int s1=0;s1::MooeeInternal(const Fermio template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); - - -}} +NAMESPACE_END(Grid);