From 6adf35da54fb6df2d71980447c8d6271ab34565b Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 1 Dec 2016 11:39:04 +0000 Subject: [PATCH] Faster Mobius --- lib/qcd/action/fermion/CayleyFermion5Dvec.cc | 137 ++++++++++++++++--- lib/simd/Grid_vector_types.h | 2 +- 2 files changed, 117 insertions(+), 22 deletions(-) diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index 3f3f215c..cfd96aaf 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -60,7 +60,7 @@ void CayleyFermion5D::M5D(const FermionField &psi, GridBase *grid=psi._grid; int Ls = this->Ls; int LLs = grid->_rdimensions[0]; - int nsimd= Simd::Nsimd(); + const int nsimd= Simd::Nsimd(); Vector > u(LLs); Vector > l(LLs); @@ -71,7 +71,6 @@ void CayleyFermion5D::M5D(const FermionField &psi, chi.checkerboard=psi.checkerboard; - // just directly address via type pun typedef typename Simd::scalar_type scalar_type; scalar_type * u_p = (scalar_type *)&u[0]; @@ -87,36 +86,133 @@ void CayleyFermion5D::M5D(const FermionField &psi, d_p[ss] = diag[s]; }} + M5Dcalls++; M5Dtime-=usecond(); + + assert(Nc==3); + PARALLEL_FOR_LOOP for(int ss=0;ssoSites();ss+=LLs){ // adds LLs +#if 0 + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; - alignas(64) SiteHalfSpinor hp; - alignas(64) SiteHalfSpinor hm; - alignas(64) SiteSpinor fp; - alignas(64) SiteSpinor fm; + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + + hp=0.5*hp; + hm=0.5*hm; + + spRecon5m(fp,hp); + spRecon5p(fm,hm); + + chi[ss+v] = d[v]*phi[ss+v]; + chi[ss+v] = chi[ss+v] +u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } +#else + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + int vp= (v==LLs-1) ? 0 : v+1; + int vm= (v==0 ) ? LLs-1 : v-1; + + Simd hp_00 = psi[ss+vp]()(2)(0); + Simd hp_01 = psi[ss+vp]()(2)(1); + Simd hp_02 = psi[ss+vp]()(2)(2); + Simd hp_10 = psi[ss+vp]()(3)(0); + Simd hp_11 = psi[ss+vp]()(3)(1); + Simd hp_12 = psi[ss+vp]()(3)(2); + + Simd hm_00 = psi[ss+vm]()(0)(0); + Simd hm_01 = psi[ss+vm]()(0)(1); + Simd hm_02 = psi[ss+vm]()(0)(2); + Simd hm_10 = psi[ss+vm]()(1)(0); + Simd hm_11 = psi[ss+vm]()(1)(1); + Simd hm_12 = psi[ss+vm]()(1)(2); - hp=hp*0.5; - hm=hm*0.5; - spRecon5m(fp,hp); - spRecon5p(fm,hm); + // if ( ss==0) std::cout << " hp_00 " <(hp_00.v); + hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); + hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); + hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); + hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); + hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); + } + if ( vm>=v ) { + hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); + hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); + hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); + hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); + hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); + hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); + } - } + /* + if ( ss==0) std::cout << " dphi_00 " < void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) { diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 080dd5c0..42f28b34 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -130,7 +130,7 @@ class Grid_simd { Vector_type v; - static inline int Nsimd(void) { + static inline constexpr int Nsimd(void) { return sizeof(Vector_type) / sizeof(Scalar_type); }