mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Faster Mobius
This commit is contained in:
parent
bd0430b34f
commit
6adf35da54
@ -60,7 +60,7 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
GridBase *grid=psi._grid;
|
GridBase *grid=psi._grid;
|
||||||
int Ls = this->Ls;
|
int Ls = this->Ls;
|
||||||
int LLs = grid->_rdimensions[0];
|
int LLs = grid->_rdimensions[0];
|
||||||
int nsimd= Simd::Nsimd();
|
const int nsimd= Simd::Nsimd();
|
||||||
|
|
||||||
Vector<iSinglet<Simd> > u(LLs);
|
Vector<iSinglet<Simd> > u(LLs);
|
||||||
Vector<iSinglet<Simd> > l(LLs);
|
Vector<iSinglet<Simd> > l(LLs);
|
||||||
@ -71,7 +71,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
|
|
||||||
chi.checkerboard=psi.checkerboard;
|
chi.checkerboard=psi.checkerboard;
|
||||||
|
|
||||||
|
|
||||||
// just directly address via type pun
|
// just directly address via type pun
|
||||||
typedef typename Simd::scalar_type scalar_type;
|
typedef typename Simd::scalar_type scalar_type;
|
||||||
scalar_type * u_p = (scalar_type *)&u[0];
|
scalar_type * u_p = (scalar_type *)&u[0];
|
||||||
@ -87,36 +86,133 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
|
|||||||
d_p[ss] = diag[s];
|
d_p[ss] = diag[s];
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
M5Dcalls++;
|
M5Dcalls++;
|
||||||
M5Dtime-=usecond();
|
M5Dtime-=usecond();
|
||||||
|
|
||||||
|
assert(Nc==3);
|
||||||
|
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
|
||||||
|
#if 0
|
||||||
|
alignas(64) SiteHalfSpinor hp;
|
||||||
|
alignas(64) SiteHalfSpinor hm;
|
||||||
|
alignas(64) SiteSpinor fp;
|
||||||
|
alignas(64) SiteSpinor fm;
|
||||||
|
|
||||||
alignas(64) SiteHalfSpinor hp;
|
for(int v=0;v<LLs;v++){
|
||||||
alignas(64) SiteHalfSpinor hm;
|
|
||||||
alignas(64) SiteSpinor fp;
|
|
||||||
alignas(64) SiteSpinor fm;
|
|
||||||
|
|
||||||
for(int v=0;v<LLs;v++){
|
int vp=(v+1)%LLs;
|
||||||
|
int vm=(v+LLs-1)%LLs;
|
||||||
|
|
||||||
int vp=(v+1)%LLs;
|
spProj5m(hp,psi[ss+vp]);
|
||||||
int vm=(v+LLs-1)%LLs;
|
spProj5p(hm,psi[ss+vm]);
|
||||||
|
|
||||||
spProj5m(hp,psi[ss+vp]);
|
if ( vp<=v ) rotate(hp,hp,1);
|
||||||
spProj5p(hm,psi[ss+vm]);
|
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
||||||
|
|
||||||
|
hp=0.5*hp;
|
||||||
|
hm=0.5*hm;
|
||||||
|
|
||||||
|
spRecon5m(fp,hp);
|
||||||
|
spRecon5p(fm,hm);
|
||||||
|
|
||||||
|
chi[ss+v] = d[v]*phi[ss+v];
|
||||||
|
chi[ss+v] = chi[ss+v] +u[v]*fp;
|
||||||
|
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
||||||
|
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for(int v=0;v<LLs;v++){
|
||||||
|
|
||||||
if ( vp<=v ) rotate(hp,hp,1);
|
int vp= (v==LLs-1) ? 0 : v+1;
|
||||||
if ( vm>=v ) rotate(hm,hm,nsimd-1);
|
int vm= (v==0 ) ? LLs-1 : v-1;
|
||||||
|
|
||||||
|
Simd hp_00 = psi[ss+vp]()(2)(0);
|
||||||
|
Simd hp_01 = psi[ss+vp]()(2)(1);
|
||||||
|
Simd hp_02 = psi[ss+vp]()(2)(2);
|
||||||
|
Simd hp_10 = psi[ss+vp]()(3)(0);
|
||||||
|
Simd hp_11 = psi[ss+vp]()(3)(1);
|
||||||
|
Simd hp_12 = psi[ss+vp]()(3)(2);
|
||||||
|
|
||||||
|
Simd hm_00 = psi[ss+vm]()(0)(0);
|
||||||
|
Simd hm_01 = psi[ss+vm]()(0)(1);
|
||||||
|
Simd hm_02 = psi[ss+vm]()(0)(2);
|
||||||
|
Simd hm_10 = psi[ss+vm]()(1)(0);
|
||||||
|
Simd hm_11 = psi[ss+vm]()(1)(1);
|
||||||
|
Simd hm_12 = psi[ss+vm]()(1)(2);
|
||||||
|
|
||||||
hp=hp*0.5;
|
// if ( ss==0) std::cout << " hp_00 " <<hp_00<<std::endl;
|
||||||
hm=hm*0.5;
|
// if ( ss==0) std::cout << " hm_00 " <<hm_00<<std::endl;
|
||||||
spRecon5m(fp,hp);
|
|
||||||
spRecon5p(fm,hm);
|
|
||||||
|
|
||||||
chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
|
if ( vp<=v ) {
|
||||||
chi[ss+v] = chi[ss+v] +l[v]*fm;
|
hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
|
||||||
|
hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
|
||||||
|
hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
|
||||||
|
hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
|
||||||
|
hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
|
||||||
|
hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
|
||||||
|
}
|
||||||
|
if ( vm>=v ) {
|
||||||
|
hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
|
||||||
|
hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
|
||||||
|
hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
|
||||||
|
hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
|
||||||
|
hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
|
||||||
|
hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
/*
|
||||||
|
if ( ss==0) std::cout << " dphi_00 " <<d[v]()()() * phi[ss+v]()(0)(0) <<std::endl;
|
||||||
|
if ( ss==0) std::cout << " dphi_10 " <<d[v]()()() * phi[ss+v]()(1)(0) <<std::endl;
|
||||||
|
if ( ss==0) std::cout << " dphi_20 " <<d[v]()()() * phi[ss+v]()(2)(0) <<std::endl;
|
||||||
|
if ( ss==0) std::cout << " dphi_30 " <<d[v]()()() * phi[ss+v]()(3)(0) <<std::endl;
|
||||||
|
*/
|
||||||
|
Simd p_00 = d[v]()()() * phi[ss+v]()(0)(0) + l[v]()()()*hm_00;
|
||||||
|
Simd p_01 = d[v]()()() * phi[ss+v]()(0)(1) + l[v]()()()*hm_01;
|
||||||
|
Simd p_02 = d[v]()()() * phi[ss+v]()(0)(2) + l[v]()()()*hm_02;
|
||||||
|
Simd p_10 = d[v]()()() * phi[ss+v]()(1)(0) + l[v]()()()*hm_10;
|
||||||
|
Simd p_11 = d[v]()()() * phi[ss+v]()(1)(1) + l[v]()()()*hm_11;
|
||||||
|
Simd p_12 = d[v]()()() * phi[ss+v]()(1)(2) + l[v]()()()*hm_12;
|
||||||
|
Simd p_20 = d[v]()()() * phi[ss+v]()(2)(0) + u[v]()()()*hp_00;
|
||||||
|
Simd p_21 = d[v]()()() * phi[ss+v]()(2)(1) + u[v]()()()*hp_01;
|
||||||
|
Simd p_22 = d[v]()()() * phi[ss+v]()(2)(2) + u[v]()()()*hp_02;
|
||||||
|
Simd p_30 = d[v]()()() * phi[ss+v]()(3)(0) + u[v]()()()*hp_10;
|
||||||
|
Simd p_31 = d[v]()()() * phi[ss+v]()(3)(1) + u[v]()()()*hp_11;
|
||||||
|
Simd p_32 = d[v]()()() * phi[ss+v]()(3)(2) + u[v]()()()*hp_12;
|
||||||
|
|
||||||
|
|
||||||
|
// if ( ss==0){
|
||||||
|
/*
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(0) << " bad "<<p_00<<" diff "<<chi[ss+v]()(0)(0)-p_00<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(1) << " bad "<<p_01<<" diff "<<chi[ss+v]()(0)(1)-p_01<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(2) << " bad "<<p_02<<" diff "<<chi[ss+v]()(0)(2)-p_02<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(0) << " bad "<<p_10<<" diff "<<chi[ss+v]()(1)(0)-p_10<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(1) << " bad "<<p_11<<" diff "<<chi[ss+v]()(1)(1)-p_11<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(2) << " bad "<<p_12<<" diff "<<chi[ss+v]()(1)(2)-p_12<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(0) << " bad "<<p_20<<" diff "<<chi[ss+v]()(2)(0)-p_20<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(1) << " bad "<<p_21<<" diff "<<chi[ss+v]()(2)(1)-p_21<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(2) << " bad "<<p_22<<" diff "<<chi[ss+v]()(2)(2)-p_22<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(0) << " bad "<<p_30<<" diff "<<chi[ss+v]()(3)(0)-p_30<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(1) << " bad "<<p_31<<" diff "<<chi[ss+v]()(3)(1)-p_31<<std::endl;
|
||||||
|
std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(2) << " bad "<<p_32<<" diff "<<chi[ss+v]()(3)(2)-p_32<<std::endl;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
vstream(chi[ss+v]()(0)(0),p_00);
|
||||||
|
vstream(chi[ss+v]()(0)(1),p_01);
|
||||||
|
vstream(chi[ss+v]()(0)(2),p_02);
|
||||||
|
vstream(chi[ss+v]()(1)(0),p_10);
|
||||||
|
vstream(chi[ss+v]()(1)(1),p_11);
|
||||||
|
vstream(chi[ss+v]()(1)(2),p_12);
|
||||||
|
vstream(chi[ss+v]()(2)(0),p_20);
|
||||||
|
vstream(chi[ss+v]()(2)(1),p_21);
|
||||||
|
vstream(chi[ss+v]()(2)(2),p_22);
|
||||||
|
vstream(chi[ss+v]()(3)(0),p_30);
|
||||||
|
vstream(chi[ss+v]()(3)(1),p_31);
|
||||||
|
vstream(chi[ss+v]()(3)(2),p_32);
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
M5Dtime+=usecond();
|
M5Dtime+=usecond();
|
||||||
}
|
}
|
||||||
@ -191,7 +287,6 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
M5Dtime+=usecond();
|
M5Dtime+=usecond();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
|
||||||
{
|
{
|
||||||
|
@ -130,7 +130,7 @@ class Grid_simd {
|
|||||||
|
|
||||||
Vector_type v;
|
Vector_type v;
|
||||||
|
|
||||||
static inline int Nsimd(void) {
|
static inline constexpr int Nsimd(void) {
|
||||||
return sizeof(Vector_type) / sizeof(Scalar_type);
|
return sizeof(Vector_type) / sizeof(Scalar_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user