mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-02 21:14:32 +00:00 
			
		
		
		
	Faster Mobius
This commit is contained in:
		@@ -60,7 +60,7 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
  GridBase *grid=psi._grid;
 | 
			
		||||
  int Ls   = this->Ls;
 | 
			
		||||
  int LLs  = grid->_rdimensions[0];
 | 
			
		||||
  int nsimd= Simd::Nsimd();
 | 
			
		||||
  const int nsimd= Simd::Nsimd();
 | 
			
		||||
 | 
			
		||||
  Vector<iSinglet<Simd> > u(LLs);
 | 
			
		||||
  Vector<iSinglet<Simd> > l(LLs);
 | 
			
		||||
@@ -71,7 +71,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
 | 
			
		||||
  chi.checkerboard=psi.checkerboard;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // just directly address via type pun
 | 
			
		||||
  typedef typename Simd::scalar_type scalar_type;
 | 
			
		||||
  scalar_type * u_p = (scalar_type *)&u[0];
 | 
			
		||||
@@ -87,36 +86,133 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
 | 
			
		||||
    d_p[ss] = diag[s];
 | 
			
		||||
  }}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  M5Dcalls++;
 | 
			
		||||
  M5Dtime-=usecond();
 | 
			
		||||
 | 
			
		||||
  assert(Nc==3);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs
 | 
			
		||||
#if 0
 | 
			
		||||
      alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
      alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
      alignas(64) SiteSpinor fp;
 | 
			
		||||
      alignas(64) SiteSpinor fm;
 | 
			
		||||
 | 
			
		||||
    alignas(64) SiteHalfSpinor hp;
 | 
			
		||||
    alignas(64) SiteHalfSpinor hm;
 | 
			
		||||
    alignas(64) SiteSpinor fp;
 | 
			
		||||
    alignas(64) SiteSpinor fm;
 | 
			
		||||
      for(int v=0;v<LLs;v++){
 | 
			
		||||
 | 
			
		||||
    for(int v=0;v<LLs;v++){
 | 
			
		||||
	int vp=(v+1)%LLs;
 | 
			
		||||
	int vm=(v+LLs-1)%LLs;
 | 
			
		||||
 | 
			
		||||
      int vp=(v+1)%LLs;
 | 
			
		||||
      int vm=(v+LLs-1)%LLs;
 | 
			
		||||
	spProj5m(hp,psi[ss+vp]);
 | 
			
		||||
	spProj5p(hm,psi[ss+vm]);
 | 
			
		||||
 | 
			
		||||
      spProj5m(hp,psi[ss+vp]);
 | 
			
		||||
      spProj5p(hm,psi[ss+vm]);
 | 
			
		||||
	if ( vp<=v ) rotate(hp,hp,1);
 | 
			
		||||
	if ( vm>=v ) rotate(hm,hm,nsimd-1);
 | 
			
		||||
	
 | 
			
		||||
	hp=0.5*hp;
 | 
			
		||||
        hm=0.5*hm;
 | 
			
		||||
 | 
			
		||||
	spRecon5m(fp,hp);
 | 
			
		||||
	spRecon5p(fm,hm);
 | 
			
		||||
 | 
			
		||||
	chi[ss+v] = d[v]*phi[ss+v];
 | 
			
		||||
	chi[ss+v] = chi[ss+v]     +u[v]*fp;
 | 
			
		||||
	chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
#else
 | 
			
		||||
      for(int v=0;v<LLs;v++){
 | 
			
		||||
      
 | 
			
		||||
      if ( vp<=v ) rotate(hp,hp,1);
 | 
			
		||||
      if ( vm>=v ) rotate(hm,hm,nsimd-1);
 | 
			
		||||
	int vp= (v==LLs-1) ? 0     : v+1;
 | 
			
		||||
	int vm= (v==0    ) ? LLs-1 : v-1;
 | 
			
		||||
	
 | 
			
		||||
	Simd hp_00 = psi[ss+vp]()(2)(0); 
 | 
			
		||||
	Simd hp_01 = psi[ss+vp]()(2)(1); 
 | 
			
		||||
	Simd hp_02 = psi[ss+vp]()(2)(2); 
 | 
			
		||||
	Simd hp_10 = psi[ss+vp]()(3)(0); 
 | 
			
		||||
	Simd hp_11 = psi[ss+vp]()(3)(1); 
 | 
			
		||||
	Simd hp_12 = psi[ss+vp]()(3)(2); 
 | 
			
		||||
	
 | 
			
		||||
	Simd hm_00 = psi[ss+vm]()(0)(0); 
 | 
			
		||||
	Simd hm_01 = psi[ss+vm]()(0)(1); 
 | 
			
		||||
	Simd hm_02 = psi[ss+vm]()(0)(2); 
 | 
			
		||||
	Simd hm_10 = psi[ss+vm]()(1)(0); 
 | 
			
		||||
	Simd hm_11 = psi[ss+vm]()(1)(1); 
 | 
			
		||||
	Simd hm_12 = psi[ss+vm]()(1)(2); 
 | 
			
		||||
 | 
			
		||||
      hp=hp*0.5;
 | 
			
		||||
      hm=hm*0.5;
 | 
			
		||||
      spRecon5m(fp,hp);
 | 
			
		||||
      spRecon5p(fm,hm);
 | 
			
		||||
	//	if ( ss==0) std::cout << " hp_00 " <<hp_00<<std::endl;
 | 
			
		||||
	//	if ( ss==0) std::cout << " hm_00 " <<hm_00<<std::endl;
 | 
			
		||||
 | 
			
		||||
      chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
 | 
			
		||||
      chi[ss+v] = chi[ss+v]     +l[v]*fm;
 | 
			
		||||
	if ( vp<=v ) {
 | 
			
		||||
	  hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v);
 | 
			
		||||
	  hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v);
 | 
			
		||||
	  hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v);
 | 
			
		||||
	  hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v);
 | 
			
		||||
	  hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v);
 | 
			
		||||
	  hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v);
 | 
			
		||||
	}
 | 
			
		||||
	if ( vm>=v ) {
 | 
			
		||||
	  hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v);
 | 
			
		||||
	  hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v);
 | 
			
		||||
	  hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v);
 | 
			
		||||
	  hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v);
 | 
			
		||||
	  hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v);
 | 
			
		||||
	  hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
	/*
 | 
			
		||||
	if ( ss==0) std::cout << " dphi_00 " <<d[v]()()() * phi[ss+v]()(0)(0) <<std::endl;
 | 
			
		||||
	if ( ss==0) std::cout << " dphi_10 " <<d[v]()()() * phi[ss+v]()(1)(0) <<std::endl;
 | 
			
		||||
	if ( ss==0) std::cout << " dphi_20 " <<d[v]()()() * phi[ss+v]()(2)(0) <<std::endl;
 | 
			
		||||
	if ( ss==0) std::cout << " dphi_30 " <<d[v]()()() * phi[ss+v]()(3)(0) <<std::endl;
 | 
			
		||||
	*/	
 | 
			
		||||
	Simd p_00  = d[v]()()() * phi[ss+v]()(0)(0)  + l[v]()()()*hm_00; 
 | 
			
		||||
	Simd p_01  = d[v]()()() * phi[ss+v]()(0)(1)  + l[v]()()()*hm_01; 
 | 
			
		||||
	Simd p_02  = d[v]()()() * phi[ss+v]()(0)(2)  + l[v]()()()*hm_02; 
 | 
			
		||||
	Simd p_10  = d[v]()()() * phi[ss+v]()(1)(0)  + l[v]()()()*hm_10; 
 | 
			
		||||
	Simd p_11  = d[v]()()() * phi[ss+v]()(1)(1)  + l[v]()()()*hm_11; 
 | 
			
		||||
	Simd p_12  = d[v]()()() * phi[ss+v]()(1)(2)  + l[v]()()()*hm_12; 
 | 
			
		||||
	Simd p_20  = d[v]()()() * phi[ss+v]()(2)(0)  + u[v]()()()*hp_00; 
 | 
			
		||||
	Simd p_21  = d[v]()()() * phi[ss+v]()(2)(1)  + u[v]()()()*hp_01; 
 | 
			
		||||
	Simd p_22  = d[v]()()() * phi[ss+v]()(2)(2)  + u[v]()()()*hp_02;  
 | 
			
		||||
	Simd p_30  = d[v]()()() * phi[ss+v]()(3)(0)  + u[v]()()()*hp_10; 
 | 
			
		||||
	Simd p_31  = d[v]()()() * phi[ss+v]()(3)(1)  + u[v]()()()*hp_11; 
 | 
			
		||||
	Simd p_32  = d[v]()()() * phi[ss+v]()(3)(2)  + u[v]()()()*hp_12; 
 | 
			
		||||
 | 
			
		||||
	
 | 
			
		||||
	//	if ( ss==0){
 | 
			
		||||
	/*
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(0) << " bad "<<p_00<<" diff "<<chi[ss+v]()(0)(0)-p_00<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(1) << " bad "<<p_01<<" diff "<<chi[ss+v]()(0)(1)-p_01<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(2) << " bad "<<p_02<<" diff "<<chi[ss+v]()(0)(2)-p_02<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(0) << " bad "<<p_10<<" diff "<<chi[ss+v]()(1)(0)-p_10<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(1) << " bad "<<p_11<<" diff "<<chi[ss+v]()(1)(1)-p_11<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(2) << " bad "<<p_12<<" diff "<<chi[ss+v]()(1)(2)-p_12<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(0) << " bad "<<p_20<<" diff "<<chi[ss+v]()(2)(0)-p_20<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(1) << " bad "<<p_21<<" diff "<<chi[ss+v]()(2)(1)-p_21<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(2) << " bad "<<p_22<<" diff "<<chi[ss+v]()(2)(2)-p_22<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(0) << " bad "<<p_30<<" diff "<<chi[ss+v]()(3)(0)-p_30<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(1) << " bad "<<p_31<<" diff "<<chi[ss+v]()(3)(1)-p_31<<std::endl;
 | 
			
		||||
	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(2) << " bad "<<p_32<<" diff "<<chi[ss+v]()(3)(2)-p_32<<std::endl;
 | 
			
		||||
	}
 | 
			
		||||
	*/
 | 
			
		||||
	vstream(chi[ss+v]()(0)(0),p_00);
 | 
			
		||||
	vstream(chi[ss+v]()(0)(1),p_01);
 | 
			
		||||
	vstream(chi[ss+v]()(0)(2),p_02);
 | 
			
		||||
	vstream(chi[ss+v]()(1)(0),p_10);
 | 
			
		||||
	vstream(chi[ss+v]()(1)(1),p_11);
 | 
			
		||||
	vstream(chi[ss+v]()(1)(2),p_12);
 | 
			
		||||
	vstream(chi[ss+v]()(2)(0),p_20);
 | 
			
		||||
	vstream(chi[ss+v]()(2)(1),p_21);
 | 
			
		||||
	vstream(chi[ss+v]()(2)(2),p_22);
 | 
			
		||||
	vstream(chi[ss+v]()(3)(0),p_30);
 | 
			
		||||
	vstream(chi[ss+v]()(3)(1),p_31);
 | 
			
		||||
	vstream(chi[ss+v]()(3)(2),p_32);
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
  M5Dtime+=usecond();
 | 
			
		||||
}
 | 
			
		||||
@@ -191,7 +287,6 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  }
 | 
			
		||||
  M5Dtime+=usecond();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class Impl>
 | 
			
		||||
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
 | 
			
		||||
{
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user