mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 18:19:34 +01:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/paboyle/Grid into develop
This commit is contained in:
		| @@ -60,7 +60,7 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | |||||||
|   GridBase *grid=psi._grid; |   GridBase *grid=psi._grid; | ||||||
|   int Ls   = this->Ls; |   int Ls   = this->Ls; | ||||||
|   int LLs  = grid->_rdimensions[0]; |   int LLs  = grid->_rdimensions[0]; | ||||||
|   int nsimd= Simd::Nsimd(); |   const int nsimd= Simd::Nsimd(); | ||||||
|  |  | ||||||
|   Vector<iSinglet<Simd> > u(LLs); |   Vector<iSinglet<Simd> > u(LLs); | ||||||
|   Vector<iSinglet<Simd> > l(LLs); |   Vector<iSinglet<Simd> > l(LLs); | ||||||
| @@ -71,7 +71,6 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | |||||||
|  |  | ||||||
|   chi.checkerboard=psi.checkerboard; |   chi.checkerboard=psi.checkerboard; | ||||||
|  |  | ||||||
|  |  | ||||||
|   // just directly address via type pun |   // just directly address via type pun | ||||||
|   typedef typename Simd::scalar_type scalar_type; |   typedef typename Simd::scalar_type scalar_type; | ||||||
|   scalar_type * u_p = (scalar_type *)&u[0]; |   scalar_type * u_p = (scalar_type *)&u[0]; | ||||||
| @@ -87,11 +86,15 @@ void CayleyFermion5D<Impl>::M5D(const FermionField &psi, | |||||||
|     d_p[ss] = diag[s]; |     d_p[ss] = diag[s]; | ||||||
|   }} |   }} | ||||||
|  |  | ||||||
|  |  | ||||||
|   M5Dcalls++; |   M5Dcalls++; | ||||||
|   M5Dtime-=usecond(); |   M5Dtime-=usecond(); | ||||||
|  |  | ||||||
|  |   assert(Nc==3); | ||||||
|  |  | ||||||
| PARALLEL_FOR_LOOP | PARALLEL_FOR_LOOP | ||||||
|   for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs |   for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs | ||||||
|  | #if 0 | ||||||
|       alignas(64) SiteHalfSpinor hp; |       alignas(64) SiteHalfSpinor hp; | ||||||
|       alignas(64) SiteHalfSpinor hm; |       alignas(64) SiteHalfSpinor hm; | ||||||
|       alignas(64) SiteSpinor fp; |       alignas(64) SiteSpinor fp; | ||||||
| @@ -108,15 +111,108 @@ PARALLEL_FOR_LOOP | |||||||
| 	if ( vp<=v ) rotate(hp,hp,1); | 	if ( vp<=v ) rotate(hp,hp,1); | ||||||
| 	if ( vm>=v ) rotate(hm,hm,nsimd-1); | 	if ( vm>=v ) rotate(hm,hm,nsimd-1); | ||||||
| 	 | 	 | ||||||
|       hp=hp*0.5; | 	hp=0.5*hp; | ||||||
|       hm=hm*0.5; |         hm=0.5*hm; | ||||||
|  |  | ||||||
| 	spRecon5m(fp,hp); | 	spRecon5m(fp,hp); | ||||||
| 	spRecon5p(fm,hm); | 	spRecon5p(fm,hm); | ||||||
|  |  | ||||||
|       chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; | 	chi[ss+v] = d[v]*phi[ss+v]; | ||||||
|  | 	chi[ss+v] = chi[ss+v]     +u[v]*fp; | ||||||
| 	chi[ss+v] = chi[ss+v]     +l[v]*fm; | 	chi[ss+v] = chi[ss+v]     +l[v]*fm; | ||||||
|  |  | ||||||
|       } |       } | ||||||
|  | #else | ||||||
|  |       for(int v=0;v<LLs;v++){ | ||||||
|  |        | ||||||
|  | 	int vp= (v==LLs-1) ? 0     : v+1; | ||||||
|  | 	int vm= (v==0    ) ? LLs-1 : v-1; | ||||||
|  | 	 | ||||||
|  | 	Simd hp_00 = psi[ss+vp]()(2)(0);  | ||||||
|  | 	Simd hp_01 = psi[ss+vp]()(2)(1);  | ||||||
|  | 	Simd hp_02 = psi[ss+vp]()(2)(2);  | ||||||
|  | 	Simd hp_10 = psi[ss+vp]()(3)(0);  | ||||||
|  | 	Simd hp_11 = psi[ss+vp]()(3)(1);  | ||||||
|  | 	Simd hp_12 = psi[ss+vp]()(3)(2);  | ||||||
|  | 	 | ||||||
|  | 	Simd hm_00 = psi[ss+vm]()(0)(0);  | ||||||
|  | 	Simd hm_01 = psi[ss+vm]()(0)(1);  | ||||||
|  | 	Simd hm_02 = psi[ss+vm]()(0)(2);  | ||||||
|  | 	Simd hm_10 = psi[ss+vm]()(1)(0);  | ||||||
|  | 	Simd hm_11 = psi[ss+vm]()(1)(1);  | ||||||
|  | 	Simd hm_12 = psi[ss+vm]()(1)(2);  | ||||||
|  |  | ||||||
|  | 	//	if ( ss==0) std::cout << " hp_00 " <<hp_00<<std::endl; | ||||||
|  | 	//	if ( ss==0) std::cout << " hm_00 " <<hm_00<<std::endl; | ||||||
|  |  | ||||||
|  | 	if ( vp<=v ) { | ||||||
|  | 	  hp_00.v = Optimization::Rotate::tRotate<2>(hp_00.v); | ||||||
|  | 	  hp_01.v = Optimization::Rotate::tRotate<2>(hp_01.v); | ||||||
|  | 	  hp_02.v = Optimization::Rotate::tRotate<2>(hp_02.v); | ||||||
|  | 	  hp_10.v = Optimization::Rotate::tRotate<2>(hp_10.v); | ||||||
|  | 	  hp_11.v = Optimization::Rotate::tRotate<2>(hp_11.v); | ||||||
|  | 	  hp_12.v = Optimization::Rotate::tRotate<2>(hp_12.v); | ||||||
|  | 	} | ||||||
|  | 	if ( vm>=v ) { | ||||||
|  | 	  hm_00.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_00.v); | ||||||
|  | 	  hm_01.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_01.v); | ||||||
|  | 	  hm_02.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_02.v); | ||||||
|  | 	  hm_10.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_10.v); | ||||||
|  | 	  hm_11.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_11.v); | ||||||
|  | 	  hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	/* | ||||||
|  | 	if ( ss==0) std::cout << " dphi_00 " <<d[v]()()() * phi[ss+v]()(0)(0) <<std::endl; | ||||||
|  | 	if ( ss==0) std::cout << " dphi_10 " <<d[v]()()() * phi[ss+v]()(1)(0) <<std::endl; | ||||||
|  | 	if ( ss==0) std::cout << " dphi_20 " <<d[v]()()() * phi[ss+v]()(2)(0) <<std::endl; | ||||||
|  | 	if ( ss==0) std::cout << " dphi_30 " <<d[v]()()() * phi[ss+v]()(3)(0) <<std::endl; | ||||||
|  | 	*/	 | ||||||
|  | 	Simd p_00  = d[v]()()() * phi[ss+v]()(0)(0)  + l[v]()()()*hm_00;  | ||||||
|  | 	Simd p_01  = d[v]()()() * phi[ss+v]()(0)(1)  + l[v]()()()*hm_01;  | ||||||
|  | 	Simd p_02  = d[v]()()() * phi[ss+v]()(0)(2)  + l[v]()()()*hm_02;  | ||||||
|  | 	Simd p_10  = d[v]()()() * phi[ss+v]()(1)(0)  + l[v]()()()*hm_10;  | ||||||
|  | 	Simd p_11  = d[v]()()() * phi[ss+v]()(1)(1)  + l[v]()()()*hm_11;  | ||||||
|  | 	Simd p_12  = d[v]()()() * phi[ss+v]()(1)(2)  + l[v]()()()*hm_12;  | ||||||
|  | 	Simd p_20  = d[v]()()() * phi[ss+v]()(2)(0)  + u[v]()()()*hp_00;  | ||||||
|  | 	Simd p_21  = d[v]()()() * phi[ss+v]()(2)(1)  + u[v]()()()*hp_01;  | ||||||
|  | 	Simd p_22  = d[v]()()() * phi[ss+v]()(2)(2)  + u[v]()()()*hp_02;   | ||||||
|  | 	Simd p_30  = d[v]()()() * phi[ss+v]()(3)(0)  + u[v]()()()*hp_10;  | ||||||
|  | 	Simd p_31  = d[v]()()() * phi[ss+v]()(3)(1)  + u[v]()()()*hp_11;  | ||||||
|  | 	Simd p_32  = d[v]()()() * phi[ss+v]()(3)(2)  + u[v]()()()*hp_12;  | ||||||
|  |  | ||||||
|  | 	 | ||||||
|  | 	//	if ( ss==0){ | ||||||
|  | 	/* | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(0) << " bad "<<p_00<<" diff "<<chi[ss+v]()(0)(0)-p_00<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(1) << " bad "<<p_01<<" diff "<<chi[ss+v]()(0)(1)-p_01<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(0)(2) << " bad "<<p_02<<" diff "<<chi[ss+v]()(0)(2)-p_02<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(0) << " bad "<<p_10<<" diff "<<chi[ss+v]()(1)(0)-p_10<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(1) << " bad "<<p_11<<" diff "<<chi[ss+v]()(1)(1)-p_11<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(1)(2) << " bad "<<p_12<<" diff "<<chi[ss+v]()(1)(2)-p_12<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(0) << " bad "<<p_20<<" diff "<<chi[ss+v]()(2)(0)-p_20<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(1) << " bad "<<p_21<<" diff "<<chi[ss+v]()(2)(1)-p_21<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(2)(2) << " bad "<<p_22<<" diff "<<chi[ss+v]()(2)(2)-p_22<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(0) << " bad "<<p_30<<" diff "<<chi[ss+v]()(3)(0)-p_30<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(1) << " bad "<<p_31<<" diff "<<chi[ss+v]()(3)(1)-p_31<<std::endl; | ||||||
|  | 	std::cout << ss<<" "<< v<< " good "<< chi[ss+v]()(3)(2) << " bad "<<p_32<<" diff "<<chi[ss+v]()(3)(2)-p_32<<std::endl; | ||||||
|  | 	} | ||||||
|  | 	*/ | ||||||
|  | 	vstream(chi[ss+v]()(0)(0),p_00); | ||||||
|  | 	vstream(chi[ss+v]()(0)(1),p_01); | ||||||
|  | 	vstream(chi[ss+v]()(0)(2),p_02); | ||||||
|  | 	vstream(chi[ss+v]()(1)(0),p_10); | ||||||
|  | 	vstream(chi[ss+v]()(1)(1),p_11); | ||||||
|  | 	vstream(chi[ss+v]()(1)(2),p_12); | ||||||
|  | 	vstream(chi[ss+v]()(2)(0),p_20); | ||||||
|  | 	vstream(chi[ss+v]()(2)(1),p_21); | ||||||
|  | 	vstream(chi[ss+v]()(2)(2),p_22); | ||||||
|  | 	vstream(chi[ss+v]()(3)(0),p_30); | ||||||
|  | 	vstream(chi[ss+v]()(3)(1),p_31); | ||||||
|  | 	vstream(chi[ss+v]()(3)(2),p_32); | ||||||
|  |  | ||||||
|  |       } | ||||||
|  | #endif | ||||||
|   } |   } | ||||||
|   M5Dtime+=usecond(); |   M5Dtime+=usecond(); | ||||||
| } | } | ||||||
| @@ -191,7 +287,6 @@ PARALLEL_FOR_LOOP | |||||||
|   } |   } | ||||||
|   M5Dtime+=usecond(); |   M5Dtime+=usecond(); | ||||||
| } | } | ||||||
|  |  | ||||||
| template<class Impl> | template<class Impl> | ||||||
| void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -130,7 +130,7 @@ class Grid_simd { | |||||||
|  |  | ||||||
|   Vector_type v; |   Vector_type v; | ||||||
|  |  | ||||||
|   static inline int Nsimd(void) { |   static inline constexpr int Nsimd(void) { | ||||||
|     return sizeof(Vector_type) / sizeof(Scalar_type); |     return sizeof(Vector_type) / sizeof(Scalar_type); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user