mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-30 11:34:32 +00:00 
			
		
		
		
	Revert to the Grid develop version, since new LLVM compilers no longer require static loop count variables.
This commit is contained in:
		| @@ -120,8 +120,8 @@ public: | |||||||
|     int DimRep = Impl::Dimension; |     int DimRep = Impl::Dimension; | ||||||
|  |  | ||||||
|     autoView(in_v, in, AcceleratorWrite); |     autoView(in_v, in, AcceleratorWrite); | ||||||
|     int osites=in.Grid()->oSites(); |  | ||||||
|     accelerator_for(ss, osites, 1, { |     accelerator_for(ss, in.Grid()->oSites(), 1, { | ||||||
|       for (int sa=0; sa<Ns; sa++) |       for (int sa=0; sa<Ns; sa++) | ||||||
|         for (int ca=0; ca<DimRep; ca++) |         for (int ca=0; ca<DimRep; ca++) | ||||||
|           in_v[ss]()(sa,sa)(ca,ca) = c; |           in_v[ss]()(sa,sa)(ca,ca) = c; | ||||||
| @@ -246,8 +246,8 @@ public: | |||||||
|     int DimRep = Impl::Dimension; |     int DimRep = Impl::Dimension; | ||||||
|  |  | ||||||
|     autoView(in_v, in, AcceleratorWrite); |     autoView(in_v, in, AcceleratorWrite); | ||||||
|     int osites=in.Grid()->oSites(); |  | ||||||
|     accelerator_for(ss, osites, 1, { |     accelerator_for(ss, in.Grid()->oSites(), 1, { | ||||||
|       for (int sa=0; sa<Ns; sa++) |       for (int sa=0; sa<Ns; sa++) | ||||||
|         for (int ca=0; ca<DimRep; ca++) |         for (int ca=0; ca<DimRep; ca++) | ||||||
|           in_v[ss]()(sa,sa)(ca,ca) = c; |           in_v[ss]()(sa,sa)(ca,ca) = c; | ||||||
|   | |||||||
| @@ -85,8 +85,7 @@ public: | |||||||
|     T = Zero(); |     T = Zero(); | ||||||
|     autoView(T_v,T,AcceleratorWrite); |     autoView(T_v,T,AcceleratorWrite); | ||||||
|     autoView(F_v,F,AcceleratorRead); |     autoView(F_v,F,AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesMinusI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesMinusI(F_v[i]()()))); | ||||||
|       coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesMinusI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesMinusI(F_v[i]()()))); | ||||||
| @@ -104,8 +103,7 @@ public: | |||||||
|      |      | ||||||
|     autoView(T_v, T,AcceleratorWrite); |     autoView(T_v, T,AcceleratorWrite); | ||||||
|     autoView(F_v, F,AcceleratorRead); |     autoView(F_v, F,AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 1), coalescedRead(-F_v[i]()())); |       coalescedWrite(T_v[i]()(0, 1), coalescedRead(-F_v[i]()())); | ||||||
|       coalescedWrite(T_v[i]()(1, 0), coalescedRead(F_v[i]()())); |       coalescedWrite(T_v[i]()(1, 0), coalescedRead(F_v[i]()())); | ||||||
| @@ -123,8 +121,7 @@ public: | |||||||
|  |  | ||||||
|     autoView(T_v,T,AcceleratorWrite); |     autoView(T_v,T,AcceleratorWrite); | ||||||
|     autoView(F_v,F,AcceleratorRead); |     autoView(F_v,F,AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesMinusI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesMinusI(F_v[i]()()))); | ||||||
|       coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesI(F_v[i]()()))); | ||||||
| @@ -142,8 +139,7 @@ public: | |||||||
|  |  | ||||||
|     autoView( T_v , T, AcceleratorWrite); |     autoView( T_v , T, AcceleratorWrite); | ||||||
|     autoView( F_v , F, AcceleratorRead); |     autoView( F_v , F, AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesI(F_v[i]()()))); | ||||||
|       coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesI(F_v[i]()()))); | ||||||
| @@ -161,8 +157,7 @@ public: | |||||||
|      |      | ||||||
|     autoView( T_v ,T,AcceleratorWrite); |     autoView( T_v ,T,AcceleratorWrite); | ||||||
|     autoView( F_v ,F,AcceleratorRead); |     autoView( F_v ,F,AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 1), coalescedRead(-(F_v[i]()()))); |       coalescedWrite(T_v[i]()(0, 1), coalescedRead(-(F_v[i]()()))); | ||||||
|       coalescedWrite(T_v[i]()(1, 0), coalescedRead((F_v[i]()()))); |       coalescedWrite(T_v[i]()(1, 0), coalescedRead((F_v[i]()()))); | ||||||
| @@ -181,8 +176,7 @@ public: | |||||||
|  |  | ||||||
|     autoView( T_v , T,AcceleratorWrite); |     autoView( T_v , T,AcceleratorWrite); | ||||||
|     autoView( F_v , F,AcceleratorRead); |     autoView( F_v , F,AcceleratorRead); | ||||||
|     int osites=T.Grid()->oSites(); |     accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(), | ||||||
|     accelerator_for(i, osites,CloverField::vector_type::Nsimd(), |  | ||||||
|     { |     { | ||||||
|       coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesI(F_v[i]()()))); | ||||||
|       coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesMinusI(F_v[i]()()))); |       coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesMinusI(F_v[i]()()))); | ||||||
| @@ -206,8 +200,7 @@ public: | |||||||
|     autoView(phi_v, phi, AcceleratorRead); |     autoView(phi_v, phi, AcceleratorRead); | ||||||
|     autoView(C_v,   C,   AcceleratorRead); |     autoView(C_v,   C,   AcceleratorRead); | ||||||
|     typedef decltype(coalescedRead(out_v[0])) calcSpinor; |     typedef decltype(coalescedRead(out_v[0])) calcSpinor; | ||||||
|     int osites=out.Grid()->oSites(); |     accelerator_for(sss,out.Grid()->oSites(),Nsimd,{ | ||||||
|     accelerator_for(sss,osites,Nsimd,{ |  | ||||||
|       calcSpinor tmp; |       calcSpinor tmp; | ||||||
|       multClover(tmp,C_v[sss],phi_v(sss)); |       multClover(tmp,C_v[sss],phi_v(sss)); | ||||||
|       coalescedWrite(out_v[sss],tmp); |       coalescedWrite(out_v[sss],tmp); | ||||||
| @@ -617,8 +610,7 @@ public: | |||||||
|     autoView(triangle_v, triangle, AcceleratorWrite); |     autoView(triangle_v, triangle, AcceleratorWrite); | ||||||
|  |  | ||||||
|     // NOTE: this function cannot be 'private' since nvcc forbids this for kernels |     // NOTE: this function cannot be 'private' since nvcc forbids this for kernels | ||||||
|     int osites=full.Grid()->oSites(); |     accelerator_for(ss, full.Grid()->oSites(), 1, { | ||||||
|     accelerator_for(ss, osites, 1, { |  | ||||||
|       for(int s_row = 0; s_row < Ns; s_row++) { |       for(int s_row = 0; s_row < Ns; s_row++) { | ||||||
|         for(int s_col = 0; s_col < Ns; s_col++) { |         for(int s_col = 0; s_col < Ns; s_col++) { | ||||||
|           if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue; |           if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue; | ||||||
| @@ -658,8 +650,7 @@ public: | |||||||
|     autoView(full_v,     full,     AcceleratorWrite); |     autoView(full_v,     full,     AcceleratorWrite); | ||||||
|  |  | ||||||
|     // NOTE: this function cannot be 'private' since nvcc forbids this for kernels |     // NOTE: this function cannot be 'private' since nvcc forbids this for kernels | ||||||
|     int osites=full.Grid()->oSites(); |     accelerator_for(ss, full.Grid()->oSites(), 1, { | ||||||
|     accelerator_for(ss, osites, 1, { |  | ||||||
|       for(int s_row = 0; s_row < Ns; s_row++) { |       for(int s_row = 0; s_row < Ns; s_row++) { | ||||||
|         for(int s_col = 0; s_col < Ns; s_col++) { |         for(int s_col = 0; s_col < Ns; s_col++) { | ||||||
|           if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue; |           if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue; | ||||||
|   | |||||||
| @@ -110,10 +110,8 @@ public: | |||||||
|     autoView( out_v, out, AcceleratorWrite); |     autoView( out_v, out, AcceleratorWrite); | ||||||
|     autoView( phi_v, phi, AcceleratorRead); |     autoView( phi_v, phi, AcceleratorRead); | ||||||
|     autoView( Umu_v, Umu, AcceleratorRead); |     autoView( Umu_v, Umu, AcceleratorRead); | ||||||
|     int size=out.Grid()->oSites(); |  | ||||||
|  |  | ||||||
|     typedef decltype(coalescedRead(out_v[0]))   calcSpinor; |     typedef decltype(coalescedRead(out_v[0]))   calcSpinor; | ||||||
|     accelerator_for(sss,size,Nsimd,{ |     accelerator_for(sss,out.Grid()->oSites(),Nsimd,{ | ||||||
| 	calcSpinor tmp; | 	calcSpinor tmp; | ||||||
| 	multLink(tmp,Umu_v[sss],phi_v(sss),mu); | 	multLink(tmp,Umu_v[sss],phi_v(sss),mu); | ||||||
| 	coalescedWrite(out_v[sss],tmp); | 	coalescedWrite(out_v[sss],tmp); | ||||||
| @@ -205,8 +203,7 @@ public: | |||||||
|       autoView( tmp_v , tmp, AcceleratorWrite); |       autoView( tmp_v , tmp, AcceleratorWrite); | ||||||
|       autoView( Btilde_v , Btilde, AcceleratorRead); |       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||||
|       autoView( Atilde_v , Atilde, AcceleratorRead); |       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||||
|       int size=tmp.Grid()->oSites(); |       accelerator_for(sss,tmp.Grid()->oSites(),1,{ | ||||||
|       accelerator_for(sss,size,1,{ |  | ||||||
| 	  int sU=sss; | 	  int sU=sss; | ||||||
| 	  for(int s=0;s<Ls;s++){ | 	  for(int s=0;s<Ls;s++){ | ||||||
| 	    int sF = s+Ls*sU; | 	    int sF = s+Ls*sU; | ||||||
| @@ -220,8 +217,7 @@ public: | |||||||
|       const int Nsimd = SiteSpinor::Nsimd(); |       const int Nsimd = SiteSpinor::Nsimd(); | ||||||
|       autoView( Btilde_v , Btilde, AcceleratorRead); |       autoView( Btilde_v , Btilde, AcceleratorRead); | ||||||
|       autoView( Atilde_v , Atilde, AcceleratorRead); |       autoView( Atilde_v , Atilde, AcceleratorRead); | ||||||
|       int size=mat.Grid()->oSites(); |       accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{ | ||||||
|       accelerator_for(sss,size,Nsimd,{ |  | ||||||
| 	  int sU=sss; | 	  int sU=sss; | ||||||
|   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; |   	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType; | ||||||
|   	  ColorMatrixType sum; |   	  ColorMatrixType sum; | ||||||
|   | |||||||
| @@ -88,8 +88,7 @@ public: | |||||||
|   static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W |   static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W | ||||||
|     autoView(U_v,U,AcceleratorWrite); |     autoView(U_v,U,AcceleratorWrite); | ||||||
|     autoView(W_v,W,AcceleratorRead); |     autoView(W_v,W,AcceleratorRead); | ||||||
|     int size=U.Grid()->oSites(); |     accelerator_for( ss, U.Grid()->oSites(), 1, { | ||||||
|     accelerator_for( ss, size, 1, { |  | ||||||
|       U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); |       U_v[ss](mu) = U_v[ss](mu) + W_v[ss](); | ||||||
|     }); |     }); | ||||||
|   } |   } | ||||||
| @@ -139,8 +138,7 @@ public: | |||||||
|     //auto start = std::chrono::high_resolution_clock::now(); |     //auto start = std::chrono::high_resolution_clock::now(); | ||||||
|     autoView(U_v,U,AcceleratorWrite); |     autoView(U_v,U,AcceleratorWrite); | ||||||
|     autoView(P_v,P,AcceleratorRead); |     autoView(P_v,P,AcceleratorRead); | ||||||
|     int size=P.Grid()->oSites(); |     accelerator_for(ss, P.Grid()->oSites(),1,{ | ||||||
|     accelerator_for(ss, size,1,{ |  | ||||||
|       for (int mu = 0; mu < Nd; mu++) { |       for (int mu = 0; mu < Nd; mu++) { | ||||||
|           U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu); |           U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu); | ||||||
|           U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu)); |           U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu)); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user