1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Revert to the Grid develop version, since newer LLVM compilers no longer require static loop-count variables.

This commit is contained in:
Meifeng Lin 2023-12-15 08:59:18 -05:00
parent cc5ab624a2
commit 1381dbc8ef
4 changed files with 18 additions and 33 deletions

View File

@ -120,8 +120,8 @@ public:
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
int osites=in.Grid()->oSites();
accelerator_for(ss, osites, 1, {
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;
@ -246,8 +246,8 @@ public:
int DimRep = Impl::Dimension;
autoView(in_v, in, AcceleratorWrite);
int osites=in.Grid()->oSites();
accelerator_for(ss, osites, 1, {
accelerator_for(ss, in.Grid()->oSites(), 1, {
for (int sa=0; sa<Ns; sa++)
for (int ca=0; ca<DimRep; ca++)
in_v[ss]()(sa,sa)(ca,ca) = c;

View File

@ -85,8 +85,7 @@ public:
T = Zero();
autoView(T_v,T,AcceleratorWrite);
autoView(F_v,F,AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesMinusI(F_v[i]()())));
coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesMinusI(F_v[i]()())));
@ -104,8 +103,7 @@ public:
autoView(T_v, T,AcceleratorWrite);
autoView(F_v, F,AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 1), coalescedRead(-F_v[i]()()));
coalescedWrite(T_v[i]()(1, 0), coalescedRead(F_v[i]()()));
@ -123,8 +121,7 @@ public:
autoView(T_v,T,AcceleratorWrite);
autoView(F_v,F,AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesMinusI(F_v[i]()())));
coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesI(F_v[i]()())));
@ -142,8 +139,7 @@ public:
autoView( T_v , T, AcceleratorWrite);
autoView( F_v , F, AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesI(F_v[i]()())));
coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesI(F_v[i]()())));
@ -161,8 +157,7 @@ public:
autoView( T_v ,T,AcceleratorWrite);
autoView( F_v ,F,AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 1), coalescedRead(-(F_v[i]()())));
coalescedWrite(T_v[i]()(1, 0), coalescedRead((F_v[i]()())));
@ -181,8 +176,7 @@ public:
autoView( T_v , T,AcceleratorWrite);
autoView( F_v , F,AcceleratorRead);
int osites=T.Grid()->oSites();
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
{
coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesI(F_v[i]()())));
coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesMinusI(F_v[i]()())));
@ -206,8 +200,7 @@ public:
autoView(phi_v, phi, AcceleratorRead);
autoView(C_v, C, AcceleratorRead);
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
int osites=out.Grid()->oSites();
accelerator_for(sss,osites,Nsimd,{
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
calcSpinor tmp;
multClover(tmp,C_v[sss],phi_v(sss));
coalescedWrite(out_v[sss],tmp);
@ -617,8 +610,7 @@ public:
autoView(triangle_v, triangle, AcceleratorWrite);
// NOTE: this function cannot be 'private' since nvcc forbids this for kernels
int osites=full.Grid()->oSites();
accelerator_for(ss, osites, 1, {
accelerator_for(ss, full.Grid()->oSites(), 1, {
for(int s_row = 0; s_row < Ns; s_row++) {
for(int s_col = 0; s_col < Ns; s_col++) {
if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue;
@ -658,8 +650,7 @@ public:
autoView(full_v, full, AcceleratorWrite);
// NOTE: this function cannot be 'private' since nvcc forbids this for kernels
int osites=full.Grid()->oSites();
accelerator_for(ss, osites, 1, {
accelerator_for(ss, full.Grid()->oSites(), 1, {
for(int s_row = 0; s_row < Ns; s_row++) {
for(int s_col = 0; s_col < Ns; s_col++) {
if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue;

View File

@ -110,10 +110,8 @@ public:
autoView( out_v, out, AcceleratorWrite);
autoView( phi_v, phi, AcceleratorRead);
autoView( Umu_v, Umu, AcceleratorRead);
int size=out.Grid()->oSites();
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
accelerator_for(sss,size,Nsimd,{
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
calcSpinor tmp;
multLink(tmp,Umu_v[sss],phi_v(sss),mu);
coalescedWrite(out_v[sss],tmp);
@ -205,8 +203,7 @@ public:
autoView( tmp_v , tmp, AcceleratorWrite);
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
int size=tmp.Grid()->oSites();
accelerator_for(sss,size,1,{
accelerator_for(sss,tmp.Grid()->oSites(),1,{
int sU=sss;
for(int s=0;s<Ls;s++){
int sF = s+Ls*sU;
@ -220,8 +217,7 @@ public:
const int Nsimd = SiteSpinor::Nsimd();
autoView( Btilde_v , Btilde, AcceleratorRead);
autoView( Atilde_v , Atilde, AcceleratorRead);
int size=mat.Grid()->oSites();
accelerator_for(sss,size,Nsimd,{
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
int sU=sss;
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
ColorMatrixType sum;

View File

@ -88,8 +88,7 @@ public:
static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W
autoView(U_v,U,AcceleratorWrite);
autoView(W_v,W,AcceleratorRead);
int size=U.Grid()->oSites();
accelerator_for( ss, size, 1, {
accelerator_for( ss, U.Grid()->oSites(), 1, {
U_v[ss](mu) = U_v[ss](mu) + W_v[ss]();
});
}
@ -139,8 +138,7 @@ public:
//auto start = std::chrono::high_resolution_clock::now();
autoView(U_v,U,AcceleratorWrite);
autoView(P_v,P,AcceleratorRead);
int size=P.Grid()->oSites();
accelerator_for(ss, size,1,{
accelerator_for(ss, P.Grid()->oSites(),1,{
for (int mu = 0; mu < Nd; mu++) {
U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu);
U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu));