mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Revert back to Grid develop version since new LLVM compilers now do not require static loop count variables.
This commit is contained in:
parent
cc5ab624a2
commit
1381dbc8ef
@ -120,8 +120,8 @@ public:
|
||||
int DimRep = Impl::Dimension;
|
||||
|
||||
autoView(in_v, in, AcceleratorWrite);
|
||||
int osites=in.Grid()->oSites();
|
||||
accelerator_for(ss, osites, 1, {
|
||||
|
||||
accelerator_for(ss, in.Grid()->oSites(), 1, {
|
||||
for (int sa=0; sa<Ns; sa++)
|
||||
for (int ca=0; ca<DimRep; ca++)
|
||||
in_v[ss]()(sa,sa)(ca,ca) = c;
|
||||
@ -246,8 +246,8 @@ public:
|
||||
int DimRep = Impl::Dimension;
|
||||
|
||||
autoView(in_v, in, AcceleratorWrite);
|
||||
int osites=in.Grid()->oSites();
|
||||
accelerator_for(ss, osites, 1, {
|
||||
|
||||
accelerator_for(ss, in.Grid()->oSites(), 1, {
|
||||
for (int sa=0; sa<Ns; sa++)
|
||||
for (int ca=0; ca<DimRep; ca++)
|
||||
in_v[ss]()(sa,sa)(ca,ca) = c;
|
||||
|
@ -85,8 +85,7 @@ public:
|
||||
T = Zero();
|
||||
autoView(T_v,T,AcceleratorWrite);
|
||||
autoView(F_v,F,AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesMinusI(F_v[i]()())));
|
||||
coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesMinusI(F_v[i]()())));
|
||||
@ -104,8 +103,7 @@ public:
|
||||
|
||||
autoView(T_v, T,AcceleratorWrite);
|
||||
autoView(F_v, F,AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 1), coalescedRead(-F_v[i]()()));
|
||||
coalescedWrite(T_v[i]()(1, 0), coalescedRead(F_v[i]()()));
|
||||
@ -123,8 +121,7 @@ public:
|
||||
|
||||
autoView(T_v,T,AcceleratorWrite);
|
||||
autoView(F_v,F,AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesMinusI(F_v[i]()())));
|
||||
coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesI(F_v[i]()())));
|
||||
@ -142,8 +139,7 @@ public:
|
||||
|
||||
autoView( T_v , T, AcceleratorWrite);
|
||||
autoView( F_v , F, AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 1), coalescedRead(timesI(F_v[i]()())));
|
||||
coalescedWrite(T_v[i]()(1, 0), coalescedRead(timesI(F_v[i]()())));
|
||||
@ -161,8 +157,7 @@ public:
|
||||
|
||||
autoView( T_v ,T,AcceleratorWrite);
|
||||
autoView( F_v ,F,AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 1), coalescedRead(-(F_v[i]()())));
|
||||
coalescedWrite(T_v[i]()(1, 0), coalescedRead((F_v[i]()())));
|
||||
@ -181,8 +176,7 @@ public:
|
||||
|
||||
autoView( T_v , T,AcceleratorWrite);
|
||||
autoView( F_v , F,AcceleratorRead);
|
||||
int osites=T.Grid()->oSites();
|
||||
accelerator_for(i, osites,CloverField::vector_type::Nsimd(),
|
||||
accelerator_for(i, T.Grid()->oSites(),CloverField::vector_type::Nsimd(),
|
||||
{
|
||||
coalescedWrite(T_v[i]()(0, 0), coalescedRead(timesI(F_v[i]()())));
|
||||
coalescedWrite(T_v[i]()(1, 1), coalescedRead(timesMinusI(F_v[i]()())));
|
||||
@ -206,8 +200,7 @@ public:
|
||||
autoView(phi_v, phi, AcceleratorRead);
|
||||
autoView(C_v, C, AcceleratorRead);
|
||||
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
|
||||
int osites=out.Grid()->oSites();
|
||||
accelerator_for(sss,osites,Nsimd,{
|
||||
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
|
||||
calcSpinor tmp;
|
||||
multClover(tmp,C_v[sss],phi_v(sss));
|
||||
coalescedWrite(out_v[sss],tmp);
|
||||
@ -617,8 +610,7 @@ public:
|
||||
autoView(triangle_v, triangle, AcceleratorWrite);
|
||||
|
||||
// NOTE: this function cannot be 'private' since nvcc forbids this for kernels
|
||||
int osites=full.Grid()->oSites();
|
||||
accelerator_for(ss, osites, 1, {
|
||||
accelerator_for(ss, full.Grid()->oSites(), 1, {
|
||||
for(int s_row = 0; s_row < Ns; s_row++) {
|
||||
for(int s_col = 0; s_col < Ns; s_col++) {
|
||||
if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue;
|
||||
@ -658,8 +650,7 @@ public:
|
||||
autoView(full_v, full, AcceleratorWrite);
|
||||
|
||||
// NOTE: this function cannot be 'private' since nvcc forbids this for kernels
|
||||
int osites=full.Grid()->oSites();
|
||||
accelerator_for(ss, osites, 1, {
|
||||
accelerator_for(ss, full.Grid()->oSites(), 1, {
|
||||
for(int s_row = 0; s_row < Ns; s_row++) {
|
||||
for(int s_col = 0; s_col < Ns; s_col++) {
|
||||
if(abs(s_row - s_col) > 1 || s_row + s_col == 3) continue;
|
||||
|
@ -110,10 +110,8 @@ public:
|
||||
autoView( out_v, out, AcceleratorWrite);
|
||||
autoView( phi_v, phi, AcceleratorRead);
|
||||
autoView( Umu_v, Umu, AcceleratorRead);
|
||||
int size=out.Grid()->oSites();
|
||||
|
||||
typedef decltype(coalescedRead(out_v[0])) calcSpinor;
|
||||
accelerator_for(sss,size,Nsimd,{
|
||||
accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
|
||||
calcSpinor tmp;
|
||||
multLink(tmp,Umu_v[sss],phi_v(sss),mu);
|
||||
coalescedWrite(out_v[sss],tmp);
|
||||
@ -205,8 +203,7 @@ public:
|
||||
autoView( tmp_v , tmp, AcceleratorWrite);
|
||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||
int size=tmp.Grid()->oSites();
|
||||
accelerator_for(sss,size,1,{
|
||||
accelerator_for(sss,tmp.Grid()->oSites(),1,{
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
@ -220,8 +217,7 @@ public:
|
||||
const int Nsimd = SiteSpinor::Nsimd();
|
||||
autoView( Btilde_v , Btilde, AcceleratorRead);
|
||||
autoView( Atilde_v , Atilde, AcceleratorRead);
|
||||
int size=mat.Grid()->oSites();
|
||||
accelerator_for(sss,size,Nsimd,{
|
||||
accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
|
||||
int sU=sss;
|
||||
typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
|
||||
ColorMatrixType sum;
|
||||
|
@ -88,8 +88,7 @@ public:
|
||||
static inline void AddLink(Field &U, LinkField &W, int mu) { // U[mu] += W
|
||||
autoView(U_v,U,AcceleratorWrite);
|
||||
autoView(W_v,W,AcceleratorRead);
|
||||
int size=U.Grid()->oSites();
|
||||
accelerator_for( ss, size, 1, {
|
||||
accelerator_for( ss, U.Grid()->oSites(), 1, {
|
||||
U_v[ss](mu) = U_v[ss](mu) + W_v[ss]();
|
||||
});
|
||||
}
|
||||
@ -139,8 +138,7 @@ public:
|
||||
//auto start = std::chrono::high_resolution_clock::now();
|
||||
autoView(U_v,U,AcceleratorWrite);
|
||||
autoView(P_v,P,AcceleratorRead);
|
||||
int size=P.Grid()->oSites();
|
||||
accelerator_for(ss, size,1,{
|
||||
accelerator_for(ss, P.Grid()->oSites(),1,{
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U_v[ss](mu) = Exponentiate(P_v[ss](mu), ep, Nexp) * U_v[ss](mu);
|
||||
U_v[ss](mu) = Group::ProjectOnGeneralGroup(U_v[ss](mu));
|
||||
|
Loading…
Reference in New Issue
Block a user