1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-19 16:55:37 +01:00

accelerator_for is broken

This commit is contained in:
david clarke 2024-02-23 15:58:33 -07:00
parent 88b52cc045
commit 94581e3c7a
2 changed files with 33 additions and 20 deletions

View File

@ -173,8 +173,8 @@ public:
int Nsites = U_v.size();
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
// for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue;
int s = stencilIndex(mu,nu);
@ -203,7 +203,7 @@ public:
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
// Save 3-link construct for later and add to smeared field.
U_3link_v[x](nu) = W;
coalescedWrite(U_3link_v[x](nu), W);
// The index operator (x) returns the coalesced read on GPU. The view [] index returns
// a reference to the vector object. The [x](mu) returns a reference to the densely
@ -212,10 +212,10 @@ public:
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
}
})
}//)
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
// for(int site=0;site<Nsites;site++){ // ----------- 5-link
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
for(int site=0;site<Nsites;site++){ // ----------- 5-link
int sigmaIndex = 0;
for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue;
@ -239,19 +239,19 @@ public:
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
if(sigmaIndex<3) {
U_5linkA_v[x](rho) = W;
coalescedWrite(U_5linkA_v[x](rho), W);
} else {
U_5linkB_v[x](rho) = W;
coalescedWrite(U_5linkB_v[x](rho), W);
}
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
sigmaIndex++;
}
}
})
}//)
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
// for(int site=0;site<Nsites;site++){ // ----------- 7-link
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
for(int site=0;site<Nsites;site++){ // ----------- 7-link
int sigmaIndex = 0;
for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue;
@ -286,7 +286,7 @@ public:
sigmaIndex++;
}
}
})
}//)
} // end mu loop

View File

@ -52,30 +52,36 @@ struct ConfParameters: Serializable {
};
void testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) {
Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp);
LatticeGaugeFieldD diff(&GRID), Uproj(&GRID);
hisq_fat.smear(Usmr, Unaik, Umu);
bool result;
if (cnaik < 1e-30) { // Testing anything but Naik term
diff = Ucontrol-Usmr;
auto absDiff = norm2(diff)/norm2(Ucontrol);
if (absDiff < 1e-30) {
Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff);
result = true;
} else {
Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff);
result = false;
}
} else { // Testing Naik specifically
diff = Ucontrol-Unaik;
auto absDiff = norm2(diff)/norm2(Ucontrol);
if (absDiff < 1e-30) {
Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff);
result = true;
} else {
Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff);
result = false;
}
hisq_fat.projectU3(Uproj,Ucontrol);
hisq_fat.projectU3(Uproj,Ucontrol);
// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik");
}
return result;
}
@ -110,23 +116,30 @@ int main (int argc, char** argv) {
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, conf_in);
bool pass=true;
// Carry out various tests
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
if(pass){
Grid_pass("All tests passed.");
} else {
Grid_error("At least one test failed.");
}
// Test a C-style instantiation
double path_coeff[6] = {1, 2, 3, 4, 5, 6};
Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff);
if (param.benchmark) {
autoView(U_v, Umu, CpuRead); // Gauge accessor