1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-10 06:00:45 +01:00

accelerator_for is broken

This commit is contained in:
david clarke 2024-02-23 15:58:33 -07:00
parent 88b52cc045
commit 94581e3c7a
2 changed files with 33 additions and 20 deletions

View File

@ -173,8 +173,8 @@ public:
int Nsites = U_v.size(); int Nsites = U_v.size();
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs // accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
// for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
for(int nu=0;nu<Nd;nu++) { for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue; if(nu==mu) continue;
int s = stencilIndex(mu,nu); int s = stencilIndex(mu,nu);
@ -203,7 +203,7 @@ public:
W = U2*U1*adj(U0) + adj(U5)*U4*U3; W = U2*U1*adj(U0) + adj(U5)*U4*U3;
// Save 3-link construct for later and add to smeared field. // Save 3-link construct for later and add to smeared field.
U_3link_v[x](nu) = W; coalescedWrite(U_3link_v[x](nu), W);
// The index operator (x) returns the coalesced read on GPU. The view [] index returns // The index operator (x) returns the coalesced read on GPU. The view [] index returns
// a reference to the vector object. The [x](mu) returns a reference to the densely // a reference to the vector object. The [x](mu) returns a reference to the densely
@ -212,10 +212,10 @@ public:
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa. // But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W); coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
} }
}) }//)
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link // accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
// for(int site=0;site<Nsites;site++){ // ----------- 5-link for(int site=0;site<Nsites;site++){ // ----------- 5-link
int sigmaIndex = 0; int sigmaIndex = 0;
for(int nu=0;nu<Nd;nu++) { for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue; if(nu==mu) continue;
@ -239,19 +239,19 @@ public:
W = U2*U1*adj(U0) + adj(U5)*U4*U3; W = U2*U1*adj(U0) + adj(U5)*U4*U3;
if(sigmaIndex<3) { if(sigmaIndex<3) {
U_5linkA_v[x](rho) = W; coalescedWrite(U_5linkA_v[x](rho), W);
} else { } else {
U_5linkB_v[x](rho) = W; coalescedWrite(U_5linkB_v[x](rho), W);
} }
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W); coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
sigmaIndex++; sigmaIndex++;
} }
} }
}) }//)
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link // accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
// for(int site=0;site<Nsites;site++){ // ----------- 7-link for(int site=0;site<Nsites;site++){ // ----------- 7-link
int sigmaIndex = 0; int sigmaIndex = 0;
for(int nu=0;nu<Nd;nu++) { for(int nu=0;nu<Nd;nu++) {
if(nu==mu) continue; if(nu==mu) continue;
@ -286,7 +286,7 @@ public:
sigmaIndex++; sigmaIndex++;
} }
} }
}) }//)
} // end mu loop } // end mu loop

View File

@ -52,30 +52,36 @@ struct ConfParameters: Serializable {
}; };
void testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik, bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) { LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) {
Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp); Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp);
LatticeGaugeFieldD diff(&GRID), Uproj(&GRID); LatticeGaugeFieldD diff(&GRID), Uproj(&GRID);
hisq_fat.smear(Usmr, Unaik, Umu); hisq_fat.smear(Usmr, Unaik, Umu);
bool result;
if (cnaik < 1e-30) { // Testing anything but Naik term if (cnaik < 1e-30) { // Testing anything but Naik term
diff = Ucontrol-Usmr; diff = Ucontrol-Usmr;
auto absDiff = norm2(diff)/norm2(Ucontrol); auto absDiff = norm2(diff)/norm2(Ucontrol);
if (absDiff < 1e-30) { if (absDiff < 1e-30) {
Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff); Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff);
result = true;
} else { } else {
Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff); Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff);
result = false;
} }
} else { // Testing Naik specifically } else { // Testing Naik specifically
diff = Ucontrol-Unaik; diff = Ucontrol-Unaik;
auto absDiff = norm2(diff)/norm2(Ucontrol); auto absDiff = norm2(diff)/norm2(Ucontrol);
if (absDiff < 1e-30) { if (absDiff < 1e-30) {
Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff); Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff);
result = true;
} else { } else {
Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff); Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff);
result = false;
} }
hisq_fat.projectU3(Uproj,Ucontrol); hisq_fat.projectU3(Uproj,Ucontrol);
// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik"); // NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik");
} }
return result;
} }
@ -110,23 +116,30 @@ int main (int argc, char** argv) {
FieldMetaData header; FieldMetaData header;
NerscIO::readConfiguration(Umu, header, conf_in); NerscIO::readConfiguration(Umu, header, conf_in);
bool pass=true;
// Carry out various tests // Carry out various tests
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control"); NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.); pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control"); NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.); pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control"); NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.); pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control"); NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.); pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control"); NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control");
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.); pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
if(pass){
Grid_pass("All tests passed.");
} else {
Grid_error("At least one test failed.");
}
// Test a C-style instantiation // Test a C-style instantiation
double path_coeff[6] = {1, 2, 3, 4, 5, 6}; double path_coeff[6] = {1, 2, 3, 4, 5, 6};
Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff); Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff);
if (param.benchmark) { if (param.benchmark) {
autoView(U_v, Umu, CpuRead); // Gauge accessor autoView(U_v, Umu, CpuRead); // Gauge accessor