mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-10 06:00:45 +01:00
accelerator_for is broken
This commit is contained in:
parent
88b52cc045
commit
94581e3c7a
@ -173,8 +173,8 @@ public:
|
|||||||
|
|
||||||
int Nsites = U_v.size();
|
int Nsites = U_v.size();
|
||||||
|
|
||||||
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
|
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
|
||||||
// for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
|
for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
int s = stencilIndex(mu,nu);
|
int s = stencilIndex(mu,nu);
|
||||||
@ -203,7 +203,7 @@ public:
|
|||||||
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
|
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
|
||||||
|
|
||||||
// Save 3-link construct for later and add to smeared field.
|
// Save 3-link construct for later and add to smeared field.
|
||||||
U_3link_v[x](nu) = W;
|
coalescedWrite(U_3link_v[x](nu), W);
|
||||||
|
|
||||||
// The index operator (x) returns the coalesced read on GPU. The view [] index returns
|
// The index operator (x) returns the coalesced read on GPU. The view [] index returns
|
||||||
// a reference to the vector object. The [x](mu) returns a reference to the densely
|
// a reference to the vector object. The [x](mu) returns a reference to the densely
|
||||||
@ -212,10 +212,10 @@ public:
|
|||||||
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
|
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
|
||||||
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
|
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
|
||||||
}
|
}
|
||||||
})
|
}//)
|
||||||
|
|
||||||
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
|
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
|
||||||
// for(int site=0;site<Nsites;site++){ // ----------- 5-link
|
for(int site=0;site<Nsites;site++){ // ----------- 5-link
|
||||||
int sigmaIndex = 0;
|
int sigmaIndex = 0;
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
@ -239,19 +239,19 @@ public:
|
|||||||
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
|
W = U2*U1*adj(U0) + adj(U5)*U4*U3;
|
||||||
|
|
||||||
if(sigmaIndex<3) {
|
if(sigmaIndex<3) {
|
||||||
U_5linkA_v[x](rho) = W;
|
coalescedWrite(U_5linkA_v[x](rho), W);
|
||||||
} else {
|
} else {
|
||||||
U_5linkB_v[x](rho) = W;
|
coalescedWrite(U_5linkB_v[x](rho), W);
|
||||||
}
|
}
|
||||||
|
|
||||||
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
|
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_5*W);
|
||||||
sigmaIndex++;
|
sigmaIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}//)
|
||||||
|
|
||||||
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
|
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
|
||||||
// for(int site=0;site<Nsites;site++){ // ----------- 7-link
|
for(int site=0;site<Nsites;site++){ // ----------- 7-link
|
||||||
int sigmaIndex = 0;
|
int sigmaIndex = 0;
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
@ -286,7 +286,7 @@ public:
|
|||||||
sigmaIndex++;
|
sigmaIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}//)
|
||||||
|
|
||||||
} // end mu loop
|
} // end mu loop
|
||||||
|
|
||||||
|
@ -52,30 +52,36 @@ struct ConfParameters: Serializable {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
|
bool testSmear(GridCartesian& GRID, LatticeGaugeFieldD Umu, LatticeGaugeFieldD Usmr, LatticeGaugeFieldD Unaik,
|
||||||
LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) {
|
LatticeGaugeFieldD Ucontrol, Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) {
|
||||||
Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp);
|
Smear_HISQ<PeriodicGimplD> hisq_fat(&GRID,c1,cnaik,c3,c5,c7,clp);
|
||||||
LatticeGaugeFieldD diff(&GRID), Uproj(&GRID);
|
LatticeGaugeFieldD diff(&GRID), Uproj(&GRID);
|
||||||
hisq_fat.smear(Usmr, Unaik, Umu);
|
hisq_fat.smear(Usmr, Unaik, Umu);
|
||||||
|
bool result;
|
||||||
if (cnaik < 1e-30) { // Testing anything but Naik term
|
if (cnaik < 1e-30) { // Testing anything but Naik term
|
||||||
diff = Ucontrol-Usmr;
|
diff = Ucontrol-Usmr;
|
||||||
auto absDiff = norm2(diff)/norm2(Ucontrol);
|
auto absDiff = norm2(diff)/norm2(Ucontrol);
|
||||||
if (absDiff < 1e-30) {
|
if (absDiff < 1e-30) {
|
||||||
Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff);
|
Grid_pass(" |Umu-Usmr|/|Umu| = ",absDiff);
|
||||||
|
result = true;
|
||||||
} else {
|
} else {
|
||||||
Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff);
|
Grid_error(" |Umu-Usmr|/|Umu| = ",absDiff);
|
||||||
|
result = false;
|
||||||
}
|
}
|
||||||
} else { // Testing Naik specifically
|
} else { // Testing Naik specifically
|
||||||
diff = Ucontrol-Unaik;
|
diff = Ucontrol-Unaik;
|
||||||
auto absDiff = norm2(diff)/norm2(Ucontrol);
|
auto absDiff = norm2(diff)/norm2(Ucontrol);
|
||||||
if (absDiff < 1e-30) {
|
if (absDiff < 1e-30) {
|
||||||
Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff);
|
Grid_pass(" |Umu-Unaik|/|Umu| = ",absDiff);
|
||||||
|
result = true;
|
||||||
} else {
|
} else {
|
||||||
Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff);
|
Grid_error(" |Umu-Unaik|/|Umu| = ",absDiff);
|
||||||
|
result = false;
|
||||||
}
|
}
|
||||||
hisq_fat.projectU3(Uproj,Ucontrol);
|
hisq_fat.projectU3(Uproj,Ucontrol);
|
||||||
// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik");
|
// NerscIO::writeConfiguration(Unaik,"nersc.l8t4b3360.naik");
|
||||||
}
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -110,23 +116,30 @@ int main (int argc, char** argv) {
|
|||||||
FieldMetaData header;
|
FieldMetaData header;
|
||||||
NerscIO::readConfiguration(Umu, header, conf_in);
|
NerscIO::readConfiguration(Umu, header, conf_in);
|
||||||
|
|
||||||
|
bool pass=true;
|
||||||
|
|
||||||
// Carry out various tests
|
// Carry out various tests
|
||||||
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control");
|
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357lplink.control");
|
||||||
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
|
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,-1/8.);
|
||||||
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control");
|
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.357link.control");
|
||||||
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
|
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,1/384.,0.);
|
||||||
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control");
|
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.35link.control");
|
||||||
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
|
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,1/64.,0.,0.);
|
||||||
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control");
|
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.3link.control");
|
||||||
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
|
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,1/8.,0.,1/16.,0.,0.,0.);
|
||||||
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control");
|
NerscIO::readConfiguration(Ucontrol, header, "nersc.l8t4b3360.naik.control");
|
||||||
testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
|
pass *= testSmear(GRID,Umu,Usmr,Unaik,Ucontrol,0.,0.8675309,0.,0.,0.,0.);
|
||||||
|
|
||||||
|
if(pass){
|
||||||
|
Grid_pass("All tests passed.");
|
||||||
|
} else {
|
||||||
|
Grid_error("At least one test failed.");
|
||||||
|
}
|
||||||
|
|
||||||
// Test a C-style instantiation
|
// Test a C-style instantiation
|
||||||
double path_coeff[6] = {1, 2, 3, 4, 5, 6};
|
double path_coeff[6] = {1, 2, 3, 4, 5, 6};
|
||||||
Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff);
|
Smear_HISQ<PeriodicGimplD> hisq_fat_Cstyle(&GRID,path_coeff);
|
||||||
|
|
||||||
|
|
||||||
if (param.benchmark) {
|
if (param.benchmark) {
|
||||||
|
|
||||||
autoView(U_v, Umu, CpuRead); // Gauge accessor
|
autoView(U_v, Umu, CpuRead); // Gauge accessor
|
||||||
|
Loading…
x
Reference in New Issue
Block a user