mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
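Fixed race condition (thanks, Michael): the stencil-element and link temporaries (SE0..SE5, U0..U5, W) are now declared per site inside each accelerator_for body instead of being shared across sites, and stencil entries are read through a view (gStencil_v).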
This commit is contained in:
parent
94581e3c7a
commit
b02d022993
@ -168,25 +168,26 @@ public:
|
|||||||
// We infer some types that will be needed in the calculation.
|
// We infer some types that will be needed in the calculation.
|
||||||
typedef decltype(gStencil.GetEntry(0,0)) stencilElement;
|
typedef decltype(gStencil.GetEntry(0,0)) stencilElement;
|
||||||
typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix;
|
typedef decltype(coalescedReadGeneralPermute(U_v[0](0),gStencil.GetEntry(0,0)->_permute,Nd)) U3matrix;
|
||||||
stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
|
|
||||||
U3matrix U0, U1, U2, U3, U4, U5, W;
|
|
||||||
|
|
||||||
int Nsites = U_v.size();
|
int Nsites = U_v.size();
|
||||||
|
auto gStencil_v = gStencil.View();
|
||||||
|
|
||||||
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
|
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 3-link constructs
|
||||||
for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
|
// for(int site=0;site<Nsites;site++){ // ----------- 3-link constructs
|
||||||
|
stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
|
||||||
|
U3matrix U0, U1, U2, U3, U4, U5, W;
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
int s = stencilIndex(mu,nu);
|
int s = stencilIndex(mu,nu);
|
||||||
|
|
||||||
// The stencil gives us support points in the mu-nu plane that we will use to
|
// The stencil gives us support points in the mu-nu plane that we will use to
|
||||||
// grab the links we need.
|
// grab the links we need.
|
||||||
SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
||||||
SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
||||||
SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
|
SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
|
||||||
SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
||||||
SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
||||||
SE5 = gStencil.GetEntry(s+5,site); int x_m_mu = SE5->_offset;
|
SE5 = gStencil_v.GetEntry(s+5,site); int x_m_mu = SE5->_offset;
|
||||||
|
|
||||||
// When you're deciding whether to take an adjoint, the question is: how is the
|
// When you're deciding whether to take an adjoint, the question is: how is the
|
||||||
// stored link oriented compared to the one you want? If I imagine myself travelling
|
// stored link oriented compared to the one you want? If I imagine myself travelling
|
||||||
@ -212,10 +213,12 @@ public:
|
|||||||
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
|
// But on GPU it's non-trivial and maps scalar object to vector object and vice versa.
|
||||||
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
|
coalescedWrite(U_fat_v[x](mu), U_fat_v(x)(mu) + lt.c_3*W);
|
||||||
}
|
}
|
||||||
}//)
|
})
|
||||||
|
|
||||||
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
|
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 5-link
|
||||||
for(int site=0;site<Nsites;site++){ // ----------- 5-link
|
// for(int site=0;site<Nsites;site++){ // ----------- 5-link
|
||||||
|
stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
|
||||||
|
U3matrix U0, U1, U2, U3, U4, U5, W;
|
||||||
int sigmaIndex = 0;
|
int sigmaIndex = 0;
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
@ -223,11 +226,11 @@ public:
|
|||||||
for(int rho=0;rho<Nd;rho++) {
|
for(int rho=0;rho<Nd;rho++) {
|
||||||
if (rho == mu || rho == nu) continue;
|
if (rho == mu || rho == nu) continue;
|
||||||
|
|
||||||
SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
||||||
SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
||||||
SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
|
SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
|
||||||
SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
||||||
SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
||||||
|
|
||||||
U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd);
|
U0 = coalescedReadGeneralPermute( U_v[x_p_mu ](nu ),SE0->_permute,Nd);
|
||||||
U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd);
|
U1 = coalescedReadGeneralPermute(U_3link_v[x_p_nu ](rho),SE1->_permute,Nd);
|
||||||
@ -248,10 +251,12 @@ public:
|
|||||||
sigmaIndex++;
|
sigmaIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}//)
|
})
|
||||||
|
|
||||||
// accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
|
accelerator_for(site,Nsites,Simd::Nsimd(),{ // ----------- 7-link
|
||||||
for(int site=0;site<Nsites;site++){ // ----------- 7-link
|
// for(int site=0;site<Nsites;site++){ // ----------- 7-link
|
||||||
|
stencilElement SE0, SE1, SE2, SE3, SE4, SE5;
|
||||||
|
U3matrix U0, U1, U2, U3, U4, U5, W;
|
||||||
int sigmaIndex = 0;
|
int sigmaIndex = 0;
|
||||||
for(int nu=0;nu<Nd;nu++) {
|
for(int nu=0;nu<Nd;nu++) {
|
||||||
if(nu==mu) continue;
|
if(nu==mu) continue;
|
||||||
@ -259,11 +264,11 @@ public:
|
|||||||
for(int rho=0;rho<Nd;rho++) {
|
for(int rho=0;rho<Nd;rho++) {
|
||||||
if (rho == mu || rho == nu) continue;
|
if (rho == mu || rho == nu) continue;
|
||||||
|
|
||||||
SE0 = gStencil.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
SE0 = gStencil_v.GetEntry(s+0,site); int x_p_mu = SE0->_offset;
|
||||||
SE1 = gStencil.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
SE1 = gStencil_v.GetEntry(s+1,site); int x_p_nu = SE1->_offset;
|
||||||
SE2 = gStencil.GetEntry(s+2,site); int x = SE2->_offset;
|
SE2 = gStencil_v.GetEntry(s+2,site); int x = SE2->_offset;
|
||||||
SE3 = gStencil.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
SE3 = gStencil_v.GetEntry(s+3,site); int x_p_mu_m_nu = SE3->_offset;
|
||||||
SE4 = gStencil.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
SE4 = gStencil_v.GetEntry(s+4,site); int x_m_nu = SE4->_offset;
|
||||||
|
|
||||||
U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd);
|
U0 = coalescedReadGeneralPermute(U_v[x_p_mu](nu),SE0->_permute,Nd);
|
||||||
if(sigmaIndex<3) {
|
if(sigmaIndex<3) {
|
||||||
@ -286,7 +291,7 @@ public:
|
|||||||
sigmaIndex++;
|
sigmaIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}//)
|
})
|
||||||
|
|
||||||
} // end mu loop
|
} // end mu loop
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user