1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Use accelerator loops for the device-resident comms buffer

This commit is contained in:
Peter Boyle 2020-08-19 22:40:44 +02:00
parent f866d7c33e
commit 90ea7dfa99

View File

@ -103,24 +103,26 @@ int main(int argc, char ** argv) {
Bar = Cshift(Foo,dir,disp); Bar = Cshift(Foo,dir,disp);
// Implement a stencil code that should agree with cshift! // Implement a stencil code that should agree with cshift!
for(int i=0;i<Check.Grid()->oSites();i++){ {
autoView( check , Check, AcceleratorWrite);
autoView( foo , Foo, AcceleratorRead);
autoView(st_v ,myStencil,AcceleratorRead);
auto CBp=myStencil.CommBuf();
accelerator_for(i,Check.Grid()->oSites(), 1, {
int permute_type; int permute_type;
StencilEntry *SE; StencilEntry *SE;
SE = myStencil.GetEntry(permute_type,0,i); SE = st_v.GetEntry(permute_type,0,i);
autoView( check , Check, CpuWrite); if ( SE->_is_local && SE->_permute )
autoView( foo , Foo, CpuRead); permute(check[i],foo[SE->_offset],permute_type);
if ( SE->_is_local && SE->_permute ) else if (SE->_is_local)
permute(check[i],foo[SE->_offset],permute_type); check[i] = foo[SE->_offset];
else if (SE->_is_local) else {
check[i] = foo[SE->_offset]; check[i] = CBp[SE->_offset];
else { }
check[i] = myStencil.CommBuf()[SE->_offset]; // <-- this is illegal on most GPU setups, host accesses cudaMalloc memory });
// std::cout << " receive "<<i<<" " << Check[i]<<std::endl; }
// std::cout << " Foo "<<i<<" " << Foo[i]<<std::endl;
}
}
Real nrmC = norm2(Check); Real nrmC = norm2(Check);
Real nrmB = norm2(Bar); Real nrmB = norm2(Bar);
@ -204,36 +206,42 @@ int main(int argc, char ** argv) {
// Implement a stencil code that should agree with that darn cshift! // Implement a stencil code that should agree with that darn cshift!
EStencil.HaloExchange(EFoo,compress); EStencil.HaloExchange(EFoo,compress);
for(int i=0;i<OCheck.Grid()->oSites();i++){ {
int permute_type; autoView( ocheck , OCheck, AcceleratorWrite);
StencilEntry *SE; autoView( efoo , EFoo, AcceleratorRead);
SE = EStencil.GetEntry(permute_type,0,i); autoView( Est , EStencil,AcceleratorRead);
// std::cout << "Even source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl; auto ECBp = EStencil.CommBuf();
accelerator_for(i,OCheck.Grid()->oSites(),1,{
int permute_type;
StencilEntry *SE;
SE = Est.GetEntry(permute_type,0,i);
autoView( ocheck , OCheck, CpuWrite); if ( SE->_is_local && SE->_permute )
autoView( efoo , EFoo, CpuRead); permute(ocheck[i],efoo[SE->_offset],permute_type);
if ( SE->_is_local && SE->_permute ) else if (SE->_is_local)
permute(ocheck[i],efoo[SE->_offset],permute_type); ocheck[i] = efoo[SE->_offset];
else if (SE->_is_local) else
ocheck[i] = efoo[SE->_offset]; ocheck[i] = ECBp[SE->_offset];
else });
ocheck[i] = EStencil.CommBuf()[SE->_offset];
} }
OStencil.HaloExchange(OFoo,compress); OStencil.HaloExchange(OFoo,compress);
for(int i=0;i<ECheck.Grid()->oSites();i++){ {
int permute_type; autoView( echeck , ECheck, AcceleratorWrite);
StencilEntry *SE; autoView( ofoo , OFoo, AcceleratorRead);
SE = OStencil.GetEntry(permute_type,0,i); autoView( Ost , OStencil,AcceleratorRead);
// std::cout << "ODD source "<< i<<" -> " <<SE->_offset << " "<< SE->_is_local<<std::endl; auto OCBp = OStencil.CommBuf();
accelerator_for(i,ECheck.Grid()->oSites(),1,{
int permute_type;
StencilEntry *SE;
SE = Ost.GetEntry(permute_type,0,i);
autoView( echeck , ECheck, CpuWrite); if ( SE->_is_local && SE->_permute )
autoView( ofoo , OFoo, CpuRead); permute(echeck[i],ofoo[SE->_offset],permute_type);
if ( SE->_is_local && SE->_permute ) else if (SE->_is_local)
permute(echeck[i],ofoo[SE->_offset],permute_type); echeck[i] = ofoo[SE->_offset];
else if (SE->_is_local) else
echeck[i] = ofoo[SE->_offset]; echeck[i] = OCBp[SE->_offset];
else });
echeck[i] = OStencil.CommBuf()[SE->_offset];
} }
setCheckerboard(Check,ECheck); setCheckerboard(Check,ECheck);