From 90ea7dfa993a42de1d8e3d64b1e33870066100e0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 19 Aug 2020 22:40:44 +0200 Subject: [PATCH] Accelerator loops for device resident comms buf --- tests/Test_stencil.cc | 94 +++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/tests/Test_stencil.cc b/tests/Test_stencil.cc index c0c12c4c..93402a1c 100644 --- a/tests/Test_stencil.cc +++ b/tests/Test_stencil.cc @@ -103,24 +103,26 @@ int main(int argc, char ** argv) { Bar = Cshift(Foo,dir,disp); // Implement a stencil code that should agree with cshift! - for(int i=0;ioSites();i++){ + { + autoView( check , Check, AcceleratorWrite); + autoView( foo , Foo, AcceleratorRead); + autoView(st_v ,myStencil,AcceleratorRead); + auto CBp=myStencil.CommBuf(); + accelerator_for(i,Check.Grid()->oSites(), 1, { - int permute_type; - StencilEntry *SE; - SE = myStencil.GetEntry(permute_type,0,i); - - autoView( check , Check, CpuWrite); - autoView( foo , Foo, CpuRead); - if ( SE->_is_local && SE->_permute ) - permute(check[i],foo[SE->_offset],permute_type); - else if (SE->_is_local) - check[i] = foo[SE->_offset]; - else { - check[i] = myStencil.CommBuf()[SE->_offset]; // <-- this is illegal on most GPU setups, host accesses cudaMalloc memory - // std::cout << " receive "<_is_local && SE->_permute ) + permute(check[i],foo[SE->_offset],permute_type); + else if (SE->_is_local) + check[i] = foo[SE->_offset]; + else { + check[i] = CBp[SE->_offset]; + } + }); + } Real nrmC = norm2(Check); Real nrmB = norm2(Bar); @@ -204,36 +206,42 @@ int main(int argc, char ** argv) { // Implement a stencil code that should agree with that darn cshift! EStencil.HaloExchange(EFoo,compress); - for(int i=0;ioSites();i++){ - int permute_type; - StencilEntry *SE; - SE = EStencil.GetEntry(permute_type,0,i); - // std::cout << "Even source "<< i<<" -> " <_offset << " "<< SE->_is_local<oSites(),1,{ + int permute_type; + StencilEntry *SE; + SE = Est.GetEntry(permute_type,0,i); - autoView( ocheck , OCheck, CpuWrite); - autoView( efoo , EFoo, CpuRead); - if ( SE->_is_local && SE->_permute ) - permute(ocheck[i],efoo[SE->_offset],permute_type); - else if (SE->_is_local) - ocheck[i] = efoo[SE->_offset]; - else - ocheck[i] = EStencil.CommBuf()[SE->_offset]; + if ( SE->_is_local && SE->_permute ) + permute(ocheck[i],efoo[SE->_offset],permute_type); + else if (SE->_is_local) + ocheck[i] = efoo[SE->_offset]; + else + ocheck[i] = ECBp[SE->_offset]; + }); } OStencil.HaloExchange(OFoo,compress); - for(int i=0;ioSites();i++){ - int permute_type; - StencilEntry *SE; - SE = OStencil.GetEntry(permute_type,0,i); - // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<oSites(),1,{ + int permute_type; + StencilEntry *SE; + SE = Ost.GetEntry(permute_type,0,i); - autoView( echeck , ECheck, CpuWrite); - autoView( ofoo , OFoo, CpuRead); - if ( SE->_is_local && SE->_permute ) - permute(echeck[i],ofoo[SE->_offset],permute_type); - else if (SE->_is_local) - echeck[i] = ofoo[SE->_offset]; - else - echeck[i] = OStencil.CommBuf()[SE->_offset]; + if ( SE->_is_local && SE->_permute ) + permute(echeck[i],ofoo[SE->_offset],permute_type); + else if (SE->_is_local) + echeck[i] = ofoo[SE->_offset]; + else + echeck[i] = OCBp[SE->_offset]; + }); } setCheckerboard(Check,ECheck);