1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Improve meemory benchmarks for GPU (avoid host mem ping pong)

This commit is contained in:
Peter Boyle 2020-10-08 19:51:28 -04:00
parent 6b1486e89b
commit 5f0fe029d2

View File

@ -125,7 +125,7 @@ public:
lat*mpi_layout[1], lat*mpi_layout[1],
lat*mpi_layout[2], lat*mpi_layout[2],
lat*mpi_layout[3]}); lat*mpi_layout[3]});
std::cout << GridLogMessage<< latt_size <<std::endl;
GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors; RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount(); RealD Nnode = Grid.NodeCount();
@ -137,8 +137,8 @@ public:
for(int d=0;d<8;d++){ for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
} }
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
@ -224,7 +224,7 @@ public:
uint64_t lmax=32; uint64_t lmax=32;
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) #define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){ for(int lat=8;lat<=lmax;lat+=8){
@ -249,11 +249,6 @@ public:
double start=usecond(); double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
z=a*x-y; z=a*x-y;
autoView( x_v , x, CpuWrite);
autoView( y_v , y, CpuWrite);
autoView( z_v , z, CpuRead);
x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
y_v[4]=z_v[4];
} }
double stop=usecond(); double stop=usecond();
double time = (stop-start)/Nloop*1000; double time = (stop-start)/Nloop*1000;
@ -286,7 +281,7 @@ public:
uint64_t lmax=32; uint64_t lmax=32;
#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat) #define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){ for(int lat=8;lat<=lmax;lat+=8){
@ -309,11 +304,6 @@ public:
double start=usecond(); double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
z=x*y; z=x*y;
autoView( x_v , x, CpuWrite);
autoView( y_v , y, CpuWrite);
autoView( z_v , z, CpuRead);
x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
y_v[4]=z_v[4];
} }
double stop=usecond(); double stop=usecond();
double time = (stop-start)/Nloop*1000; double time = (stop-start)/Nloop*1000;