From 80ac2a73ca4ae28115a19e8b8345e98ff5f16dd8 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 5 Aug 2021 18:33:20 -0400 Subject: [PATCH 1/3] Check is wrong (HtoD / DtoH) --- Grid/communicator/SharedMemoryMPI.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index caa03a60..d7b41cee 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -844,7 +844,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) } #endif - SharedMemoryTest(); + //SharedMemoryTest(); } ////////////////////////////////////////////////////////////////// // On node barrier From fe5aaf7677c11e917af03bd0cbbc14a29b67bd2d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 9 Aug 2021 04:06:30 -0700 Subject: [PATCH 2/3] Make comms benchmark same as Benchmark_comms_host_device --- benchmarks/Benchmark_ITT.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 032535b3..7311dfc4 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -133,15 +133,15 @@ public: std::vector xbuf(8); std::vector rbuf(8); - Grid.ShmBufferFreeAll(); + //Grid.ShmBufferFreeAll(); + uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int ncomm; double dbytes; std::vector times(Nloop); @@ -152,7 +152,7 @@ public: dbytes=0; ncomm=0; - thread_for(dir,8,{ + for(int dir=0;dir<8;dir++) { double tbytes; int mu =dir % 4; @@ -168,15 +168,16 @@ public: int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } - tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, - bytes,dir); + Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, + (void *)&rbuf[dir][0], recv_from_rank, + bytes); + tbytes = bytes; thread_critical { ncomm++; dbytes+=tbytes; } } - }); + }; Grid.Barrier(); double stop=usecond(); t_time[i] = stop-start; // microseconds @@ -196,8 +197,12 @@ public: << "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; - } - } + for(int d=0;d<8;d++){ + acceleratorFreeDevice(xbuf[d]); + acceleratorFreeDevice(rbuf[d]); + } + } + } return; } @@ -281,7 +286,6 @@ public: uint64_t lmax=32; -#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat) GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=8){ From 75030637cca1f60173ca760b09dd37852a88299d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 10 Aug 2021 05:16:30 -0700 Subject: [PATCH 3/3] Improved comms benchmark, same as benchmark_comms_host_device --- benchmarks/Benchmark_ITT.cc | 73 ++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 7311dfc4..81d1acd4 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -144,23 +144,19 @@ public: int ncomm; double dbytes; - std::vector times(Nloop); - for(int i=0;i1 ) { - dbytes=0; - ncomm=0; + std::vector times(Nloop); + for(int i=0;i1 ) { - + dbytes=0; + double start=usecond(); int xmit_to_rank; int recv_from_rank; + if ( dir == mu ) { int comm_proc=1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); @@ -171,42 +167,35 @@ public: Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, (void *)&rbuf[dir][0], recv_from_rank, bytes); - tbytes = bytes; - thread_critical { - ncomm++; - dbytes+=tbytes; - } + dbytes+=bytes; + + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } - }; - Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + timestat.statistics(t_time); + + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double bidibytes = dbytes; + + std::cout<1) ) { + if ( do_comms ) { std::cout<