diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index 554f338b..d230b2a5 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -890,7 +890,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm) } #endif - SharedMemoryTest(); + //SharedMemoryTest(); } ////////////////////////////////////////////////////////////////// // On node barrier diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 032535b3..81d1acd4 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -133,34 +133,30 @@ public: std::vector xbuf(8); std::vector rbuf(8); - Grid.ShmBufferFreeAll(); + //Grid.ShmBufferFreeAll(); + uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); + rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int ncomm; double dbytes; - std::vector times(Nloop); - for(int i=0;i1 ) { - dbytes=0; - ncomm=0; + std::vector times(Nloop); + for(int i=0;i1 ) { - + dbytes=0; + double start=usecond(); int xmit_to_rank; int recv_from_rank; + if ( dir == mu ) { int comm_proc=1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); @@ -168,40 +164,38 @@ public: int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } - tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, - bytes,dir); - thread_critical { - ncomm++; - dbytes+=tbytes; - } + Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, + (void *)&rbuf[dir][0], recv_from_rank, + bytes); + dbytes+=bytes; + + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } - }); - Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + timestat.statistics(t_time); + + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double bidibytes = dbytes; + + std::cout<({45,12,81,9})); for(int lat=8;lat<=lmax;lat+=8){ @@ -445,7 +438,7 @@ public: // 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8 // 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2 // double flops=(1344.0*volume)/2; -#if 1 +#if 0 double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2; #else double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2; @@ -716,12 +709,12 @@ int main (int argc, char ** argv) if ( do_su4 ) { std::cout<1) ) { + if ( do_comms ) { std::cout<