1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
peterx.a.boyle 2021-08-10 05:41:18 -07:00
commit 1eda4d8e0b
2 changed files with 44 additions and 51 deletions

View File

@ -890,7 +890,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
} }
#endif #endif
SharedMemoryTest(); //SharedMemoryTest();
} }
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// On node barrier // On node barrier

View File

@ -133,34 +133,30 @@ public:
std::vector<HalfSpinColourVectorD *> xbuf(8); std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8); std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll(); //Grid.ShmBufferFreeAll();
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int d=0;d<8;d++){ for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
} }
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
int ncomm; int ncomm;
double dbytes; double dbytes;
std::vector<double> times(Nloop);
for(int i=0;i<Nloop;i++){
double start=usecond(); for(int dir=0;dir<8;dir++) {
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
dbytes=0; std::vector<double> times(Nloop);
ncomm=0; for(int i=0;i<Nloop;i++){
thread_for(dir,8,{ dbytes=0;
double start=usecond();
double tbytes;
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
int xmit_to_rank; int xmit_to_rank;
int recv_from_rank; int recv_from_rank;
if ( dir == mu ) { if ( dir == mu ) {
int comm_proc=1; int comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
@ -168,40 +164,38 @@ public:
int comm_proc = mpi_layout[mu]-1; int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
} }
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank, (void *)&rbuf[dir][0], recv_from_rank,
bytes,dir); bytes);
thread_critical { dbytes+=bytes;
ncomm++;
dbytes+=tbytes; double stop=usecond();
} t_time[i] = stop-start; // microseconds
} }
}); timestat.statistics(t_time);
Grid.Barrier();
double stop=usecond(); dbytes=dbytes*ppn;
t_time[i] = stop-start; // microseconds double xbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
<< bytes << " \t "
<<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
} }
for(int d=0;d<8;d++){
timestat.statistics(t_time); acceleratorFreeDevice(xbuf[d]);
acceleratorFreeDevice(rbuf[d]);
dbytes=dbytes*ppn; }
double xbytes = dbytes*0.5; }
// double rbytes = dbytes*0.5; }
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
<< bytes << " \t "
<<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
return; return;
} }
static void Memory(void) static void Memory(void)
@ -281,7 +275,6 @@ public:
uint64_t lmax=32; uint64_t lmax=32;
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9})); GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){ for(int lat=8;lat<=lmax;lat+=8){
@ -445,7 +438,7 @@ public:
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8 // 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2 // 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
// double flops=(1344.0*volume)/2; // double flops=(1344.0*volume)/2;
#if 1 #if 0
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2; double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
#else #else
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2; double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2;
@ -716,12 +709,12 @@ int main (int argc, char ** argv)
if ( do_su4 ) { if ( do_su4 ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl; std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Memory benchmark " <<std::endl; std::cout<<GridLogMessage << " SU(4) benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl; std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
Benchmark::SU4(); Benchmark::SU4();
} }
if ( do_comms && (NN>1) ) { if ( do_comms ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl; std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl; std::cout<<GridLogMessage << "=================================================================================="<<std::endl;