#include using namespace std; using namespace Grid; using namespace Grid::QCD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); std::vector simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); int threads = GridThread::GetThreads(); std::cout<1) nmu++; std::cout< latt_size ({lat*mpi_layout[0], lat*mpi_layout[1], lat*mpi_layout[2], lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double start=usecond(); for(int i=0;i requests; ncomm=0; for(int mu=0;mu<4;mu++){ if (mpi_layout[mu]>1 ) { ncomm++; int comm_proc=1; int xmit_to_rank; int recv_from_rank; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu][0], xmit_to_rank, (void *)&rbuf[mu][0], recv_from_rank, bytes); comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], xmit_to_rank, (void *)&rbuf[mu+4][0], recv_from_rank, bytes); } } Grid.SendToRecvFromComplete(requests); Grid.Barrier(); } double stop=usecond(); double dbytes = bytes; double xbytes = Nloop*dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; double time = stop-start; // microseconds std::cout< latt_size ({lat,lat,lat,lat}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double start=usecond(); for(int i=0;i1 ) { ncomm++; int comm_proc=1; int xmit_to_rank; int recv_from_rank; { std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu][0], xmit_to_rank, (void *)&rbuf[mu][0], recv_from_rank, bytes); Grid.SendToRecvFromComplete(requests); } comm_proc = mpi_layout[mu]-1; { std::vector requests; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.SendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0], xmit_to_rank, (void *)&rbuf[mu+4][0], recv_from_rank, bytes); Grid.SendToRecvFromComplete(requests); } } } Grid.Barrier(); } double stop=usecond(); double dbytes = bytes; double xbytes = Nloop*dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; double time = stop-start; std::cout<