diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc index 666e4830..f811ac32 100644 --- a/benchmarks/Benchmark_ITT.cc +++ b/benchmarks/Benchmark_ITT.cc @@ -158,8 +158,10 @@ public: dbytes=0; ncomm=0; - - parallel_for(int dir=0;dir<8;dir++){ +#ifdef GRID_OMP +#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads) +#endif + for(int dir=0;dir<8;dir++){ double tbytes; int mu =dir % 4; @@ -175,9 +177,14 @@ public: int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } +#ifdef GRID_OMP + int tid = omp_get_thread_num(); +#else + int tid = dir; +#endif tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, (void *)&rbuf[dir][0], recv_from_rank, - bytes,dir); + bytes,tid); #ifdef GRID_OMP #pragma omp atomic diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 29ccf96c..304a09fc 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -169,7 +169,11 @@ int main (int argc, char ** argv) for(int lat=4;lat<=maxlat;lat+=4){ for(int Ls=8;Ls<=8;Ls*=2){ - std::vector latt_size ({lat,lat,lat,lat}); + std::vector latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); + GridCartesian Grid(latt_size,simd_layout,mpi_layout); RealD Nrank = Grid._Nprocessors; @@ -485,7 +489,8 @@ int main (int argc, char ** argv) dbytes=0; ncomm=0; - parallel_for(int dir=0;dir<8;dir++){ +#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads) + for(int dir=0;dir<8;dir++){ double tbytes; int mu =dir % 4; @@ -502,9 +507,9 @@ int main (int argc, char ** argv) int comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); } - + int tid = omp_get_thread_num(); tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank, - (void *)&rbuf[dir][0], recv_from_rank, bytes,dir); + (void *)&rbuf[dir][0], recv_from_rank, bytes,tid); #pragma omp atomic dbytes+=tbytes;