diff --git a/Quda/Benchmark_Quda.cpp b/Quda/Benchmark_Quda.cpp index 67c81bc..11d8c6e 100644 --- a/Quda/Benchmark_Quda.cpp +++ b/Quda/Benchmark_Quda.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,10 @@ template double bench(F const &f, double target_time, int niter_warmup // niter = std::min(1000, niter); // printfQuda("during warmup took %f s/iter, deciding on %d iters\n", secs, niter); + // important: each rank has its own timer, so their measurements can slightly vary. But + // 'niter' needs to be consistent (bug took me a couple hours to track down) + comm_broadcast_global(&niter, sizeof(niter), 0); + timer.reset(__FUNCTION__, __FILE__, __LINE__); timer.start(); for (int iter = 0; iter < niter; ++iter) diff --git a/Quda/build-benchmark.sh b/Quda/build-benchmark.sh index 9a9892c..288f3ac 100755 --- a/Quda/build-benchmark.sh +++ b/Quda/build-benchmark.sh @@ -28,5 +28,5 @@ mkdir -p "${PREFIX_DIR}" LINK_FLAGS="-Wl,-rpath,$QUDA_DIR/lib: $QUDA_DIR/lib/libquda.so $EXTRA_LIBS -lpthread -lmpi" -g++ $BUILD_FLAGS -I$QUDA_DIR/include -c -o $BUILD_DIR/Benchmark_Quda.o $script_dir/Benchmark_Quda.cpp +g++ $BUILD_FLAGS -I$QUDA_DIR/include/targets/cuda -I$QUDA_DIR/include -c -o $BUILD_DIR/Benchmark_Quda.o $script_dir/Benchmark_Quda.cpp g++ -g -O3 $BUILD_DIR/Benchmark_Quda.o -o $PREFIX_DIR/Benchmark_Quda $LINK_FLAGS -lmpi