From 9261c0da89280426bc91ee7ee1fbf399728b3f57 Mon Sep 17 00:00:00 2001 From: ferben Date: Wed, 20 Nov 2019 17:06:32 +0000 Subject: [PATCH 1/7] several typos in hadrons --- Hadrons/Global.hpp | 2 +- Hadrons/Modules/MContraction/WeakEye3pt.hpp | 2 +- Hadrons/Modules/MContraction/WeakNonEye3pt.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Hadrons/Global.hpp b/Hadrons/Global.hpp index 9b592721..b82c6288 100644 --- a/Hadrons/Global.hpp +++ b/Hadrons/Global.hpp @@ -272,7 +272,7 @@ struct Correlator: Serializable { GRID_SERIALIZABLE_CLASS_MEMBERS(ARG(Correlator), Metadata, info, - std::vector, corr); + std::vector, corr); }; END_HADRONS_NAMESPACE diff --git a/Hadrons/Modules/MContraction/WeakEye3pt.hpp b/Hadrons/Modules/MContraction/WeakEye3pt.hpp index ea7ff529..b4165098 100644 --- a/Hadrons/Modules/MContraction/WeakEye3pt.hpp +++ b/Hadrons/Modules/MContraction/WeakEye3pt.hpp @@ -144,7 +144,7 @@ void TWeakEye3pt::execute(void) { LOG(Message) << "Computing mesonic weak 3pt contractions, eye topologies" << std::endl; LOG(Message) << "gIn : " << par().gammaIn << std::endl; - LOG(Message) << "gOut: " << par().gammaIn << std::endl; + LOG(Message) << "gOut: " << par().gammaOut << std::endl; LOG(Message) << "tOut: " << par().tOut << std::endl; LOG(Message) << "qbl : " << par().qBarLeft << std::endl; LOG(Message) << "qbr : " << par().qBarRight << std::endl; diff --git a/Hadrons/Modules/MContraction/WeakNonEye3pt.hpp b/Hadrons/Modules/MContraction/WeakNonEye3pt.hpp index 08a1188b..4068cd8c 100644 --- a/Hadrons/Modules/MContraction/WeakNonEye3pt.hpp +++ b/Hadrons/Modules/MContraction/WeakNonEye3pt.hpp @@ -144,7 +144,7 @@ void TWeakNonEye3pt::execute(void) { LOG(Message) << "Computing mesonic weak 3pt contractions, non-eye topologies" << std::endl; LOG(Message) << "gIn : " << par().gammaIn << std::endl; - LOG(Message) << "gOut: " << par().gammaIn << std::endl; + LOG(Message) << "gOut: " << par().gammaOut << std::endl; LOG(Message) << "ql : " << 
par().qLeft << std::endl; LOG(Message) << "qbl : " << par().qBarLeft << std::endl; LOG(Message) << "qr : " << par().qRight << std::endl; From cf95a460a5898573c98abdc6bc82464cd3fccab3 Mon Sep 17 00:00:00 2001 From: Fionn O hOgain Date: Thu, 21 Nov 2019 17:17:55 +0000 Subject: [PATCH 2/7] Sliced propagator contraction was not producing any results because buf.size()=0 --- Hadrons/Modules/MContraction/Meson.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hadrons/Modules/MContraction/Meson.hpp b/Hadrons/Modules/MContraction/Meson.hpp index ee923341..34b46b73 100644 --- a/Hadrons/Modules/MContraction/Meson.hpp +++ b/Hadrons/Modules/MContraction/Meson.hpp @@ -199,7 +199,7 @@ void TMeson::execute(void) Gamma gSnk(gammaList[i].first); Gamma gSrc(gammaList[i].second); - for (unsigned int t = 0; t < buf.size(); ++t) + for (unsigned int t = 0; t < nt; ++t) { result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); } From 98ea67b6369fffaf087582d1211f4806e0302ac3 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 Nov 2019 15:00:46 -0500 Subject: [PATCH 3/7] IBM summit optimisation. Synchronise in node is still between 2 halves of AC922, so could be a little faster --- Grid/communicator/SharedMemoryMPI.cc | 36 +++++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/Grid/communicator/SharedMemoryMPI.cc b/Grid/communicator/SharedMemoryMPI.cc index 4af7c402..ed465252 100644 --- a/Grid/communicator/SharedMemoryMPI.cc +++ b/Grid/communicator/SharedMemoryMPI.cc @@ -162,11 +162,8 @@ static inline int divides(int a,int b) void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmDims) { //////////////////////////////////////////////////////////////// - // Assert power of two shm_size. 
+ // Powers of 2,3,5 only in prime decomposition for now //////////////////////////////////////////////////////////////// - int log2size = Log2Size(WorldShmSize,MAXLOG2RANKSPERNODE); - assert(log2size != -1); - int ndimension = WorldDims.size(); ShmDims=Coordinate(ndimension,1); @@ -177,7 +174,8 @@ void GlobalSharedMemory::GetShmDims(const Coordinate &WorldDims,Coordinate &ShmD while(AutoShmSize != WorldShmSize) { for(int p=0;p ranks(size); for(int r=0;r Date: Thu, 21 Nov 2019 15:02:10 -0500 Subject: [PATCH 4/7] --enable-summit flag --- configure.ac | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 640b2dbf..d451c163 100644 --- a/configure.ac +++ b/configure.ac @@ -136,15 +136,15 @@ case ${ac_SFW_FP16} in esac ############### SUMMIT JSRUN -AC_ARG_ENABLE([jsrun], - [AC_HELP_STRING([--enable-jsrun=yes|no], [enable IBMs jsrun resource manager for SUMMIT])], - [ac_JSRUN=${enable_jsrun}], [ac_JSRUN=no]) -case ${ac_JSRUN} in +AC_ARG_ENABLE([summit], + [AC_HELP_STRING([--enable-summit=yes|no], [enable IBMs jsrun resource manager for SUMMIT])], + [ac_JSRUN=${enable_summit}], [ac_SUMMIT=no]) +case ${ac_SUMMIT} in + no);; yes) AC_DEFINE([GRID_IBM_SUMMIT],[1],[Let JSRUN manage the GPU device allocation]);; - no);; *) - AC_MSG_ERROR(["JSRUN option not supported ${ac_JSRUN}"]);; + AC_DEFINE([GRID_IBM_SUMMIT],[1],[Let JSRUN manage the GPU device allocation]);; esac ############### Intel libraries @@ -255,7 +255,7 @@ AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=code], AC_ARG_ENABLE([gen-simd-width], [AS_HELP_STRING([--enable-gen-simd-width=size], - [size (in bytes) of the generic SIMD vectors (default: 32)])], + [size (in bytes) of the generic SIMD vectors (default: 64)])], [ac_gen_simd_width=$enable_gen_simd_width], [ac_gen_simd_width=64]) From feb1ff349413d98ffcc720f69c682d33633375d5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 Nov 2019 20:03:39 +0000 Subject: [PATCH 5/7] Fix compile failure under 
NVCC --- Grid/qcd/utils/BaryonUtils.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Grid/qcd/utils/BaryonUtils.h b/Grid/qcd/utils/BaryonUtils.h index 73d41422..25c91342 100644 --- a/Grid/qcd/utils/BaryonUtils.h +++ b/Grid/qcd/utils/BaryonUtils.h @@ -42,8 +42,8 @@ public: typedef typename FImpl::SitePropagator pobj; typedef typename ComplexField::vector_object vobj; - static constexpr int epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; - static constexpr Complex epsilon_sgn[6]= {1,1,1,-1,-1,-1}; + static const int epsilon[6][3] ; + static const Complex epsilon_sgn[6]; private: template @@ -83,10 +83,15 @@ public: robj &result); }; -template -constexpr int BaryonUtils::epsilon[6][3]; -template -constexpr Complex BaryonUtils::epsilon_sgn[6]; +template +const int BaryonUtils::epsilon[6][3] = {{0,1,2},{1,2,0},{2,0,1},{0,2,1},{2,1,0},{1,0,2}}; +template +const Complex BaryonUtils::epsilon_sgn[6] = {Complex(1), + Complex(1), + Complex(1), + Complex(-1), + Complex(-1), + Complex(-1)}; template template From d91ba1f6cc482ab92dad6b36042d6454a4a9102b Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Thu, 21 Nov 2019 20:11:19 +0000 Subject: [PATCH 6/7] NVCC timer support --- Grid/perfmon/PerfCount.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Grid/perfmon/PerfCount.h b/Grid/perfmon/PerfCount.h index 13d59719..b8229eec 100644 --- a/Grid/perfmon/PerfCount.h +++ b/Grid/perfmon/PerfCount.h @@ -44,8 +44,13 @@ Author: paboyle #include #endif #ifdef __x86_64__ +#ifdef GRID_NVCC +accelerator_inline uint64_t __rdtsc(void) { return 0; } +accelerator_inline uint64_t __rdpmc(int ) { return 0; } +#else #include #endif +#endif NAMESPACE_BEGIN(Grid); @@ -89,13 +94,8 @@ inline uint64_t cyclecount(void){ return tmp; } #elif defined __x86_64__ -#ifdef GRID_NVCC -accelerator_inline uint64_t __rdtsc(void) { return 0; } -#endif inline uint64_t cyclecount(void){ return __rdtsc(); - // unsigned int dummy; - // 
return __rdtscp(&dummy); } #else From d1a89af8c9476ef1b322b83dc49261a5ac331e5f Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 22 Nov 2019 10:49:10 -0500 Subject: [PATCH 7/7] Change to reporting --- Grid/stencil/Stencil.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index b361a19d..37b866cb 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -1233,7 +1233,7 @@ public: }; void Report(void) { -#define AVERAGE(A) _grid->GlobalSum(A);A/=NP; +#define AVERAGE(A) #define PRINTIT(A) AVERAGE(A); std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<_Nprocessors; RealD NN = _grid->NodeCount(); @@ -1281,11 +1281,13 @@ public: std::cout << GridLogMessage << " Stencil SHM mem " << (membytes)/gatheralltime/1000. << " GB/s per rank"<