diff --git a/Grid/stencil/Stencil.h b/Grid/stencil/Stencil.h index 61935b9c..1d7ed40e 100644 --- a/Grid/stencil/Stencil.h +++ b/Grid/stencil/Stencil.h @@ -121,10 +121,10 @@ void Gather_plane_exchange_table(Vector >& table,const Lattic struct StencilEntry { #ifdef GRID_NVCC uint64_t _byte_offset; // 8 bytes - uint32_t _offset; // 8 bytes + uint32_t _offset; // 4 bytes #else uint64_t _byte_offset; // 8 bytes - uint64_t _offset; // 4 bytes (8 ever required?) + uint64_t _offset; // 8 bytes (8 ever required?) #endif uint8_t _is_local; // 1 bytes uint8_t _permute; // 1 bytes @@ -1270,10 +1270,13 @@ public: PRINTIT(shm_bytes); // X bytes + R bytes // Double this to include spin projection overhead with 2:1 ratio in wilson auto gatheralltime = gathertime+gathermtime; - auto allbytes = comms_bytes+shm_bytes; std::cout << GridLogMessage << " Stencil SHM " << (shm_bytes)/gatheralltime/1000. << " GB/s per rank"<