1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-12-04 13:24:40 +00:00

Updated for verbose reporting of host-side vs. device-side checksums

This commit is contained in:
2025-12-02 23:15:32 +00:00
parent 973584e039
commit e8057d6b4a
4 changed files with 7 additions and 2 deletions

View File

@@ -828,6 +828,7 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
#define AUDIT_FLIGHT_RECORDER_ERRORS
#ifdef AUDIT_FLIGHT_RECORDER_ERRORS
uint64_t EC = FlightRecorder::CommsErrorCount();
if (EC) std::cerr << " global sum error count "<<EC<<std::endl;
this->GlobalSum(EC);
if (EC) {
for(int r=0;r<list.size();r++){
@@ -837,10 +838,12 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
uint64_t rbytes_data = list[r].bytes;
#endif
if (list[r].PacketType == InterNodeReceiveHtoD) {
std::cerr << " calling xor reduce "<<std::endl;
uint64_t csg = gpu_xor((uint64_t*)list[r].device_buf,rbytes_data/8);
uint64_t csh = cpu_xor((uint64_t*)list[r].host_buf,rbytes_data/8);
std::cerr << " Packet "<<r<<" Receive from " <<list[r].dest<<" host csum "<<csh<<" gpu csum "<<csg<<std::endl;
} if (list[r].PacketType == InterNodeXmitISend ) {
std::cerr << " calling xor reduce "<<std::endl;
uint64_t csg = gpu_xor((uint64_t*)list[r].device_buf,rbytes_data/8);
uint64_t csh = cpu_xor((uint64_t*)list[r].host_buf,rbytes_data/8);
std::cerr << " Packet "<<r<<" Send to " <<list[r].dest<<" host csum "<<csh<<" gpu csum "<<csg<<std::endl;

View File

@@ -51,7 +51,7 @@ EOF
CMD="mpiexec -np 384 -ppn 12 -envall --hostfile nodefile \
../gpu_tile.sh \
$BINARY --mpi 4.4.4.6 --grid 64.64.64.96 \
--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --log Message "
--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 6000 --log Message --debug-stdout "
echo $CMD > command-line
env > environment

View File

@@ -53,7 +53,7 @@ EOF
CMD="mpiexec -np 3072 -ppn 12 -envall --hostfile nodefile \
../gpu_tile.sh \
$BINARY --mpi 8.8.8.6 --grid 128.128.128.288 \
--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 18000 --log Message --debug-stdout --heartbeat"
--shm-mpi 0 --comms-overlap --shm 4096 --device-mem 32000 --accelerator-threads 32 --seconds 18000 --log Message --debug-stdout "
echo $CMD > command-line
env > environment

View File

@@ -82,6 +82,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
std::cout << GridLogMessage<<" in main(): Grid is initialised"<<std::endl;
const int Ls=12;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
@@ -94,6 +95,7 @@ int main (int argc, char ** argv)
GridCartesian * FGrid_f = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f);
GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f);
std::cout << GridLogMessage<<" in main(): making RNGs"<<std::endl;
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);