From 538e64e5b4e57fa146888825427b394ee584cd9e Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Thu, 8 Dec 2016 05:50:40 +0000 Subject: [PATCH] Cleaning up the CG reproduciblity test. More info reported --- lib/Bitwise.cc | 48 ++++++++++++++++ lib/Bitwise.h | 59 ++++++++++++++++++++ lib/Grid.h | 1 + lib/algorithms/iterative/ConjugateGradient.h | 15 +++-- lib/communicator/Communicator_base.cc | 11 ++-- lib/communicator/Communicator_base.h | 3 + lib/communicator/Communicator_mpi.cc | 9 +++ lib/communicator/Communicator_mpi3.cc | 5 ++ lib/communicator/Communicator_mpi3_leader.cc | 3 + lib/communicator/Communicator_none.cc | 31 +++++----- lib/communicator/Communicator_shmem.cc | 4 ++ lib/lattice/Lattice_reduction.h | 45 +++++++++------ tests/hmc/Test_hmc_WilsonFermionGauge.cc | 4 +- 13 files changed, 196 insertions(+), 42 deletions(-) create mode 100644 lib/Bitwise.cc create mode 100644 lib/Bitwise.h diff --git a/lib/Bitwise.cc b/lib/Bitwise.cc new file mode 100644 index 00000000..a33b1792 --- /dev/null +++ b/lib/Bitwise.cc @@ -0,0 +1,48 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/Bitwise.cc + +Copyright (C) 2016 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_BITWISE_H +#define GRID_BITWISE_H + +#include +#include +#include +#include + +namespace Grid { + +void show_binaryrep(const unsigned char* a, size_t size) { + const unsigned char* beg = a; + const unsigned char* end = a + size; + while (beg != end) std::cout << std::bitset(*beg++) << ' '; + std::cout << '\n'; +} + +} // namespace + +#endif diff --git a/lib/Bitwise.h b/lib/Bitwise.h new file mode 100644 index 00000000..6661d528 --- /dev/null +++ b/lib/Bitwise.h @@ -0,0 +1,59 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./lib/Bitwise.h + +Copyright (C) 2016 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_BITWISE_H +#define GRID_BITWISE_H + +#include +#include + +namespace Grid { + +void show_binaryrep(const unsigned char* a, size_t size); + +template +void show_binaryrep(const T& a) { + const char* beg = reinterpret_cast(&a); + const char* end = beg + sizeof(a); + while (beg != end) std::cout << std::bitset(*beg++) << ' '; + std::cout << '\n'; +} + +template +void bitwise_xor(T& l, T& r, unsigned char* xors) { + assert(sizeof(l) == sizeof(r)); + unsigned char* org = reinterpret_cast(&l); + unsigned char* cur = reinterpret_cast(&r); + int words = sizeof(l) / sizeof(*org); + unsigned char result = 0; + for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]); +} + +}; // namespace + +#endif diff --git a/lib/Grid.h b/lib/Grid.h index 0c5983f3..15e808f0 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -62,6 +62,7 @@ Author: paboyle #include #include "Config.h" #include +#include #include #include #include diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 0c90c5b4..583178ff 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -37,10 +37,7 @@ struct CG_state { bool do_repro; std::vector residuals; - CG_state() { - do_repro = false; - residuals.clear(); - } + CG_state() {reset();} void reset(){ do_repro = false; @@ -71,7 +68,14 @@ class ConjugateGradient : public OperatorFunction { : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv), - ReproTest(ReproducibilityTest){}; + ReproTest(ReproducibilityTest){ + if(ReproducibilityTest == true && err_on_no_conv == true){ + std::cout << GridLogMessage << "CG: Reproducibility test ON "<< + "and error on convergence ON are incompatible options" << std::endl; + exit(1); + } + + }; void operator()(LinearOperatorBase &Linop, const Field &src, @@ -210,6 +214,7 @@ class ConjugateGradient : public OperatorFunction { ReprTest.do_check = true; ReprTest.reset_counter(); this->operator()(Linop, src, psi_start);// run the repro test + std::cout << GridLogMessage << "Test passed" << std::endl; } // Clear state diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index b003d867..9496dc07 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -65,6 +65,7 @@ const std::vector & CartesianCommunicator::ThisProcessorCoor(void) { return const std::vector & CartesianCommunicator::ProcessorGrid(void) { return _processors; }; int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; }; + //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid //////////////////////////////////////////////////////////////////////////////// @@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes) + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes) { SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); } diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 94ad1093..e50414f9 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -68,6 +68,8 @@ class CartesianCommunicator { static MPI_Comm communicator_world; MPI_Comm communicator; typedef MPI_Request CommsRequest_t; + static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name + static int length; #else typedef int CommsRequest_t; #endif @@ -149,6 +151,7 @@ class CartesianCommunicator { const std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; + void PrintRankInfo(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 65ced9c7..38ee70f9 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -35,6 +35,8 @@ namespace Grid { // Info that is setup once and indept of cartesian layout /////////////////////////////////////////////////////////////////////////////////////////////////// MPI_Comm CartesianCommunicator::communicator_world; +char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name +int CartesianCommunicator::length; // Should error check all MPI calls. void CartesianCommunicator::Init(int *argc, char ***argv) { @@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { MPI_Init(argc,argv); } MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + + MPI_Get_processor_name(name, &length); ShmInitGeneric(); } @@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) assert(ierr==0); } +void CartesianCommunicator::PrintRankInfo(){ + std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl; } + +}// end of namespace + diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index c707ec1f..6e0029ce 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) assert(ierr==0); } + void CartesianCommunicator::PrintRankInfo(){ + std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl; + } + + } diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc index 71f1a913..b05ee95f 100644 --- a/lib/communicator/Communicator_mpi3_leader.cc +++ b/lib/communicator/Communicator_mpi3_leader.cc @@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { return NULL; } + void CartesianCommunicator::PrintRankInfo(){ + std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl; + } }; diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 5e91b305..1ba10572 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv) ShmInitGeneric(); } +void CartesianCommunicator::PrintRankInfo(){ + std::cout << GridLogMessage << "No Rank Info available" << std::endl; +} + + CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _processors = processors; @@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){} void CartesianCommunicator::GlobalSumVector(double *,int N){} void CartesianCommunicator::SendRecvPacket(void *xmit, - void *recv, - int xmit_to_rank, - int recv_from_rank, - int bytes) + void *recv, + int xmit_to_rank, + int recv_from_rank, + int bytes) { assert(0); } @@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, // Basic Halo comms primitive -- should never call in single node void CartesianCommunicator::SendToRecvFrom(void *xmit, - int dest, - void *recv, - int from, - int bytes) + int dest, + void *recv, + int from, + int bytes) { assert(0); } void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, - void *xmit, - int dest, - void *recv, - int from, - int bytes) + void *xmit, + int dest, + void *recv, + int from, + int bytes) { assert(0); } diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 56e03224..13be66bc 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) } } + void CartesianCommunicator::PrintRankInfo(){ + std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl; + } + } diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 8fc5549b..7f3a7c3c 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -33,26 +33,28 @@ Author: paboyle namespace Grid { template -struct ReproducibilityState { +class ReproducibilityState { + public: typedef typename T::vector_type vector_type; + typedef std::vector > > sum_type; unsigned int n_call; bool do_check; bool enable_reprocheck; - std::vector > > - th_states; + sum_type th_states; + void reset_counter() { n_call = 0; } void reset() { th_states.clear(); do_check = false; enable_reprocheck = false; n_call = 0; - } - - void reset_counter() { n_call = 0; } + }; ReproducibilityState() { reset(); } }; + + #ifdef GRID_WARN_SUBOPTIMAL #warning "Optimisation alert all these reduction loops are NOT threaded " #endif @@ -94,8 +96,6 @@ struct ReproducibilityState { sumarray[i]=zero; } - // accumulation done in the same precision ad vobj... - // may need to froce higher precision PARALLEL_FOR_LOOP_STATIC //request statically scheduled threads for reproducibility for(int thr=0;thrSumArraySize();thr++){ int nwork, mywork, myoff; @@ -103,30 +103,41 @@ struct ReproducibilityState { decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation for(int ss=myoff;ssThisRank() << std::endl; + int words = sizeof(sumarray[thread])/sizeof(unsigned char); + unsigned char xors[words]; + bitwise_xor(sumarray[thread], repr.th_states[repr.n_call][thread],xors); + // XOR all words + unsigned char res = 0; + for (int w = 0; w < words; w++) res = res ^ xors[w]; + if ( res ) { + std::cout << GridLogMessage << "Reproducibility failure report" << std::endl; + grid->PrintRankInfo(); std::cout << GridLogMessage << "Call: "<< repr.n_call << " Thread: " << thread << std::endl; std::cout << GridLogMessage << "Size of states: " << repr.th_states.size() << std::endl; - std::cout << GridLogMessage << sumarray[thread] << std::endl; - std::cout << GridLogMessage << repr.th_states[repr.n_call][thread] << std::endl; - //exit(1); + std::cout << GridLogMessage << "Current partial sum: " << sumarray[thread] << std::endl; + std::cout << GridLogMessage << "Saved partial sum : " << repr.th_states[repr.n_call][thread] << std::endl; + std::cout << GridLogMessage << "Saved state " << std::endl; + show_binaryrep(repr.th_states[repr.n_call][thread]); + std::cout << GridLogMessage << "Current state" << std::endl; + show_binaryrep(sumarray[thread]); + std::cout << GridLogMessage << "Xor result" << std::endl; + show_binaryrep(xors, words); } } repr.n_call++; } else { - //std::cout << GridLogMessage << "Saving thread state for inner product. Call n. " << repr.n_call << std::endl; + std::cout << GridLogDebug << "Saving thread state for inner product. Call n. " << repr.n_call << std::endl; repr.th_states.resize(repr.n_call+1); repr.th_states[repr.n_call].resize(grid->SumArraySize()); repr.th_states[repr.n_call] = sumarray; // save threads state diff --git a/tests/hmc/Test_hmc_WilsonFermionGauge.cc b/tests/hmc/Test_hmc_WilsonFermionGauge.cc index 84240426..dd6ab81b 100644 --- a/tests/hmc/Test_hmc_WilsonFermionGauge.cc +++ b/tests/hmc/Test_hmc_WilsonFermionGauge.cc @@ -64,9 +64,9 @@ class HmcRunner : public NerscHmcRunner { FermionAction FermOp(U, *FGrid, *FrbGrid, mass); // To enable the CG reproducibility tests use - // ConjugateGradient CG(1.0e-8, 10000, true, true); + ConjugateGradient CG(1.0e-8, 10000, false, true); // This is the plain version - ConjugateGradient CG(1.0e-8, 10000); + //ConjugateGradient CG(1.0e-8, 10000); TwoFlavourPseudoFermionAction Nf2(FermOp, CG, CG);