diff --git a/lib/Log.cc b/lib/Log.cc index 7521657b..9856ead9 100644 --- a/lib/Log.cc +++ b/lib/Log.cc @@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) { me = shmem_my_pe(); #endif if (me) { - std::cout.setstate(std::ios::badbit); + std::cout.setstate(std::ios::badbit);// mute all nodes except 0 } } diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index f590a3c6..b070d10b 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -46,6 +46,9 @@ struct CG_state { } }; + +enum CGexec_modes{ Default, ReproducibilityTest }; + ///////////////////////////////////////////////////////////// // Base classes for iterative processes based on operators // single input vec, single output vec. diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index e88506ce..0e983947 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -36,23 +36,73 @@ template class ReproducibilityState { public: typedef typename T::vector_type vector_type; - typedef std::vector > > sum_type; - unsigned int n_call; - bool do_check; - bool enable_reprocheck; - bool success; - sum_type th_states; + typedef std::vector > sum_type; + unsigned int n_call; + bool do_check; + bool enable_reprocheck; + bool success; + std::vector th_states; void reset_counter() { n_call = 0; } void reset() { th_states.clear(); - do_check = false; + do_check = false; enable_reprocheck = false; - success = true; - n_call = 0; + success = true; + n_call = 0; }; ReproducibilityState() { reset(); } + + void check(GridBase* grid, sum_type &sumarray){ + /////////////////////// Reproducibility section, not threaded on purpouse + if (enable_reprocheck) { + if (do_check) { + for (int thread = 0; thread < sumarray.size(); thread++) { + int words = sizeof(sumarray[thread])/sizeof(unsigned char); + unsigned char xors[words]; + bitwise_xor(sumarray[thread], th_states[n_call][thread],xors); + // OR all words + unsigned char res = 0; + for (int w = 0; w < words; w++) res = res | xors[w]; + if ( res ) { + std::cout << GridLogMessage << "Reproducibility failure report" << std::endl; + + Grid_unquiesce_nodes(); + int rank = 0; + while (rank < grid->_Nprocessors){ + if (rank == grid->ThisRank() ){ + grid->PrintRankInfo(); + std::cout << "Call: "<< n_call << " Thread: " << thread << std::endl; + std::cout << "Size of states: " << th_states.size() << std::endl; + std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific; + std::cout << "Saved partial sum : " << th_states[n_call][thread] << std::endl; + std::cout << "Current partial sum: " << sumarray[thread] << std::endl; + std::cout << "Saved state " << std::endl; show_binaryrep(th_states[n_call][thread]); + std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]); + std::cout << "XOR result" << std::endl; show_binaryrep(xors, words); + //std::cout << std::defaultfloat; //not supported by some compilers + std::cout << std::setprecision(6); + success = false; + } + rank++; + grid->Barrier(); + } + Grid_quiesce_nodes(); + } + } + n_call++; + } else + { + std::cout << GridLogDebug << "Saving thread state for inner product. Call n. " << n_call << std::endl; + th_states.resize(n_call+1); + th_states[n_call].resize(grid->SumArraySize()); + th_states[n_call] = sumarray; // save threads state + n_call++; + } + } + } + }; @@ -82,8 +132,6 @@ class ReproducibilityState { return innerProduct(left, right, repr); } - - template inline ComplexD innerProduct(const Lattice &left,const Lattice &right, ReproducibilityState& repr) { @@ -110,45 +158,9 @@ class ReproducibilityState { sumarray[thr]=TensorRemove(vnrm) ; } - /////////////////////// Reproducibility section, not threaded on purpouse - if (repr.enable_reprocheck) { - if (repr.do_check) { - std::cout << GridLogDebug << "Checking thread state for inner product. Call n. " << repr.n_call << std::endl; - for (int thread = 0; thread < sumarray.size(); thread++) { - int words = sizeof(sumarray[thread])/sizeof(unsigned char); - unsigned char xors[words]; - bitwise_xor(sumarray[thread], repr.th_states[repr.n_call][thread],xors); - // OR all words - unsigned char res = 0; - for (int w = 0; w < words; w++) res = res | xors[w]; - if ( res ) { - std::cout << GridLogMessage << "Reproducibility failure report" << std::endl; - grid->PrintRankInfo(); - std::cout << "Call: "<< repr.n_call << " Thread: " << thread << std::endl; - std::cout << "Size of states: " << repr.th_states.size() << std::endl; - std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific; - std::cout << "Saved partial sum : " << repr.th_states[repr.n_call][thread] << std::endl; - std::cout << "Current partial sum: " << sumarray[thread] << std::endl; - std::cout << "Saved state " << std::endl; show_binaryrep(repr.th_states[repr.n_call][thread]); - std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]); - std::cout << "XOR result" << std::endl; show_binaryrep(xors, words); - //std::cout << std::defaultfloat; //not supported by some compilers - std::cout << std::setprecision(6); - repr.success = false; - } - } - repr.n_call++; - } else - { - std::cout << GridLogDebug << "Saving thread state for inner product. Call n. " << repr.n_call << std::endl; - repr.th_states.resize(repr.n_call+1); - repr.th_states[repr.n_call].resize(grid->SumArraySize()); - repr.th_states[repr.n_call] = sumarray; // save threads state - repr.n_call++; - } - } - //////////////////////////////////////////////////////// - + /////////// Reproducibility + repr.check(grid, sumarray); + /////////////////////////// vector_type vvnrm; vvnrm=zero; // sum across threads for(int i=0;iSumArraySize();i++){