From f1908c7bc9f284a43a65a273c87c82d38e5d223d Mon Sep 17 00:00:00 2001 From: Guido Cossu Date: Mon, 21 Nov 2016 09:52:07 +0000 Subject: [PATCH] Adding reproducibility tests --- lib/algorithms/iterative/ConjugateGradient.h | 23 ++-- lib/lattice/Lattice_reduction.h | 107 +++++++++++++------ 2 files changed, 85 insertions(+), 45 deletions(-) diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index 1f9b9b58..dfc0dd04 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -33,17 +33,21 @@ directory namespace Grid { -struct CG_state{ - bool do_repro; - std::vector residuals; +struct CG_state { + bool do_repro; + std::vector residuals; - CG_state(){ - do_repro = false; - residuals.clear();} + CG_state() { + do_repro = false; + residuals.clear(); + } + void reset(){ + do_repro = false; + residuals.clear(); + } }; - ///////////////////////////////////////////////////////////// // Base classes for iterative processes based on operators // single input vec, single output vec. @@ -57,7 +61,7 @@ class ConjugateGradient : public OperatorFunction { RealD Tolerance; Integer MaxIterations; bool ReproTest; - CG_state CGState; + CG_state CGState;//to check reproducibility by repeating the CG ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true, bool ReproducibilityTest = false) @@ -199,8 +203,7 @@ class ConjugateGradient : public OperatorFunction { } // Clear state - CGState.residuals.clear(); - CGState.do_repro = false; + CGState.reset(); return; } } diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 2615af48..88603407 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -31,6 +31,25 @@ Author: paboyle #define GRID_LATTICE_REDUCTION_H namespace Grid { + +template +struct ReproducibilityState { + int n_call; + bool do_check; + std::vector > > th_states; + + void reset(){ + th_states.clear(); + do_check = false; + n_call = 0; + } + + ReproducibilityState(){ + reset(); + } +}; + + #ifdef GRID_WARN_SUBOPTIMAL #warning "Optimisation alert all these reduction loops are NOT threaded " #endif @@ -39,9 +58,12 @@ namespace Grid { // Deterministic Reduction operations //////////////////////////////////////////////////////////////////////////////////////////////////// template inline RealD norm2(const Lattice &arg){ - ComplexD nrm = innerProduct(arg,arg); - return std::real(nrm); - } + ComplexD nrm = innerProduct(arg,arg); + return std::real(nrm); + } + + + template inline ComplexD innerProduct(const Lattice &left,const Lattice &right) @@ -54,24 +76,39 @@ namespace Grid { std::vector > sumarray(grid->SumArraySize()); for(int i=0;iSumArraySize();i++){ - sumarray[i]=zero; + sumarray[i]=zero; } -PARALLEL_FOR_LOOP + // accumulation done in the same precision ad vobj... + // may need to froce higher precision + PARALLEL_FOR_LOOP for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; + int nwork, mywork, myoff; GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation for(int ss=myoff;ssThisRank() << std::endl; + exit(1); + } + } + } else { + repr.th_states.push_back(sumarray);//save threads state + repr.n_call +=1; + } + */ + vector_type vvnrm; vvnrm=zero; // sum across threads for(int i=0;iSumArraySize();i++){ - vvnrm = vvnrm+sumarray[i]; + vvnrm = vvnrm+sumarray[i]; } nrm = Reduce(vvnrm);// sum across simd right._grid->GlobalSum(nrm); @@ -79,26 +116,26 @@ PARALLEL_FOR_LOOP } template - inline auto sum(const LatticeUnaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object + inline auto sum(const LatticeUnaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object { return sum(closure(expr)); } template - inline auto sum(const LatticeBinaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object + inline auto sum(const LatticeBinaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object { return sum(closure(expr)); } template - inline auto sum(const LatticeTrinaryExpression & expr) - ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), - eval(0,std::get<1>(expr.second)), - eval(0,std::get<2>(expr.second)) - ))::scalar_object + inline auto sum(const LatticeTrinaryExpression & expr) + ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), + eval(0,std::get<1>(expr.second)), + eval(0,std::get<2>(expr.second)) + ))::scalar_object { return sum(closure(expr)); } @@ -111,24 +148,24 @@ PARALLEL_FOR_LOOP std::vector > sumarray(grid->SumArraySize()); for(int i=0;iSumArraySize();i++){ - sumarray[i]=zero; + sumarray[i]=zero; } -PARALLEL_FOR_LOOP + PARALLEL_FOR_LOOP for(int thr=0;thrSumArraySize();thr++){ - int nwork, mywork, myoff; + int nwork, mywork, myoff; GridThread::GetWork(grid->oSites(),thr,mywork,myoff); vobj vvsum=zero; for(int ss=myoff;ssSumArraySize();i++){ - vsum = vsum+sumarray[i]; + vsum = vsum+sumarray[i]; } typedef typename vobj::scalar_object sobj; @@ -138,7 +175,7 @@ PARALLEL_FOR_LOOP extract(vsum,buf); for(int i=0;iGlobalSum(ssum); + arg._grid->GlobalSum(ssum); return ssum; } @@ -146,23 +183,23 @@ PARALLEL_FOR_LOOP template inline void sliceSum(const Lattice &Data,std::vector &result,int orthogdim) -{ - typedef typename vobj::scalar_object sobj; - GridBase *grid = Data._grid; - assert(grid!=NULL); + { + typedef typename vobj::scalar_object sobj; + GridBase *grid = Data._grid; + assert(grid!=NULL); // FIXME // std::cout<SumArraySize()<<" threads "<_ndimension; - const int Nsimd = grid->Nsimd(); + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); - assert(orthogdim >= 0); - assert(orthogdim < Nd); + assert(orthogdim >= 0); + assert(orthogdim < Nd); - int fd=grid->_fdimensions[orthogdim]; - int ld=grid->_ldimensions[orthogdim]; - int rd=grid->_rdimensions[orthogdim]; + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; std::vector > lvSum(rd); // will locally sum vectors first std::vector lsSum(ld,zero); // sum across these down to scalars