mirror of https://github.com/paboyle/Grid.git (synced 2025-06-13 04:37:05 +01:00)
Compare commits
20 Commits
ad83c3bc60...feature/CG
Commits (SHA1):
70068cff51
85c055fa30
90fedbd2af
ec0c53fa68
6ceee102e8
6e57bdb6b3
4c11e36d3d
9977c53035
3a74fec62f
8fb0a13f39
14a1406f54
538e64e5b4
b2dc17e160
afbbcd2194
d4e0b11bb1
7144ee7ae8
f1908c7bc9
036ec31c48
53f240200e
9720c9ba3f
lib/Bitwise.cc (new file, 49 lines)
@@ -0,0 +1,49 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/Bitwise.cc

    Copyright (C) 2016

    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/* END LEGAL */
#include <iostream>
#include <Bitwise.h>
#include <bitset>
#include <climits>

namespace Grid {

// Print the binary representation of a buffer, byte by byte,
// breaking the line after every GRID_REAL_BYTES bytes.
void show_binaryrep(const unsigned char* a, size_t size) {
  const unsigned char* beg = a;
  const unsigned char* end = a + size;
  unsigned int ctr = 0;
  while (beg != end) {
    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    ctr++;
    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
  }
  std::cout << '\n';
}

}  // namespace Grid
lib/Bitwise.h (new file, 76 lines)
@@ -0,0 +1,76 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/Bitwise.h

    Copyright (C) 2016

    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BITWISE_H
#define GRID_BITWISE_H

#include <cassert>
#include <cfloat>
#include <bitset>
#include <climits>
#include <iostream>
#include <Config.h>

#ifdef GRID_DEFAULT_PRECISION_SINGLE
#define GRID_REAL_BYTES 4
#endif
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
#define GRID_REAL_BYTES 8
#endif

namespace Grid {

void show_binaryrep(const unsigned char* a, size_t size);

// Print the binary representation of an arbitrary object.
template <typename T>
void show_binaryrep(const T& a) {
  const char* beg = reinterpret_cast<const char*>(&a);
  const char* end = beg + sizeof(a);
  unsigned int ctr = 0;
  while (beg != end) {
    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    ctr++;
    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
  }
  std::cout << '\n';
}

// Byte-wise XOR of two equally sized objects; xors must point to at
// least sizeof(T) bytes. Non-zero output bytes flag differing bits.
template <typename T>
void bitwise_xor(T& l, T& r, unsigned char* xors) {
  assert(sizeof(l) == sizeof(r));
  unsigned char* org = reinterpret_cast<unsigned char*>(&l);
  unsigned char* cur = reinterpret_cast<unsigned char*>(&r);
  int words = sizeof(l) / sizeof(*org);
  for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]);
}

}  // namespace Grid

#endif
@@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/serialisation/Serialisation.h>
 #include "Config.h"
 #include <Grid/Timer.h>
+#include <Grid/Bitwise.h>
 #include <Grid/PerfCount.h>
 #include <Grid/Log.h>
 #include <Grid/AlignedAllocator.h>
@@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) {
   me = shmem_my_pe();
 #endif
   if (me) {
-    std::cout.setstate(std::ios::badbit);
+    std::cout.setstate(std::ios::badbit);  // mute all nodes except 0
   }
 }
@@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk> (plus whitespace-only changes)
 #ifdef GRID_OMP
 #include <omp.h>
 #ifdef GRID_NUMA
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 #else
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 #endif
 #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 #define PARALLEL_REGION _Pragma("omp parallel")
+#define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)")
 #else
 #define PARALLEL_FOR_LOOP
 #define PARALLEL_FOR_LOOP_INTERN
 #define PARALLEL_NESTED_LOOP2
 #define PARALLEL_REGION
+#define PARALLEL_FOR_LOOP_STATIC
 #endif

 namespace Grid {
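The new PARALLEL_FOR_LOOP_STATIC exists because only a static schedule pins each loop index range to the same thread on every run, which is what makes per-thread partial sums comparable between passes. A minimal standalone sketch of the idea (not part of the commit):

// --- why schedule(static) matters; illustrative, not part of the commit ---
#include <omp.h>
#include <vector>

// schedule(static) hands each thread the same index range on every run, so the
// per-thread partial sums sums[t] are bitwise reproducible; schedule(runtime)
// or dynamic schedules may redistribute the work and change them.
std::vector<double> partial_sums(const std::vector<double>& data, int nthreads) {
  std::vector<double> sums(nthreads, 0.0);
#pragma omp parallel for schedule(static) num_threads(nthreads)
  for (long i = 0; i < (long)data.size(); i++) {
    sums[omp_get_thread_num()] += data[i];
  }
  return sums;
}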
@@ -9,6 +9,7 @@ Copyright (C) 2015

 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Guido Cossu <guido.cossu@ed.ac.uk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -33,6 +34,21 @@ directory

 namespace Grid {

+struct CG_state {
+  bool do_repro;
+  std::vector<RealD> residuals;
+
+  CG_state() { reset(); }
+
+  void reset() {
+    do_repro = false;
+    residuals.clear();
+  }
+};
+
+enum CGexec_mode { Default, ReproducibilityTest };
+
 /////////////////////////////////////////////////////////////
 // Base classes for iterative processes based on operators
 // single input vec, single output vec.
@@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> {
   // Defaults true.
   RealD Tolerance;
   Integer MaxIterations;
-  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
-      : Tolerance(tol),
-        MaxIterations(maxit),
-        ErrorOnNoConverge(err_on_no_conv){};
+
+  // Reproducibility controls
+  bool ReproTest;
+  CG_state CGState;  // to check reproducibility by repeating the CG
+  ReproducibilityState<typename Field::vector_object> ReprTest;  // for the inner products
+
+  // Constructor
+  ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
+      : Tolerance(tol), MaxIterations(maxit) {
+    switch (Mode) {
+      case Default:
+        ErrorOnNoConverge = true;
+        ReproTest = false;
+        break;  // the break is needed; otherwise Default falls through to the test mode
+      case ReproducibilityTest:
+        ErrorOnNoConverge = false;
+        ReproTest = true;
+        break;
+    }
+  };
+
+  void set_reproducibility_interval(unsigned int interval) {
+    ReprTest.interval = interval;
+  }

   void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
                   Field &psi) {
@@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> {
     Field p(src);
     Field mmp(src);
     Field r(src);
+    Field psi_start(psi);  // save for the repro test
+
+    if (CGState.do_repro && ReproTest)
+      std::cout << GridLogMessage << "Starting reproducibility test, full check every "
+                << ReprTest.interval << " calls" << std::endl;
+
+    if (!ReprTest.do_check)
+      ReprTest.reset();
+    ReprTest.enable_reprocheck = ReproTest;

     // Initial residual computation & set up
-    RealD guess = norm2(psi);
+    RealD guess = norm2(psi, ReprTest);
     assert(std::isnan(guess) == 0);

-    Linop.HermOpAndNorm(psi, mmp, d, b);  // eventually split this for the norm check
+    Linop.HermOpAndNorm(psi, mmp, d, b);

     r = src - mmp;
     p = r;

-    a = norm2(p);
+    a = norm2(p, ReprTest);
     cp = a;
-    ssq = norm2(src);
+    ssq = norm2(src, ReprTest);

-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: mmp " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: p " << a << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: src " << ssq << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: mp " << d << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: mmp " << b << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: p " << a << std::endl;

     RealD rsq = Tolerance * Tolerance * ssq;
@@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
     SolverTimer.Start();
     int k;
     for (k = 1; k <= MaxIterations; k++) {
-      c = cp;
+      c = cp;  // old residual

       MatrixTimer.Start();
-      Linop.HermOpAndNorm(p, mmp, d, qq);
+      Linop.HermOpAndNorm(p, mmp, d, qq);  // mmp = Ap, d = pAp
       MatrixTimer.Stop();

       LinalgTimer.Start();
@@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
       // ComplexD dck = innerProduct(p,mmp);

       a = c / d;
-      b_pred = a * (a * qq - d) / c;
+      b_pred = a * (a * qq - d) / c;  // a check

-      cp = axpy_norm(r, -a, mmp, r);
+      axpy(r, -a, mmp, r);      // new residual r = r_old - a * Ap
+      cp = norm2(r, ReprTest);  // bookkeeping this norm
+      if (ReproTest && !CGState.do_repro) {
+        CGState.residuals.push_back(cp);  // save residuals state
+        std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
+      }
+      if (ReproTest && CGState.do_repro) {
+        // check that the residual agrees with the previous run
+        std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
+        if (cp != CGState.residuals[k-1]) {
+          std::cout << GridLogMessage << "Failing reproducibility test";
+          std::cout << GridLogMessage << " at k=" << k << std::endl;
+          std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1]
+                    << " cp = " << cp << std::endl;
+          exit(1);  // exit after the first failure
+        }
+      }
       b = cp / c;

       // Fuse these loops ; should be really easy
-      psi = a * p + psi;
-      p = p * b + r;
+      psi = a * p + psi;  // update solution
+      p = p * b + r;      // update search direction

       LinalgTimer.Stop();
       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
@@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> {

         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

+        if (!(CGState.do_repro && ReproTest)) {
+          CGState.do_repro = true;
+          ReprTest.do_check = true;
+          ReprTest.reset_counter();
+          this->operator()(Linop, src, psi_start);  // run the repro test
+          if (ReprTest.success)
+            std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
+          else {
+            std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
+            exit(1);
+          }
+        }
+
+        // Clear state
+        CGState.reset();
+        ReprTest.reset();
         return;
       }
     }
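Taken together, the additions give operator() a two-pass structure. A schematic of the control flow (illustrative only, not part of the commit):

// pass 1 (normal solve): each iteration appends cp = norm2(r, ReprTest) to
//         CGState.residuals while ReprTest records every thread's partial sums;
// pass 2: on completion, operator() re-runs itself on the saved start vector
//         psi_start with CGState.do_repro = true; each new residual must match
//         the stored one bit for bit, and ReprTest.check() XOR-compares the
//         per-thread partial sums of every reduction against the recorded state.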
@@ -59,7 +59,7 @@ public:

   int Nstop;  // Number of evecs checked for convergence
   int Nk;     // Number of converged sought
-  int Np;     // Np -- Number of spare vecs in kryloc space
+  int Np;     // Np -- Number of spare vecs in krylov space
   int Nm;     // Nm -- total number of vectors

   RealD eresid;
@@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
 const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
 int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };

+
 ////////////////////////////////////////////////////////////////////////////////
 // very VERY rarely (Log, serial RNG) we need world without a grid
 ////////////////////////////////////////////////////////////////////////////////

@@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) (indentation-only changes)
 #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)

 void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                        void *xmit,
                                                        int xmit_to_rank,
                                                        void *recv,
                                                        int recv_from_rank,
                                                        int bytes)
 {
   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 }
@@ -68,6 +68,8 @@ class CartesianCommunicator {
   static MPI_Comm communicator_world;
   MPI_Comm communicator;
   typedef MPI_Request CommsRequest_t;
+  static char name[MPI_MAX_PROCESSOR_NAME];  // processing node physical name
+  static int length;
 #else
   typedef int CommsRequest_t;
 #endif

@@ -149,6 +151,7 @@ class CartesianCommunicator {
   const std::vector<int> & ProcessorGrid(void) ;
   int ProcessorCount(void) ;

+  void PrintRankInfo(void) ;
   ////////////////////////////////////////////////////////////////////////////////
   // very VERY rarely (Log, serial RNG) we need world without a grid
   ////////////////////////////////////////////////////////////////////////////////
@@ -35,6 +35,8 @@ namespace Grid {
 // Info that is setup once and indept of cartesian layout
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 MPI_Comm CartesianCommunicator::communicator_world;
+char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME];  // processing node physical name
+int CartesianCommunicator::length;

 // Should error check all MPI calls.
 void CartesianCommunicator::Init(int *argc, char ***argv) {

@@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
     MPI_Init(argc,argv);
   }
   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
+
+  MPI_Get_processor_name(name, &length);
   ShmInitGeneric();
 }

@@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   assert(ierr==0);
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}
+
 }// end of namespace
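Each communications backend now reports its identity through PrintRankInfo(). Under MPI the result is one line per rank built from MPI_Get_processor_name, of the form below (the hostnames here are made up for illustration):

// --- illustrative output of PrintRankInfo() under MPI, hostnames hypothetical ---
// Grid: Rank 0 - Physical node name: node-001
// Grid: Rank 1 - Physical node name: node-002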
@@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   assert(ierr==0);
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}
+
 }
@@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
   return NULL;
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}

 };
@@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
   ShmInitGeneric();
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << GridLogMessage << "No Rank Info available" << std::endl;
+}
+
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   _processors = processors;

@@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){} (indentation-only changes)
 void CartesianCommunicator::GlobalSumVector(double *,int N){}

 void CartesianCommunicator::SendRecvPacket(void *xmit,
                                            void *recv,
                                            int xmit_to_rank,
                                            int recv_from_rank,
                                            int bytes)
 {
   assert(0);
 }

@@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, (indentation-only changes)

 // Basic Halo comms primitive -- should never call in single node
 void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int dest,
                                            void *recv,
                                            int from,
                                            int bytes)
 {
   assert(0);
 }
 void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                 void *xmit,
                                                 int dest,
                                                 void *recv,
                                                 int from,
                                                 int bytes)
 {
   assert(0);
 }
@@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   }
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl;
+}
+
 }
@@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_LATTICE_REDUCTION_H

 namespace Grid {

+  template <class T>
+  class ReproducibilityState {
+   public:
+    typedef typename T::vector_type vector_type;
+    typedef std::vector<vector_type, alignedAllocator<vector_type> > sum_type;
+    unsigned int n_call;              // reduction calls seen so far
+    bool do_check;                    // compare against saved states instead of recording
+    bool enable_reprocheck;
+    bool success;
+    unsigned int interval;            // full check every 'interval' calls
+    std::vector<sum_type> th_states;  // per-call snapshot of the thread partial sums
+
+    void reset_counter() { n_call = 0; }
+    void reset() {
+      th_states.clear();
+      do_check = false;
+      enable_reprocheck = false;
+      success = true;
+      n_call = 0;
+    };
+
+    ReproducibilityState() : interval(1) { reset(); }
+
+    void check(GridBase* grid, sum_type& sumarray) {
+      /////////////////////// Reproducibility section, not threaded on purpose
+      if (enable_reprocheck) {
+        if (do_check && (n_call % interval) == 0) {
+          for (int thread = 0; thread < sumarray.size(); thread++) {
+            int words = sizeof(sumarray[thread]) / sizeof(unsigned char);
+            unsigned char xors[words];  // note: variable-length array, a compiler extension
+            bitwise_xor(sumarray[thread], th_states[n_call][thread], xors);
+            // OR all words
+            unsigned char res = 0;
+            for (int w = 0; w < words; w++) res = res | xors[w];
+
+            Grid_unquiesce_nodes();
+            int rank = 0;
+            while (rank < grid->_Nprocessors) {
+              if (rank == grid->ThisRank()) {
+                if (res) {
+                  std::cout << "Reproducibility failure report" << std::endl;
+                  grid->PrintRankInfo();
+                  std::cout << "Call: " << n_call << " Thread: " << thread << std::endl;
+                  std::cout << "Size of states: " << th_states.size() << std::endl;
+                  std::cout << std::setprecision(GRID_REAL_DIGITS + 1) << std::scientific;
+                  std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl;
+                  std::cout << "Current partial sum: " << sumarray[thread] << std::endl;
+                  std::cout << "Saved state " << std::endl; show_binaryrep(th_states[n_call][thread]);
+                  std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]);
+                  std::cout << "XOR result" << std::endl; show_binaryrep(xors, words);
+                  // std::cout << std::defaultfloat; // not supported by some compilers
+                  std::cout << std::setprecision(6);
+                  success = false;
+                }
+              }
+              rank++;
+              grid->Barrier();
+            }
+            Grid_quiesce_nodes();
+          }
+        } else if (!do_check) {
+          std::cout << GridLogDebug << "Saving thread state for inner product. Call n. "
+                    << n_call << std::endl;
+          th_states.resize(n_call + 1);
+          th_states[n_call].resize(grid->SumArraySize());
+          th_states[n_call] = sumarray;  // save threads state
+        }
+        n_call++;
+      }
+    }
+  };
+
 #ifdef GRID_WARN_SUBOPTIMAL
 #warning "Optimisation alert all these reduction loops are NOT threaded "
 #endif
@@ -39,12 +120,25 @@ namespace Grid {
 // Deterministic Reduction operations
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
-  ComplexD nrm = innerProduct(arg,arg);
-  return std::real(nrm);
-}
+  ReproducibilityState<vobj> repr;
+  ComplexD nrm = innerProduct(arg, arg, repr);
+  return std::real(nrm);
+}
+
+template <class vobj>
+inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) {
+  ComplexD nrm = innerProduct(arg, arg, rep);
+  return std::real(nrm);
+}

 template<class vobj>
-inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){
+  ReproducibilityState<vobj> repr;
+  return innerProduct(left, right, repr);
+}
+
+template<class vobj>
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr)
 {
   typedef typename vobj::scalar_type scalar_type;
   typedef typename vobj::vector_type vector_type;
@@ -54,24 +148,28 @@ namespace Grid { (plus indentation-only changes)

   std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
   for(int i=0;i<grid->SumArraySize();i++){
     sumarray[i]=zero;
   }

-PARALLEL_FOR_LOOP
+PARALLEL_FOR_LOOP_STATIC  // request statically scheduled threads for reproducibility
   for(int thr=0;thr<grid->SumArraySize();thr++){
     int nwork, mywork, myoff;
     GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);

     decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation
     for(int ss=myoff;ss<mywork+myoff; ss++){
       vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
     }
     sumarray[thr]=TensorRemove(vnrm) ;
   }

+  /////////// Reproducibility
+  repr.check(grid, sumarray);
+  ///////////////////////////
+
   vector_type vvnrm; vvnrm=zero;  // sum across threads
   for(int i=0;i<grid->SumArraySize();i++){
     vvnrm = vvnrm+sumarray[i];
   }
   nrm = Reduce(vvnrm);  // sum across simd
   right._grid->GlobalSum(nrm);
@@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP (indentation-only changes)
 }

 template<class Op,class T1>
 inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 {
   return sum(closure(expr));
 }

 template<class Op,class T1,class T2>
 inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 {
   return sum(closure(expr));
 }

 template<class Op,class T1,class T2,class T3>
 inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
                                       eval(0,std::get<1>(expr.second)),
                                       eval(0,std::get<2>(expr.second))
                                       ))::scalar_object
 {
   return sum(closure(expr));
 }
@@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP (indentation-only changes)

   std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
   for(int i=0;i<grid->SumArraySize();i++){
     sumarray[i]=zero;
   }

 PARALLEL_FOR_LOOP
   for(int thr=0;thr<grid->SumArraySize();thr++){
     int nwork, mywork, myoff;
     GridThread::GetWork(grid->oSites(),thr,mywork,myoff);

     vobj vvsum=zero;
     for(int ss=myoff;ss<mywork+myoff; ss++){
       vvsum = vvsum + arg._odata[ss];
     }
     sumarray[thr]=vvsum;
   }

   vobj vsum=zero;  // sum across threads
   for(int i=0;i<grid->SumArraySize();i++){
     vsum = vsum+sumarray[i];
   }

   typedef typename vobj::scalar_object sobj;

@@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP (indentation-only changes)
   extract(vsum,buf);

   for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
   arg._grid->GlobalSum(ssum);

   return ssum;
 }
@@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP (indentation-only changes)

 template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 {
   typedef typename vobj::scalar_object sobj;
   GridBase *grid = Data._grid;
   assert(grid!=NULL);

   // FIXME
   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;

   const int Nd = grid->_ndimension;
   const int Nsimd = grid->Nsimd();

   assert(orthogdim >= 0);
   assert(orthogdim < Nd);

   int fd=grid->_fdimensions[orthogdim];
   int ld=grid->_ldimensions[orthogdim];
   int rd=grid->_rdimensions[orthogdim];

   std::vector<vobj,alignedAllocator<vobj> > lvSum(rd);  // will locally sum vectors first
   std::vector<sobj> lsSum(ld,zero);                     // sum across these down to scalars
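Outside the CG, the checked reduction can also be driven by hand. A minimal sketch (not part of the commit; x stands for any already initialised lattice field):

// --- illustrative usage sketch, not part of the commit ---
// x is any Lattice field, e.g. LatticeFermion x(FGrid);
ReproducibilityState<typename LatticeFermion::vector_object> state;
state.enable_reprocheck = true;               // first call records per-thread partial sums
ComplexD first = innerProduct(x, x, state);

state.do_check = true;                        // second call compares instead of recording
state.reset_counter();
ComplexD second = innerProduct(x, x, state);  // prints a failure report on any bit difference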
@@ -66,6 +66,9 @@ namespace Optimization {
     double f[4];
   };

+
+
+
   struct Vsplat{
     //Complex float
     inline __m256 operator()(float a, float b){
@@ -60,6 +60,13 @@ directory
 #include "Grid_neon.h"
 #endif

+#ifdef GRID_DEFAULT_PRECISION_SINGLE
+#define GRID_REAL_DIGITS FLT_DIG
+#endif
+#ifdef GRID_DEFAULT_PRECISION_DOUBLE
+#define GRID_REAL_DIGITS DBL_DIG
+#endif
+
 namespace Grid {

 //////////////////////////////////////
@@ -146,6 +153,23 @@ class Grid_simd {
   Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps
   Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};

+  /////////////////////////////
+  // Comparisons
+  /////////////////////////////
+  inline bool operator==(const Grid_simd& lhs) {
+    Grid_simd::conv_t conv1;
+    Grid_simd::conv_t conv2;
+    conv1.v = v;
+    conv2.v = lhs.v;
+    return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s));
+  }
+
+  inline bool operator!=(const Grid_simd& lhs) {
+    return !(*this == lhs);
+  }
+
   /////////////////////////////
   // Constructors
   /////////////////////////////
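With these operators, bit-level equality of SIMD vectors becomes a one-liner. A minimal sketch (not part of the commit; vsplat fills every lane with a scalar, assert is from <cassert>):

// --- illustrative usage sketch, not part of the commit ---
vRealD a, b;
vsplat(a, 1.0);
vsplat(b, 1.0);
assert(a == b);            // every lane equal, bit for bit
vsplat(b, 1.0 + 1.0e-15);  // change the low mantissa bits
assert(a != b);            // any differing bit in any lane flips the result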
tests/debug/Test_cayley_cg_reproducibility.cc (new file, 215 lines)
@@ -0,0 +1,215 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_cayley_cg_reproducibility.cc

    Copyright (C) 2015

    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

#define REPRODUCIBILITY_INTERVAL 1

template<class d>
struct scal {
  d internal;
};

Gamma::GammaMatrix Gmu [] = {
  Gamma::GammaX,
  Gamma::GammaY,
  Gamma::GammaZ,
  Gamma::GammaT
};

template<class What>
void TestCGinversions(What & Ddwf,
                      GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                      GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                      RealD mass, RealD M5,
                      GridParallelRNG *RNG4,
                      GridParallelRNG *RNG5);
template<class What>
void TestCGschur(What & Ddwf,
                 GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                 GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                 RealD mass, RealD M5,
                 GridParallelRNG *RNG4,
                 GridParallelRNG *RNG5);

template<class What>
void TestCGunprec(What & Ddwf,
                  GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                  GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                  RealD mass, RealD M5,
                  GridParallelRNG *RNG4,
                  GridParallelRNG *RNG5);

template<class What>
void TestCGprec(What & Ddwf,
                GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                RealD mass, RealD M5,
                GridParallelRNG *RNG4,
                GridParallelRNG *RNG5);

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  std::cout << GridLogMessage << "###########################################" << std::endl;
  std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl;
  std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl;
  std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl;
  std::cout << GridLogMessage << "A warning and a report are produced for every failure" << std::endl;
  std::cout << GridLogMessage << "###########################################" << std::endl;

  const int Ls=8;
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);

  LatticeGaugeField Umu(UGrid);
  SU3::HotConfiguration(RNG4,Umu);
  std::vector<LatticeColourMatrix> U(4,UGrid);

  RealD mass=0.1;
  RealD M5  =1.8;
  std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  RealD b=1.5;  // Scale factor b+c=2, b-c=1
  RealD c=0.5;
  std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
  TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
  TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
  TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
  TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
  TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
  TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  Grid_finalize();
}

template<class What>
void TestCGinversions(What & Ddwf,
                      GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                      GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                      RealD mass, RealD M5,
                      GridParallelRNG *RNG4,
                      GridParallelRNG *RNG5)
{
  std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl;
  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
  std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl;
  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
  std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl;
  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
}

template<class What>
void TestCGunprec(What & Ddwf,
                  GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                  GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                  RealD mass, RealD M5,
                  GridParallelRNG *RNG4,
                  GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion result(FGrid); result=zero;

  MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  CG(HermOp,src,result);
}

template<class What>
void TestCGprec(What & Ddwf,
                GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                RealD mass, RealD M5,
                GridParallelRNG *RNG4,
                GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion src_o(FrbGrid);
  LatticeFermion result_o(FrbGrid);
  pickCheckerboard(Odd,src_o,src);
  result_o=zero;

  SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  CG(HermOpEO,src_o,result_o);
}

template<class What>
void TestCGschur(What & Ddwf,
                 GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                 GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                 RealD mass, RealD M5,
                 GridParallelRNG *RNG4,
                 GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion result(FGrid); result=zero;

  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
  SchurSolver(Ddwf,src,result);
}
@@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner {
   Real mass = -0.77;
   FermionAction FermOp(U, *FGrid, *FrbGrid, mass);

+  // To enable the CG reproducibility tests use
+  // ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest);
+  // This is the plain version
   ConjugateGradient<FermionField> CG(1.0e-8, 10000);

   TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);

   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true);
   CG(HermOp,src,result);

   Grid_finalize();