From ce1a115e0b8ff288c3615f8e150ef100053354bc Mon Sep 17 00:00:00 2001
From: Guido Cossu <guido.cossu@ed.ac.uk>
Date: Tue, 20 Dec 2016 17:51:30 +0000
Subject: [PATCH] Removing redundant arguments for integrator functions, step 1

---
 lib/lattice/Lattice_base.h               |  19 +++-
 lib/qcd/hmc/GenericHMCrunner.h           |  39 ++++----
 lib/qcd/hmc/integrators/Integrator.h     |   3 +-
 lib/simd/Grid_avx.h                      | 115 +++++++++++------------
 lib/supported_compilers.h                |   2 +-
 tests/hmc/Test_hmc_WilsonGauge_Binary.cc |  60 +++++++++++-
 tests/solver/Test_dwf_cg_prec_LsVec.cc   |   6 +-
 7 files changed, 152 insertions(+), 92 deletions(-)
diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h
index e4dc1ca8..e6536830 100644
--- a/lib/lattice/Lattice_base.h
+++ b/lib/lattice/Lattice_base.h
@@ -255,19 +255,28 @@ PARALLEL_FOR_LOOP
     }
 
     Lattice(const Lattice& r){ // copy constructor
-    	_grid = r._grid;
-    	checkerboard = r.checkerboard;
+      _grid = r._grid;
+      checkerboard = r.checkerboard;
     	_odata.resize(_grid->oSites());// essential
-  		PARALLEL_FOR_LOOP
+  	PARALLEL_FOR_LOOP
         for(int ss=0;ss<_grid->oSites();ss++){
-            _odata[ss]=r._odata[ss];
+          _odata[ss]=r._odata[ss];
         }  	
-    }
+      }
 
 
 
     virtual ~Lattice(void) = default;
     
+    void reset(GridBase* grid) {
+      if (_grid != grid) {
+        _grid = grid;
+        _odata.resize(grid->oSites());
+        checkerboard = 0;
+      }
+    }
+
+
     template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 PARALLEL_FOR_LOOP
         for(int ss=0;ss<_grid->oSites();ss++){
diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h
index 114595b1..2530559f 100644
--- a/lib/qcd/hmc/GenericHMCrunner.h
+++ b/lib/qcd/hmc/GenericHMCrunner.h
@@ -27,17 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc.,
   directory
   *************************************************************************************/
 /*  END LEGAL */
-#ifndef GENERIC_HMC_RUNNER
-#define GENERIC_HMC_RUNNER
+#ifndef GRID_GENERIC_HMC_RUNNER
+#define GRID_GENERIC_HMC_RUNNER
 
 namespace Grid {
 namespace QCD {
 
-  // Virtual Class for HMC specific for gauge theories
-  // implement a specific theory by defining the BuildTheAction
-  template <class Implementation, class RepresentationsPolicy = NoHirep>
-  class BinaryHmcRunnerTemplate {
-  public:
+// Virtual Class for HMC specific for gauge theories
+// implement a specific theory by defining the BuildTheAction
+template <class Implementation, class RepresentationsPolicy = NoHirep>
+class BinaryHmcRunnerTemplate {
+public:
     INHERIT_FIELD_TYPES(Implementation);
     typedef Implementation ImplPolicy;
 
@@ -56,8 +56,10 @@ namespace QCD {
     IntegratorParameters MDparameters;
 
     GridCartesian *        UGrid;
-    GridCartesian *        FGrid;
     GridRedBlackCartesian *UrbGrid;
+
+    // These two are unnecessary, eliminate
+    GridCartesian *        FGrid;
     GridRedBlackCartesian *FrbGrid;
 
     std::vector<int> SerialSeed;
@@ -68,11 +70,11 @@ namespace QCD {
       ParallelSeed = P;
     }
 
-    virtual void BuildTheAction(int argc, char **argv) = 0; // necessary?
+    virtual void BuildTheAction(int argc, char **argv) = 0;  // necessary?
 
     // A couple of wrapper classes
     template <class IOCheckpointer>
-    void Run(int argc, char **argv, IOCheckpointer &Checkpoint) {
+    void Run(int argc, char **argv, IOCheckpointer &Checkpoint)  {
       NoSmearing<Implementation> S;
       Runner(argc, argv, Checkpoint, S);
     }
@@ -83,6 +85,8 @@ namespace QCD {
     }
     //////////////////////////////
 
+    
+
 
     template <class SmearingPolicy, class IOCheckpointer>
     void Runner(int             argc,
@@ -141,11 +145,7 @@ namespace QCD {
       Field           U(UGrid);
 
 
-      typedef MinimumNorm2<Implementation,
-                           SmearingPolicy,
-                           RepresentationsPolicy>
-          IntegratorType; // change here to change the algorithm
-          
+      typedef MinimumNorm2<Implementation, SmearingPolicy, RepresentationsPolicy> IntegratorType;  // change here to change the algorithm
       IntegratorType MDynamics(UGrid, MDparameters, TheAction, Smearing);
 
       HMCparameters HMCpar;
@@ -187,7 +187,7 @@ namespace QCD {
       // Run it
       HMC.evolve();
     }
-  };
+};
 
   // These are for gauge fields
   typedef BinaryHmcRunnerTemplate<PeriodicGimplR> BinaryHmcRunner;
@@ -199,6 +199,7 @@ namespace QCD {
 
   typedef BinaryHmcRunnerTemplate<ScalarImplR, ScalarFields>
       ScalarBinaryHmcRunner;
-}
-}
-#endif
+
+}  // namespace QCD
+}  // namespace Grid
+#endif 
diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h
index db3712c0..48ec746c 100644
--- a/lib/qcd/hmc/integrators/Integrator.h
+++ b/lib/qcd/hmc/integrators/Integrator.h
@@ -189,7 +189,8 @@ class Integrator {
 
   // Initialization of momenta and actions
   void refresh(Field& U, GridParallelRNG& pRNG) {
-    assert(P._grid == U._grid);
+    //assert(P._grid == U._grid);
+    P.reset(U._grid);
     std::cout << GridLogIntegrator << "Integrator refresh\n";
     FieldImplementation::generate_momenta(P, pRNG);
  
diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h
index 77927b10..ac6ec9f4 100644
--- a/lib/simd/Grid_avx.h
+++ b/lib/simd/Grid_avx.h
@@ -1,6 +1,6 @@
     /*************************************************************************************
 
-    Grid physics library, www.github.com/paboyle/Grid 
+    Grid physics library, www.github.com/paboyle/Grid
 
     Source file: ./lib/simd/Grid_avx.h
 
@@ -29,15 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
     See the full license in the file "LICENSE" in the top level distribution directory
     *************************************************************************************/
     /*  END LEGAL */
-//----------------------------------------------------------------------
-/*! @file Grid_avx.h
-  @brief Optimization libraries for AVX1/2 instructions set
-
-  Using intrinsics
-*/
-// Time-stamp: <2015-06-16 23:30:41 neo>
-//----------------------------------------------------------------------
-
 #include <immintrin.h>
 #ifdef AVXFMA4
 #include <x86intrin.h>
@@ -66,9 +57,9 @@ namespace Optimization {
     double f[4];
   };
 
-  struct Vsplat{
-    //Complex float
-    inline __m256 operator()(float a, float b){
+ struct Vsplat{
+    // Complex float
+    inline __m256 operator()(float a, float b) {
       return _mm256_set_ps(b,a,b,a,b,a,b,a);
     }
     // Real float
@@ -90,7 +81,7 @@ namespace Optimization {
   };
 
   struct Vstore{
-    //Float 
+    //Float
     inline void operator()(__m256 a, float* F){
       _mm256_store_ps(F,a);
     }
@@ -119,15 +110,15 @@ namespace Optimization {
   };
 
   struct Vset{
-    // Complex float 
+    // Complex float
     inline __m256 operator()(Grid::ComplexF *a){
       return _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
     }
-    // Complex double 
+    // Complex double
     inline __m256d operator()(Grid::ComplexD *a){
       return _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
     }
-    // Real float 
+    // Real float
     inline __m256 operator()(float *a){
       return _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
     }
@@ -144,8 +135,8 @@ namespace Optimization {
 
   template <typename Out_type, typename In_type>
   struct Reduce{
-    //Need templated class to overload output type
-    //General form must generate error if compiled
+    // Need templated class to overload output type
+    // General form must generate error if compiled
     inline Out_type operator()(In_type in){
       printf("Error, using wrong Reduce function\n");
       exit(1);
@@ -224,7 +215,7 @@ namespace Optimization {
       ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
       ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
       ymm1 = _mm256_mul_ps(ymm1,ymm2);                    // ymm1 <- br ai, ai bi
-      return _mm256_addsub_ps(ymm0,ymm1);  
+      return _mm256_addsub_ps(ymm0,ymm1);
 #endif
 #if defined (AVXFMA4)
       __m256 a_real = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ar ar,
@@ -241,10 +232,10 @@ namespace Optimization {
 #endif
     }
     // Complex double
-    inline __m256d operator()(__m256d a, __m256d b){
-      //Multiplication of (ak+ibk)*(ck+idk)
+    inline __m256d operator()(__m256d a, __m256d b) {
+      // Multiplication of (ak+ibk)*(ck+idk)
       // a + i b can be stored as a data structure
-      //From intel optimisation reference guide
+      // From intel optimisation reference guide
       /*
 	movsldup xmm0, Src1; load real parts into the destination,
 	; a1, a1, a0, a0
@@ -268,7 +259,7 @@ namespace Optimization {
       __m256d ymm0,ymm1,ymm2;
       ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
       ymm0 = _mm256_mul_pd(ymm0,b);      // ymm0 <- ar bi, ar br
-      ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi  b'01,01 
+      ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi  b'01,01
       ymm2 = _mm256_shuffle_pd(a,a,0xF); // ymm2 <- ai,ai  b'11,11
       ymm1 = _mm256_mul_pd(ymm1,ymm2);   // ymm1 <- br ai, ai bi
       return _mm256_addsub_pd(ymm0,ymm1);
@@ -365,10 +356,10 @@ namespace Optimization {
     }
   };
 
-  struct Div{
+  struct Div {
     // Real float
-    inline __m256 operator()(__m256 a, __m256 b){
-      return _mm256_div_ps(a,b);
+    inline __m256 operator()(__m256 a, __m256 b) {
+      return _mm256_div_ps(a, b);
     }
     // Real double
     inline __m256d operator()(__m256d a, __m256d b){
@@ -454,7 +445,7 @@ namespace Optimization {
 #define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16)
 #endif
 
-#if defined (AVX1) || defined (AVXFMA)  
+#if defined (AVX1) || defined (AVXFMA)
 #define _mm256_alignr_epi32_grid(ret,a,b,n) {	\
     __m128 aa, bb;				\
 						\
@@ -487,7 +478,7 @@ namespace Optimization {
 
   struct Rotate{
 
-    static inline __m256 rotate(__m256 in,int n){ 
+    static inline __m256 rotate(__m256 in,int n){
       switch(n){
       case 0: return tRotate<0>(in);break;
       case 1: return tRotate<1>(in);break;
@@ -500,7 +491,7 @@ namespace Optimization {
       default: assert(0);
       }
     }
-    static inline __m256d rotate(__m256d in,int n){ 
+    static inline __m256d rotate(__m256d in,int n){
       switch(n){
       case 0: return tRotate<0>(in);break;
       case 1: return tRotate<1>(in);break;
@@ -509,28 +500,28 @@ namespace Optimization {
       default: assert(0);
       }
     }
-  
-    
-    template<int n>
-    static inline __m256 tRotate(__m256 in){ 
-      __m256 tmp = Permute::Permute0(in);
-      __m256 ret = in;
-      if ( n > 3 ) { 
-	_mm256_alignr_epi32_grid(ret,in,tmp,n);  
-      } else {
-        _mm256_alignr_epi32_grid(ret,tmp,in,n);          
-      }
-      return ret;
-    };
+
 
     template<int n>
-    static inline __m256d tRotate(__m256d in){ 
-      __m256d tmp = Permute::Permute0(in);
-      __m256d ret = in;
-      if ( n > 1 ) {
-	_mm256_alignr_epi64_grid(ret,in,tmp,n);          
+    static inline __m256 tRotate(__m256 in){
+      __m256 tmp = Permute::Permute0(in);
+      __m256 ret;
+      if ( n > 3 ) {
+          _mm256_alignr_epi32_grid(ret,in,tmp,n);
       } else {
-        _mm256_alignr_epi64_grid(ret,tmp,in,n);          
+          _mm256_alignr_epi32_grid(ret,tmp,in,n);
+      }
+      return ret;
+    }
+
+    template<int n>
+    static inline __m256d tRotate(__m256d in){
+      __m256d tmp = Permute::Permute0(in);
+      __m256d ret;
+      if ( n > 1 ) {
+	_mm256_alignr_epi64_grid(ret,in,tmp,n);
+      } else {
+        _mm256_alignr_epi64_grid(ret,tmp,in,n);
       }
       return ret;
     };
@@ -543,7 +534,7 @@ namespace Optimization {
     __m256 v1,v2;
     v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single
     v1= _mm256_add_ps(v1,in);
-    v2=Optimization::Permute::Permute1(v1); 
+    v2=Optimization::Permute::Permute1(v1);
     v1 = _mm256_add_ps(v1,v2);
     u256f conv; conv.v = v1;
     return Grid::ComplexF(conv.f[0],conv.f[1]);
@@ -555,15 +546,15 @@ namespace Optimization {
     __m256 v1,v2;
     v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double
     v1 = _mm256_add_ps(v1,in);
-    v2 = Optimization::Permute::Permute1(v1); 
+    v2 = Optimization::Permute::Permute1(v1);
     v1 = _mm256_add_ps(v1,v2);
-    v2 = Optimization::Permute::Permute2(v1); 
+    v2 = Optimization::Permute::Permute2(v1);
     v1 = _mm256_add_ps(v1,v2);
     u256f conv; conv.v=v1;
     return conv.f[0];
   }
-  
-  
+
+
   //Complex double Reduce
   template<>
   inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
@@ -573,14 +564,14 @@ namespace Optimization {
     u256d conv; conv.v = v1;
     return Grid::ComplexD(conv.f[0],conv.f[1]);
   }
-  
+
   //Real double Reduce
   template<>
   inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
     __m256d v1,v2;
     v1 = Optimization::Permute::Permute0(in); // avx 256; quad double
     v1 = _mm256_add_pd(v1,in);
-    v2 = Optimization::Permute::Permute1(v1); 
+    v2 = Optimization::Permute::Permute1(v1);
     v1 = _mm256_add_pd(v1,v2);
     u256d conv; conv.v = v1;
     return conv.f[0];
@@ -593,17 +584,17 @@ namespace Optimization {
     printf("Reduce : Missing integer implementation -> FIX\n");
     assert(0);
   }
-  
+
 }
 
 //////////////////////////////////////////////////////////////////////////////////////
-// Here assign types 
+// Here assign types
 
   typedef __m256  SIMD_Ftype; // Single precision type
   typedef __m256d SIMD_Dtype; // Double precision type
   typedef __m256i SIMD_Itype; // Integer type
 
-  // prefecthing 
+  // prefecthing
   inline void v_prefetch0(int size, const char *ptr){
     for(int i=0;i<size;i+=64){ //  Define L1 linesize above
       _mm_prefetch(ptr+i+4096,_MM_HINT_T1);
@@ -611,7 +602,7 @@ namespace Optimization {
     }
   }
   inline void prefetch_HINT_T0(const char *ptr){
-    _mm_prefetch(ptr,_MM_HINT_T0);
+    _mm_prefetch(ptr, _MM_HINT_T0);
   }
 
   // Function name aliases
@@ -620,7 +611,7 @@ namespace Optimization {
   typedef Optimization::Vset     VsetSIMD;
   typedef Optimization::Vstream  VstreamSIMD;
 
-  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
+  template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S, T>;
 
   // Arithmetic operations
   typedef Optimization::Sum         SumSIMD;
@@ -632,4 +623,4 @@ namespace Optimization {
   typedef Optimization::TimesMinusI TimesMinusISIMD;
   typedef Optimization::TimesI      TimesISIMD;
 
-}
+}  // namespace Grid
diff --git a/lib/supported_compilers.h b/lib/supported_compilers.h
index cec68713..b4cd9a78 100644
--- a/lib/supported_compilers.h
+++ b/lib/supported_compilers.h
@@ -46,4 +46,4 @@
 #endif
 
 
-#endif
+#endif  // COMPILER_CHECK_H
diff --git a/tests/hmc/Test_hmc_WilsonGauge_Binary.cc b/tests/hmc/Test_hmc_WilsonGauge_Binary.cc
index 4c650357..d5228ce8 100644
--- a/tests/hmc/Test_hmc_WilsonGauge_Binary.cc
+++ b/tests/hmc/Test_hmc_WilsonGauge_Binary.cc
@@ -63,6 +63,11 @@ class HMCRunnerParameters : Serializable {
 class HmcRunner : public BinaryHmcRunner {
  public:
     HMCRunnerParameters HMCPar;
+    void BuildTheAction(int argc, char **argv){}
+};
+/*
+
+    // eliminate arcg and argv from here
     void BuildTheAction(int argc, char **argv)
 
   {
@@ -90,6 +95,7 @@ class HmcRunner : public BinaryHmcRunner {
 
     // Add observables
     // options for checkpointers
+    // this can be moved outside the BuildTheAction
     //BinaryHmcCheckpointer
     //ILDGHmcCheckpointer
     //NerscHmcCheckpointer
@@ -107,9 +113,11 @@ class HmcRunner : public BinaryHmcRunner {
     ObservablesList.push_back(&PlaqLog);
     ObservablesList.push_back(&Checkpoint);
 
+    // This must run from here so that the grids are defined
     Run(argc, argv, Checkpoint);  // no smearing
   };
 };
+*/
 }
 }
 
@@ -136,7 +144,57 @@ int main(int argc, char **argv) {
 
   TheHMC.MDparameters.set(TheHMC.HMCPar.MDsteps, TheHMC.HMCPar.TrajectorLength);
 
-  TheHMC.BuildTheAction(argc, argv);
+  //TheHMC.BuildTheAction(argc, argv);
+
+
+
+     // Typedefs to simplify notation
+    typedef WilsonGaugeActionR GaugeAction;
+    typedef WilsonImplR ImplPolicy;
+    typedef WilsonFermionR FermionAction;
+    typedef typename FermionAction::FermionField FermionField;
+
+    // this can be simplified too. MakeDefaultGrid(Nd)
+    TheHMC.UGrid = SpaceTimeGrid::makeFourDimGrid(
+        GridDefaultLatt(), 
+        GridDefaultSimd(Nd, vComplex::Nsimd()), 
+        GridDefaultMpi());
+    
+
+    // Gauge action
+    std::cout << GridLogMessage << "Beta: " << TheHMC.HMCPar.beta << std::endl;
+    GaugeAction Waction(TheHMC.HMCPar.beta);
+
+    // Collect actions
+    ActionLevel<BinaryHmcRunner::Field> Level1(1);
+    Level1.push_back(&Waction);
+    TheHMC.TheAction.push_back(Level1);
+
+    // Add observables
+    // options for checkpointers
+    // this can be moved outside the BuildTheAction
+    //BinaryHmcCheckpointer
+    //ILDGHmcCheckpointer
+    //NerscHmcCheckpointer
+    NerscHmcCheckpointer<BinaryHmcRunner::ImplPolicy> Checkpoint(
+        TheHMC.HMCPar.conf_prefix, TheHMC.HMCPar.rng_prefix, TheHMC.HMCPar.SaveInterval, TheHMC.HMCPar.format);
+    // Can implement also a specific function in the hmcrunner
+    // AddCheckpoint (...) that takes the same parameters + a string/tag
+    // defining the type of the checkpointer
+    // with tags can be implemented by overloading and no ifs
+    // Then force all checkpoint to have few common functions
+    // return an object that is then passed to the Run function
+
+    PlaquetteLogger<BinaryHmcRunner::ImplPolicy> PlaqLog(
+        std::string("Plaquette"));
+    TheHMC.ObservablesList.push_back(&PlaqLog);
+    TheHMC.ObservablesList.push_back(&Checkpoint);
+
+    // This must run from here so that the grids are defined
+    TheHMC.Run(argc, argv, Checkpoint);  // no smearing
+
+
+
 
   Grid_finalize();
 }
diff --git a/tests/solver/Test_dwf_cg_prec_LsVec.cc b/tests/solver/Test_dwf_cg_prec_LsVec.cc
index 3fbec4d3..dfa0a194 100644
--- a/tests/solver/Test_dwf_cg_prec_LsVec.cc
+++ b/tests/solver/Test_dwf_cg_prec_LsVec.cc
@@ -67,7 +67,7 @@ int main(int argc, char** argv) {
   GridParallelRNG RNG4(UGrid);
   RNG4.SeedFixedIntegers(seeds4);
 
-  std::cout << GridLogMessage << "Generating random ferrmion field" << std::endl;
+  std::cout << GridLogMessage << "Generating random fermion field" << std::endl;
   LatticeFermion src(FGrid);
   random(RNG5, src);
   LatticeFermion result(FGrid);
@@ -96,7 +96,7 @@ int main(int argc, char** argv) {
   GridStopWatch CGTimer;
 
   SchurDiagMooeeOperator<DomainWallFermionVec5dR, LatticeFermion> HermOpEO(Ddwf);
-  ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
+  ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);  // switch off the assert
 
   Ddwf.ZeroCounters();
   CGTimer.Start();
@@ -110,4 +110,4 @@ int main(int argc, char** argv) {
   Ddwf.Report();
 
   Grid_finalize();
-}
+}