Just commenting

Tests more reductions and stops if a failure is found in an iteration
Report only on failing nodes
2026-07-22 19:43:28 +01:00 · 2016-12-12 03:49:01 +00:00 · 2016-12-12 03:33:21 +00:00 · 2016-12-09 05:57:30 +00:00 · 2016-12-09 05:20:38 +00:00 · 2016-12-09 05:08:56 +00:00
299 changed files with 5194 additions and 40559 deletions
@@ -9,7 +9,6 @@
 ################
 *~
 *#
-*.sublime-*

 # Precompiled Headers #
 #######################
@@ -104,16 +103,4 @@ lib/fftw/*
 # libtool macros #
 ##################
 m4/lt*
-m4/libtool.m4
-
-# Buck files #
-##############
-.buck*
-buck-out
-BUCK
-make-bin-BUCK.sh
-
-# generated sources #
-#####################
-lib/qcd/spin/gamma-gen/*.h
-lib/qcd/spin/gamma-gen/*.cc
+m4/libtool.m4
@@ -102,5 +102,5 @@ script:
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
    - make -j4
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
-
+    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

@@ -1,5 +1,5 @@
 # additional include paths necessary to compile the C++ library
-SUBDIRS = lib benchmarks tests extras
+SUBDIRS = lib benchmarks tests

 include $(top_srcdir)/doxygen.inc

@@ -48,9 +48,9 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
-  int maxlat=24;
-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  int maxlat=16;
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

      std::vector<int> latt_size  ({lat*mpi_layout[0],
      				    lat*mpi_layout[1],
@@ -124,8 +124,8 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;


-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

      std::vector<int> latt_size  ({lat,lat,lat,lat});

@@ -194,14 +194,14 @@ int main (int argc, char ** argv)
  }  


-  Nloop=10;
+  Nloop=100;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

      std::vector<int> latt_size  ({lat*mpi_layout[0],
      				    lat*mpi_layout[1],
@@ -281,8 +281,8 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

      std::vector<int> latt_size  ({lat*mpi_layout[0],
      				    lat*mpi_layout[1],
@@ -324,8 +324,8 @@ int main (int argc, char ** argv)
 					    (void *)&rbuf[mu][0],
 					    recv_from_rank,
 					    bytes);
-	    Grid.StencilSendToRecvFromComplete(requests);
-	    requests.resize(0);
+	    //	    Grid.StencilSendToRecvFromComplete(requests);
+	    //	    requests.resize(0);

 	    comm_proc = mpi_layout[mu]-1;
 	  
@@ -37,27 +37,27 @@ struct scal {
  d internal;
 };

-  Gamma::Algebra Gmu [] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
  };

 typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
 typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;

+
 int main (int argc, char ** argv)
 {
  Grid_init(&argc,&argv);

-
  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  std::vector<int> latt4 = GridDefaultLatt();
-  const int Ls=16;
+  const int Ls=8;
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -71,66 +71,35 @@ int main (int argc, char ** argv)

  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
-  
-  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
+
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
-  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
-  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

  LatticeFermion src   (FGrid); random(RNG5,src);
-#if 0
-  src = zero;
-  {
-    std::vector<int> origin({0,0,0,latt4[2]-1,0});
-    SpinColourVectorF tmp;
-    tmp=zero;
-    tmp()(0)(0)=Complex(-2.0,0.0);
-    std::cout << " source site 0 " << tmp<<std::endl;
-    pokeSite(tmp,src,origin);
-  }
-#else
-  RealD N2 = 1.0/::sqrt(norm2(src));
-  src = src*N2;
-#endif
-
-
  LatticeFermion result(FGrid); result=zero;
  LatticeFermion    ref(FGrid);    ref=zero;
  LatticeFermion    tmp(FGrid);
  LatticeFermion    err(FGrid);

-  std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
  LatticeGaugeField Umu(UGrid); 
-  SU3::HotConfiguration(RNG4,Umu); 
-  std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
-#if 0
-  Umu=1.0;
-  for(int mu=0;mu<Nd;mu++){
-    LatticeColourMatrix ttmp(UGrid);
-    ttmp = PeekIndex<LorentzIndex>(Umu,mu);
-    //    if (mu !=2 ) ttmp = 0;
-    //    ttmp = ttmp* pow(10.0,mu);
-    PokeIndex<LorentzIndex>(Umu,ttmp,mu);
-  }
-  std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
-#endif
+  random(RNG4,Umu);

-  ////////////////////////////////////
-  // Naive wilson implementation
-  ////////////////////////////////////
-  // replicate across fifth dimension
  LatticeGaugeField Umu5d(FGrid); 
-  std::vector<LatticeColourMatrix> U(4,FGrid);
+
+  // replicate across fifth dimension
  for(int ss=0;ss<Umu._grid->oSites();ss++){
    for(int s=0;s<Ls;s++){
      Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
    }
  }
+
+  ////////////////////////////////////
+  // Naive wilson implementation
+  ////////////////////////////////////
+  std::vector<LatticeColourMatrix> U(4,FGrid);
  for(int mu=0;mu<Nd;mu++){
    U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
  }
-  std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;

  if (1)
  {
@@ -152,7 +121,6 @@ int main (int argc, char ** argv)

  RealD NP = UGrid->_Nprocessors;

-  std::cout << GridLogMessage << "Creating action operator " << std::endl;
  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);

  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
@@ -168,11 +136,10 @@ int main (int argc, char ** argv)
  if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
  std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;

-  int ncall =1000;
+  int ncall =100;
  if (1) {
    FGrid->Barrier();
    Dw.ZeroCounters();
-    Dw.Dhop(src,result,0);
    double t0=usecond();
    for(int i=0;i<ncall;i++){
      __SSC_START;
@@ -186,22 +153,12 @@ int main (int argc, char ** argv)
    double flops=1344*volume*ncall;

    std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
-    //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
-    //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
+    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
+    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
    err = ref-result; 
    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
-
-    /*
-    if(( norm2(err)>1.0e-4) ) { 
-      std::cout << "RESULT\n " << result<<std::endl;
-      std::cout << "REF   \n " << ref   <<std::endl;
-      std::cout << "ERR   \n " << err   <<std::endl;
-      FGrid->Barrier();
-      exit(-1);
-    }
-    */
    assert (norm2(err)< 1.0e-4 );
    Dw.Report();
  }
@@ -225,13 +182,21 @@ int main (int argc, char ** argv)
    LatticeFermion sresult(sFGrid);

    WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
-
-    localConvert(src,ssrc);
+  
+    for(int x=0;x<latt4[0];x++){
+    for(int y=0;y<latt4[1];y++){
+    for(int z=0;z<latt4[2];z++){
+    for(int t=0;t<latt4[3];t++){
+    for(int s=0;s<Ls;s++){
+      std::vector<int> site({s,x,y,z,t});
+      SpinColourVector tmp;
+      peekSite(tmp,src,site);
+      pokeSite(tmp,ssrc,site);
+    }}}}}
    std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
    FGrid->Barrier();
-    sDw.Dhop(ssrc,sresult,0);
-    sDw.ZeroCounters();
    double t0=usecond();
+    sDw.ZeroCounters();
    for(int i=0;i<ncall;i++){
      __SSC_START;
      sDw.Dhop(ssrc,sresult,0);
@@ -245,47 +210,46 @@ int main (int argc, char ** argv)
    std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
-    //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
    sDw.Report();
+  
+    if(0){
+      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
+	sDw.Dhop(ssrc,sresult,0);
+	PerformanceCounter Counter(i);
+	Counter.Start();
+	sDw.Dhop(ssrc,sresult,0);
+	Counter.Stop();
+	Counter.Report();
+      }
+    }
+
+    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
+
    RealD sum=0;
+    for(int x=0;x<latt4[0];x++){
+    for(int y=0;y<latt4[1];y++){
+    for(int z=0;z<latt4[2];z++){
+    for(int t=0;t<latt4[3];t++){
+    for(int s=0;s<Ls;s++){
+      std::vector<int> site({s,x,y,z,t});
+      SpinColourVector normal, simd;
+      peekSite(normal,result,site);
+      peekSite(simd,sresult,site);
+      sum=sum+norm2(normal-simd);
+      if (norm2(normal-simd) > 1.0e-6 ) {
+	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
+	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
+	std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl;
+      }
+    }}}}}
+    std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
+    assert (sum< 1.0e-4 );

-    err=zero;
-    localConvert(sresult,err);
-    err = err - ref;
-    sum = norm2(err);
-    std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl;
-    if(sum > 1.0e-4 ){
-      std::cout<< "sD REF\n " <<ref << std::endl;
-      std::cout<< "sD ERR   \n " <<err  <<std::endl;
-    }
-    //    assert(sum < 1.0e-4);

-    err=zero;
-    localConvert(sresult,err);
-    err = err - result;
-    sum = norm2(err);
-    std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl;
-    if(sum > 1.0e-4 ){
-      std::cout<< "sD REF\n " <<result << std::endl;
-      std::cout<< "sD ERR   \n " << err  <<std::endl;
-    }
-    assert(sum < 1.0e-4);
-    
-    if(1){
-      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
-      std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
-      std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
-      if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
-      if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) 
-	std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) 
-	std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) 
-	std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
-      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+    if (1) {

      LatticeFermion sr_eo(sFGrid);
+
      LatticeFermion ssrc_e (sFrbGrid);
      LatticeFermion ssrc_o (sFrbGrid);
      LatticeFermion sr_e   (sFrbGrid);
@@ -293,23 +257,33 @@ int main (int argc, char ** argv)

      pickCheckerboard(Even,ssrc_e,ssrc);
      pickCheckerboard(Odd,ssrc_o,ssrc);
-      //      setCheckerboard(sr_eo,ssrc_o);
-      //      setCheckerboard(sr_eo,ssrc_e);
+
+      setCheckerboard(sr_eo,ssrc_o);
+      setCheckerboard(sr_eo,ssrc_e);

      sr_e = zero;
      sr_o = zero;

+      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+      std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
+      std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
+      if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
+      if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
+      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
+      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
+      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
+      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+
      FGrid->Barrier();
-      sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
      sDw.ZeroCounters();
-      //      sDw.stat.init("DhopEO");
+      sDw.stat.init("DhopEO");
      double t0=usecond();
      for (int i = 0; i < ncall; i++) {
        sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
      }
      double t1=usecond();
      FGrid->Barrier();
-      //      sDw.stat.print();
+      sDw.stat.print();

      double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
      double flops=(1344.0*volume*ncall)/2;
@@ -324,26 +298,22 @@ int main (int argc, char ** argv)

      pickCheckerboard(Even,ssrc_e,sresult);
      pickCheckerboard(Odd ,ssrc_o,sresult);
-
      ssrc_e = ssrc_e - sr_e;
      RealD error = norm2(ssrc_e);
-      std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl;

+      std::cout<<GridLogMessage << "sE norm diff   "<< norm2(ssrc_e)<< "  vec nrm"<<norm2(sr_e) <<std::endl;
      ssrc_o = ssrc_o - sr_o;
+
      error+= norm2(ssrc_o);
      std::cout<<GridLogMessage << "sO norm diff   "<< norm2(ssrc_o)<< "  vec nrm"<<norm2(sr_o) <<std::endl;
-
-      if(( error>1.0e-4) ) { 
+      if(error>1.0e-4) { 
 	setCheckerboard(ssrc,ssrc_o);
 	setCheckerboard(ssrc,ssrc_e);
-	std::cout<< "DIFF\n " <<ssrc << std::endl;
-	setCheckerboard(ssrc,sr_o);
-	setCheckerboard(ssrc,sr_e);
-	std::cout<< "CBRESULT\n " <<ssrc << std::endl;
-	std::cout<< "RESULT\n " <<sresult<< std::endl;
+	std::cout<< ssrc << std::endl;
      }
-      assert(error<1.0e-4);
    }
+
+
  }

  if (1)
@@ -351,33 +321,28 @@ int main (int argc, char ** argv)
    ref = zero;
    for(int mu=0;mu<Nd;mu++){

-      //    ref =  src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
+      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
      tmp = U[mu]*Cshift(src,mu+1,1);
      for(int i=0;i<ref._odata.size();i++){
-	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
+  ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
      }

      tmp =adj(U[mu])*src;
      tmp =Cshift(tmp,mu+1,-1);
      for(int i=0;i<ref._odata.size();i++){
-	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
+  ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
      }
    }
    ref = -0.5*ref;
  }
-  //  dump=1;
  Dw.Dhop(src,result,1);
  std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
  std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
-  std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
-  std::cout<<GridLogMessage << "norm dag ref    "<< norm2(ref)<<std::endl;
+  std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
+  std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
  err = ref-result; 
-  std::cout<<GridLogMessage << "norm dag diff   "<< norm2(err)<<std::endl;
-  if((norm2(err)>1.0e-4)){
-	std::cout<< "DAG RESULT\n "  <<ref     << std::endl;
-	std::cout<< "DAG sRESULT\n " <<result  << std::endl;
-	std::cout<< "DAG ERR   \n "  << err    <<std::endl;
-  }
+  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
+  assert(norm2(err)<1.0e-4);
  LatticeFermion src_e (FrbGrid);
  LatticeFermion src_o (FrbGrid);
  LatticeFermion r_e   (FrbGrid);
@@ -385,18 +350,13 @@ int main (int argc, char ** argv)
  LatticeFermion r_eo  (FGrid);


-  std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
+  std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
  pickCheckerboard(Even,src_e,src);
  pickCheckerboard(Odd,src_o,src);

  std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
  std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;

-
-  // S-direction is INNERMOST and takes no part in the parity.
-  static int Opt;  // these are a temporary hack
-  static int Comms;  // these are a temporary hack
-
  std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
  std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO                "<<std::endl;
  std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
@@ -409,7 +369,6 @@ int main (int argc, char ** argv)
  {
    Dw.ZeroCounters();
    FGrid->Barrier();
-    Dw.DhopEO(src_o,r_e,DaggerNo);
    double t0=usecond();
    for(int i=0;i<ncall;i++){
      Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -437,19 +396,14 @@ int main (int argc, char ** argv)

  err = r_eo-result; 
  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
-  if((norm2(err)>1.0e-4)){
-	std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
-	std::cout<< "Deo REF\n " <<result  << std::endl;
-	std::cout<< "Deo ERR   \n " << err <<std::endl;
-  }
+  assert(norm2(err)<1.0e-4);

  pickCheckerboard(Even,src_e,err);
  pickCheckerboard(Odd,src_o,err);
  std::cout<<GridLogMessage << "norm diff even  "<< norm2(src_e)<<std::endl;
  std::cout<<GridLogMessage << "norm diff odd   "<< norm2(src_o)<<std::endl;
-
-  //assert(norm2(src_e)<1.0e-4);
-  //assert(norm2(src_o)<1.0e-4);
+  assert(norm2(src_e)<1.0e-4);
+  assert(norm2(src_o)<1.0e-4);

  Grid_finalize();
 }
@@ -37,11 +37,11 @@ struct scal {
  d internal;
 };

-  Gamma::Algebra Gmu [] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
  };

 void benchDw(std::vector<int> & L, int Ls, int threads, int report =0 );
@@ -66,8 +66,7 @@ int main (int argc, char ** argv)

    Vec tsum; tsum = zero;

-    GridParallelRNG          pRNG(&Grid);      
-    pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101}));
+    GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

    std::vector<double> stop(threads);
    Vector<Vec> sum(threads);
@@ -78,7 +77,8 @@ int main (int argc, char ** argv)
    }

    double start=usecond();
-    parallel_for(int t=0;t<threads;t++){
+PARALLEL_FOR_LOOP
+    for(int t=0;t<threads;t++){

      sum[t] = x[t]._odata[0];
      for(int i=0;i<Nloop;i++){
@@ -65,7 +65,7 @@ int main (int argc, char ** argv)

      uint64_t Nloop=NLOOP;

-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeVec z(&Grid); //random(pRNG,z);
      LatticeVec x(&Grid); //random(pRNG,x);
@@ -100,7 +100,7 @@ int main (int argc, char ** argv)
      int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);

-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeVec z(&Grid); //random(pRNG,z);
      LatticeVec x(&Grid); //random(pRNG,x);
@@ -138,7 +138,7 @@ int main (int argc, char ** argv)

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);

-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeVec z(&Grid); //random(pRNG,z);
      LatticeVec x(&Grid); //random(pRNG,x);
@@ -173,7 +173,7 @@ int main (int argc, char ** argv)
      uint64_t Nloop=NLOOP;
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);

-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();
      LatticeVec z(&Grid); //random(pRNG,z);
      LatticeVec x(&Grid); //random(pRNG,x);
      LatticeVec y(&Grid); //random(pRNG,y);
@@ -113,36 +113,6 @@ int main (int argc, char ** argv)
    std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
    std::cout<<GridLogMessage << "******************"<<std::endl;

-#define BENCH_ZDW(A,in,out)			\
-    zDw.CayleyZeroCounters();			\
-    zDw. A (in,out);				\
-    FGrid->Barrier();				\
-    t0=usecond();				\
-    for(int i=0;i<ncall;i++){			\
-      zDw. A (in,out);				\
-    }						\
-    t1=usecond();				\
-    FGrid->Barrier();				\
-    zDw.CayleyReport();							\
-    std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
-    std::cout<<GridLogMessage << "******************"<<std::endl;
-
-#define BENCH_DW_SSC(A,in,out)			\
-    Dw.CayleyZeroCounters();			\
-    Dw. A (in,out);				\
-    FGrid->Barrier();				\
-    t0=usecond();				\
-    for(int i=0;i<ncall;i++){			\
-      __SSC_START ;				\
-      Dw. A (in,out);				\
-      __SSC_STOP ;				\
-    }						\
-    t1=usecond();				\
-    FGrid->Barrier();				\
-    Dw.CayleyReport();					\
-    std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
-    std::cout<<GridLogMessage << "******************"<<std::endl;
-
 #define BENCH_DW_MEO(A,in,out)			\
    Dw.CayleyZeroCounters();			\
    Dw. A (in,out,0);				\
@@ -178,15 +148,9 @@ int main (int argc, char ** argv)
    LatticeFermion sref(sFGrid);
    LatticeFermion result(sFGrid);

-
    std::cout<<GridLogMessage << "Constructing Vec5D Dw "<<std::endl;
    DomainWallFermionVec5dR Dw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5);

-    RealD b=1.5;// Scale factor b+c=2, b-c=1
-    RealD c=0.5;
-    std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
-    ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
-
    std::cout<<GridLogMessage << "Calling Dhop "<<std::endl;
    FGrid->Barrier();

@@ -209,13 +173,10 @@ int main (int argc, char ** argv)

    BENCH_DW_MEO(Dhop    ,src,result);
    BENCH_DW_MEO(DhopEO  ,src_o,r_e);
-    BENCH_DW_SSC(Meooe   ,src_o,r_e);
+    BENCH_DW(Meooe   ,src_o,r_e);
    BENCH_DW(Mooee   ,src_o,r_o);
    BENCH_DW(MooeeInv,src_o,r_o);

-    BENCH_ZDW(Mooee   ,src_o,r_o);
-    BENCH_ZDW(MooeeInv,src_o,r_o);
-
  }

  Grid_finalize();
@@ -1,134 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./benchmarks/Benchmark_staggered.cc
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-#include <Grid/Grid.h>
-
-using namespace std;
-using namespace Grid;
-using namespace Grid::QCD;
-
-int main (int argc, char ** argv)
-{
-  Grid_init(&argc,&argv);
-
-  std::vector<int> latt_size   = GridDefaultLatt();
-  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
-  std::vector<int> mpi_layout  = GridDefaultMpi();
-  GridCartesian               Grid(latt_size,simd_layout,mpi_layout);
-  GridRedBlackCartesian     RBGrid(latt_size,simd_layout,mpi_layout);
-
-  int threads = GridThread::GetThreads();
-  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
-  std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
-  std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
-  std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
-
-  std::vector<int> seeds({1,2,3,4});
-  GridParallelRNG          pRNG(&Grid);
-  pRNG.SeedFixedIntegers(seeds);
-  //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
-
-  typedef typename ImprovedStaggeredFermionR::FermionField FermionField; 
-  typename ImprovedStaggeredFermionR::ImplParams params; 
-
-  FermionField src   (&Grid); random(pRNG,src);
-  FermionField result(&Grid); result=zero;
-  FermionField    ref(&Grid);    ref=zero;
-  FermionField    tmp(&Grid);    tmp=zero;
-  FermionField    err(&Grid);    tmp=zero;
-  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
-  std::vector<LatticeColourMatrix> U(4,&Grid);
-
-  double volume=1;
-  for(int mu=0;mu<Nd;mu++){
-    volume=volume*latt_size[mu];
-  }  
-
-  // Only one non-zero (y)
-#if 0
-  Umu=zero;
-  Complex cone(1.0,0.0);
-  for(int nn=0;nn<Nd;nn++){
-    random(pRNG,U[nn]);
-    if(1) {
-      if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
-      //      else       { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
-      else       { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
-    }
-    PokeIndex<LorentzIndex>(Umu,U[nn],nn);
-  }
-#endif
-
-  for(int mu=0;mu<Nd;mu++){
-    U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
-  }
-  ref = zero;
-  /*  
-  { // Naive wilson implementation
-    ref = zero;
-    for(int mu=0;mu<Nd;mu++){
-      //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
-      tmp = U[mu]*Cshift(src,mu,1);
-      for(int i=0;i<ref._odata.size();i++){
-	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
-      }
-
-      tmp =adj(U[mu])*src;
-      tmp =Cshift(tmp,mu,-1);
-      for(int i=0;i<ref._odata.size();i++){
-	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
-      }
-    }
-  }
-  ref = -0.5*ref;
-  */
-
-  RealD mass=0.1;
-  RealD c1=9.0/8.0;
-  RealD c2=-1.0/24.0;
-  RealD u0=1.0;
-  ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
-  
-  std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
-  int ncall=1000;
-  double t0=usecond();
-  for(int i=0;i<ncall;i++){
-    Ds.Dhop(src,result,0);
-  }
-  double t1=usecond();
-  double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 +  == 1146
-  
-  std::cout<<GridLogMessage << "Called Ds"<<std::endl;
-  std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
-  std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
-  std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
-  err = ref-result; 
-  std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
-
-  Grid_finalize();
-}
@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeColourMatrix z(&Grid);// random(pRNG,z);
      LatticeColourMatrix x(&Grid);// random(pRNG,x);
@@ -88,7 +88,7 @@ int main (int argc, char ** argv)
      int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeColourMatrix z(&Grid); //random(pRNG,z);
      LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -119,7 +119,7 @@ int main (int argc, char ** argv)
      int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeColourMatrix z(&Grid); //random(pRNG,z);
      LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -150,7 +150,7 @@ int main (int argc, char ** argv)
      int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+      //      GridParallelRNG          pRNG(&Grid);      pRNG.SeedRandomDevice();

      LatticeColourMatrix z(&Grid); //random(pRNG,z);
      LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -37,11 +37,11 @@ struct scal {
  d internal;
 };

-  Gamma::Algebra Gmu [] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT
+  Gamma::GammaMatrix Gmu [] = {
+    Gamma::GammaX,
+    Gamma::GammaY,
+    Gamma::GammaZ,
+    Gamma::GammaT
  };

 bool overlapComms = false;
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
  std::vector<int> seeds({1,2,3,4});
  GridParallelRNG          pRNG(&Grid);
  pRNG.SeedFixedIntegers(seeds);
-  //  pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+  //  pRNG.SeedRandomDevice();

  LatticeFermion src   (&Grid); random(pRNG,src);
  LatticeFermion result(&Grid); result=zero;
@@ -106,7 +106,7 @@ int main (int argc, char ** argv)
  { // Naive wilson implementation
    ref = zero;
    for(int mu=0;mu<Nd;mu++){
-      //    ref =  src + Gamma(Gamma::Algebra::GammaX)* src ; // 1-gamma_x
+      //    ref =  src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
      tmp = U[mu]*Cshift(src,mu,1);
      for(int i=0;i<ref._odata.size();i++){
 	ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
@@ -159,7 +159,7 @@ int main (int argc, char ** argv)
    ref = zero;
    for(int mu=0;mu<Nd;mu++){

-      //    ref =  src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
+      //    ref =  src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
      tmp = U[mu]*Cshift(src,mu,1);
      for(int i=0;i<ref._odata.size();i++){
 	ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
@@ -30,11 +30,11 @@ struct scal {
  d internal;
 };

-Gamma::Algebra Gmu [] = {
-    Gamma::Algebra::GammaX,
-    Gamma::Algebra::GammaY,
-    Gamma::Algebra::GammaZ,
-    Gamma::Algebra::GammaT
+Gamma::GammaMatrix Gmu [] = {
+  Gamma::GammaX,
+  Gamma::GammaY,
+  Gamma::GammaZ,
+  Gamma::GammaT
 };

 bool overlapComms = false;
@@ -6,7 +6,7 @@ AC_CANONICAL_TARGET
 AM_INIT_AUTOMAKE(subdir-objects)
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_SRCDIR([lib/Grid.h])
-AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
+AC_CONFIG_HEADERS([lib/Config.h])
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

 ############### Checks for programs
@@ -99,13 +99,6 @@ case ${ac_MKL} in
        AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
 esac

-############### HDF5
-AC_ARG_WITH([hdf5],
-    [AS_HELP_STRING([--with-hdf5=prefix],
-    [try this for a non-standard install prefix of the HDF5 library])],
-    [AM_CXXFLAGS="-I$with_hdf5/include $AM_CXXFLAGS"]
-    [AM_LDFLAGS="-L$with_hdf5/lib $AM_LDFLAGS"])
-
 ############### first-touch
 AC_ARG_ENABLE([numa],
    [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], 
@@ -152,12 +145,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
               [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
               [have_fftw=true])

-AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
-               [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
-               [have_hdf5=true]
-               [LIBS="${LIBS} -lhdf5"], [], [-lhdf5])
-AM_CONDITIONAL(BUILD_HDF5, [ test "${have_hdf5}X" == "trueX" ])
-
 CXXFLAGS=$CXXFLAGS_CPY
 LDFLAGS=$LDFLAGS_CPY

@@ -319,9 +306,9 @@ AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
 AM_CONDITIONAL(BUILD_COMMS_NONE,  [ test "${comms_type}X" == "noneX" ])

 ############### RNG selection
-AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
+AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
 	            [Select Random Number Generator to be used])],\
-	            [ac_RNG=${enable_rng}],[ac_RNG=sitmo])
+	            [ac_RNG=${enable_rng}],[ac_RNG=ranlux48])

 case ${ac_RNG} in
     ranlux48)
@@ -330,9 +317,6 @@ case ${ac_RNG} in
     mt19937)
      AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
     ;;
-     sitmo)
-      AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
-     ;;
     *)
      AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); 
     ;;
@@ -397,14 +381,10 @@ AC_CONFIG_FILES(tests/IO/Makefile)
 AC_CONFIG_FILES(tests/core/Makefile)
 AC_CONFIG_FILES(tests/debug/Makefile)
 AC_CONFIG_FILES(tests/forces/Makefile)
-AC_CONFIG_FILES(tests/hadrons/Makefile)
 AC_CONFIG_FILES(tests/hmc/Makefile)
 AC_CONFIG_FILES(tests/solver/Makefile)
 AC_CONFIG_FILES(tests/qdpxx/Makefile)
-AC_CONFIG_FILES(tests/testu01/Makefile)
 AC_CONFIG_FILES(benchmarks/Makefile)
-AC_CONFIG_FILES(extras/Makefile)
-AC_CONFIG_FILES(extras/Hadrons/Makefile)
 AC_OUTPUT

 echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -427,7 +407,6 @@ RNG choice                  : ${ac_RNG}
 GMP                         : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
 LAPACK                      : ${ac_LAPACK}
 FFTW                        : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
-HDF5                        : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
 build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
 ----- BUILD FLAGS -------------------------------------
 CXXFLAGS:
@@ -1,317 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Application.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Application.hpp>
-#include <Grid/Hadrons/GeneticScheduler.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-#define BIG_SEP "==============="
-#define SEP     "---------------"
-
-/******************************************************************************
- *                       Application implementation                           *
- ******************************************************************************/
-// constructors ////////////////////////////////////////////////////////////////
-Application::Application(void)
-{
-    LOG(Message) << "Modules available:" << std::endl;
-    auto list = ModuleFactory::getInstance().getBuilderList();
-    for (auto &m: list)
-    {
-        LOG(Message) << "  " << m << std::endl;
-    }
-    auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
-    locVol_ = 1;
-    for (unsigned int d = 0; d < dim.size(); ++d)
-    {
-        loc[d]  /= mpi[d];
-        locVol_ *= loc[d];
-    }
-    LOG(Message) << "Global lattice: " << dim << std::endl;
-    LOG(Message) << "MPI partition : " << mpi << std::endl;
-    LOG(Message) << "Local lattice : " << loc << std::endl;
-}
-
-Application::Application(const Application::GlobalPar &par)
-: Application()
-{
-    setPar(par);
-}
-
-Application::Application(const std::string parameterFileName)
-: Application()
-{
-    parameterFileName_ = parameterFileName;
-}
-
-// environment shortcut ////////////////////////////////////////////////////////
-Environment & Application::env(void) const
-{
-    return Environment::getInstance();
-}
-
-// access //////////////////////////////////////////////////////////////////////
-void Application::setPar(const Application::GlobalPar &par)
-{
-    par_ = par;
-    env().setSeed(strToVec<int>(par_.seed));
-}
-
-const Application::GlobalPar & Application::getPar(void)
-{
-    return par_;
-}
-
-// execute /////////////////////////////////////////////////////////////////////
-void Application::run(void)
-{
-    if (!parameterFileName_.empty() and (env().getNModule() == 0))
-    {
-        parseParameterFile(parameterFileName_);
-    }
-    if (!scheduled_)
-    {
-        schedule();
-    }
-    printSchedule();
-    configLoop();
-}
-
-// parse parameter file ////////////////////////////////////////////////////////
-class ObjectId: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(ObjectId,
-                                    std::string, name,
-                                    std::string, type);
-};
-
-void Application::parseParameterFile(const std::string parameterFileName)
-{
-    XmlReader reader(parameterFileName);
-    GlobalPar par;
-    ObjectId  id;
-    
-    LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl;
-    read(reader, "parameters", par);
-    setPar(par);
-    push(reader, "modules");
-    push(reader, "module");
-    do
-    {
-        read(reader, "id", id);
-        env().createModule(id.name, id.type, reader);
-    } while (reader.nextElement("module"));
-    pop(reader);
-    pop(reader);
-}
-
-void Application::saveParameterFile(const std::string parameterFileName)
-{
-    XmlWriter          writer(parameterFileName);
-    ObjectId           id;
-    const unsigned int nMod = env().getNModule();
-    
-    LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
-    write(writer, "parameters", getPar());
-    push(writer, "modules");
-    for (unsigned int i = 0; i < nMod; ++i)
-    {
-        push(writer, "module");
-        id.name = env().getModuleName(i);
-        id.type = env().getModule(i)->getRegisteredName();
-        write(writer, "id", id);
-        env().getModule(i)->saveParameters(writer, "options");
-        pop(writer);
-    }
-    pop(writer);
-    pop(writer);
-}
-
-// schedule computation ////////////////////////////////////////////////////////
-#define MEM_MSG(size)\
-sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)"
-
-#define DEFINE_MEMPEAK \
-auto memPeak = [this](const std::vector<unsigned int> &program)\
-{\
-    unsigned int memPeak;\
-    bool         msg;\
-    \
-    msg = HadronsLogMessage.isActive();\
-    HadronsLogMessage.Active(false);\
-    env().dryRun(true);\
-    memPeak = env().executeProgram(program);\
-    env().dryRun(false);\
-    env().freeAll();\
-    HadronsLogMessage.Active(true);\
-    \
-    return memPeak;\
-}
-
-void Application::schedule(void)
-{
-    DEFINE_MEMPEAK;
-    
-    // build module dependency graph
-    LOG(Message) << "Building module graph..." << std::endl;
-    auto graph = env().makeModuleGraph();
-    auto con = graph.getConnectedComponents();
-    
-    // constrained topological sort using a genetic algorithm
-    LOG(Message) << "Scheduling computation..." << std::endl;
-    LOG(Message) << "               #module= " << graph.size() << std::endl;
-    LOG(Message) << "       population size= " << par_.genetic.popSize << std::endl;
-    LOG(Message) << "       max. generation= " << par_.genetic.maxGen << std::endl;
-    LOG(Message) << "  max. cst. generation= " << par_.genetic.maxCstGen << std::endl;
-    LOG(Message) << "         mutation rate= " << par_.genetic.mutationRate << std::endl;
-    
-    unsigned int                               k = 0, gen, prevPeak, nCstPeak = 0;
-    std::random_device                         rd;
-    GeneticScheduler<unsigned int>::Parameters par;
-    
-    par.popSize      = par_.genetic.popSize;
-    par.mutationRate = par_.genetic.mutationRate;
-    par.seed         = rd();
-    memPeak_         = 0;
-    CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed));
-    for (unsigned int i = 0; i < con.size(); ++i)
-    {
-        GeneticScheduler<unsigned int> scheduler(con[i], memPeak, par);
-        
-        gen = 0;
-        do
-        {
-            LOG(Debug) << "Generation " << gen << ":" << std::endl;
-            scheduler.nextGeneration();
-            if (gen != 0)
-            {
-                if (prevPeak == scheduler.getMinValue())
-                {
-                    nCstPeak++;
-                }
-                else
-                {
-                    nCstPeak = 0;
-                }
-            }
-            
-            prevPeak = scheduler.getMinValue();
-            if (gen % 10 == 0)
-            {
-                LOG(Iterative) << "Generation " << gen << ": "
-                               << MEM_MSG(scheduler.getMinValue()) << std::endl;
-            }
-            
-            gen++;
-        } while ((gen < par_.genetic.maxGen)
-                 and (nCstPeak < par_.genetic.maxCstGen));
-        auto &t = scheduler.getMinSchedule();
-        if (scheduler.getMinValue() > memPeak_)
-        {
-            memPeak_ = scheduler.getMinValue();
-        }
-        for (unsigned int j = 0; j < t.size(); ++j)
-        {
-            program_.push_back(t[j]);
-        }
-    }
-    scheduled_ = true;
-}
-
-void Application::saveSchedule(const std::string filename)
-{
-    TextWriter               writer(filename);
-    std::vector<std::string> program;
-    
-    if (!scheduled_)
-    {
-        HADRON_ERROR("Computation not scheduled");
-    }
-    LOG(Message) << "Saving current schedule to '" << filename << "'..."
-                 << std::endl;
-    for (auto address: program_)
-    {
-        program.push_back(env().getModuleName(address));
-    }
-    write(writer, "schedule", program);
-}
-
-void Application::loadSchedule(const std::string filename)
-{
-    DEFINE_MEMPEAK;
-    
-    TextReader               reader(filename);
-    std::vector<std::string> program;
-    
-    LOG(Message) << "Loading schedule from '" << filename << "'..."
-                 << std::endl;
-    read(reader, "schedule", program);
-    program_.clear();
-    for (auto &name: program)
-    {
-        program_.push_back(env().getModuleAddress(name));
-    }
-    scheduled_ = true;
-    memPeak_   = memPeak(program_);
-}
-
-void Application::printSchedule(void)
-{
-    if (!scheduled_)
-    {
-        HADRON_ERROR("Computation not scheduled");
-    }
-    LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):"
-                 << std::endl;
-    for (unsigned int i = 0; i < program_.size(); ++i)
-    {
-        LOG(Message) << std::setw(4) << i + 1 << ": "
-                     << env().getModuleName(program_[i]) << std::endl;
-    }
-}
-
-// loop on configurations //////////////////////////////////////////////////////
-void Application::configLoop(void)
-{
-    auto range = par_.trajCounter;
-    
-    for (unsigned int t = range.start; t < range.end; t += range.step)
-    {
-        LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
-                     << " " << BIG_SEP << std::endl;
-        env().setTrajectory(t);
-        env().executeProgram(program_);
-    }
-    LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
-    env().freeAll();
-}
@@ -1,132 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Application.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Application_hpp_
-#define Hadrons_Application_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Environment.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-#include <Grid/Hadrons/Modules.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Main program manager                               *
- ******************************************************************************/
-class Application
-{
-public:
-    class TrajRange: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
-                                        unsigned int, start,
-                                        unsigned int, end,
-                                        unsigned int, step);
-    };
-    class GeneticPar: Serializable
-    {
-    public:
-        GeneticPar(void):
-            popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
-                                        unsigned int, popSize,
-                                        unsigned int, maxGen,
-                                        unsigned int, maxCstGen,
-                                        double      , mutationRate);
-    };
-    class GlobalPar: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
-                                        TrajRange,   trajCounter,
-                                        GeneticPar,  genetic,
-                                        std::string, seed);
-    };
-public:
-    // constructors
-    Application(void);
-    Application(const GlobalPar &par);
-    Application(const std::string parameterFileName);
-    // destructor
-    virtual ~Application(void) = default;
-    // access
-    void              setPar(const GlobalPar &par);
-    const GlobalPar & getPar(void);
-    // module creation
-    template <typename M>
-    void createModule(const std::string name);
-    template <typename M>
-    void createModule(const std::string name, const typename M::Par &par);
-    // execute
-    void run(void);
-    // XML parameter file I/O
-    void parseParameterFile(const std::string parameterFileName);
-    void saveParameterFile(const std::string parameterFileName);
-    // schedule computation
-    void schedule(void);
-    void saveSchedule(const std::string filename);
-    void loadSchedule(const std::string filename);
-    void printSchedule(void);
-    // loop on configurations
-    void configLoop(void);
-private:
-    // environment shortcut
-    Environment & env(void) const;
-private:
-    long unsigned int         locVol_;
-    std::string               parameterFileName_{""};
-    GlobalPar                 par_;
-    std::vector<unsigned int> program_;
-    Environment::Size         memPeak_;
-    bool                      scheduled_{false};
-};
-
-/******************************************************************************
- *                     Application template implementation                    *
- ******************************************************************************/
-// module creation /////////////////////////////////////////////////////////////
-template <typename M>
-void Application::createModule(const std::string name)
-{
-    env().createModule<M>(name);
-}
-
-template <typename M>
-void Application::createModule(const std::string name,
-                               const typename M::Par &par)
-{
-    env().createModule<M>(name, par);
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Application_hpp_
@@ -1,743 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Environment.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Environment.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-/******************************************************************************
- *                       Environment implementation                           *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-Environment::Environment(void)
-{
-    nd_ = GridDefaultLatt().size();
-    grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
-        GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()),
-        GridDefaultMpi()));
-    gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
-    auto loc = getGrid()->LocalDimensions();
-    locVol_ = 1;
-    for (unsigned int d = 0; d < loc.size(); ++d)
-    {
-        locVol_ *= loc[d];
-    }
-    rng4d_.reset(new GridParallelRNG(grid4d_.get()));
-}
-
-// dry run /////////////////////////////////////////////////////////////////////
-void Environment::dryRun(const bool isDry)
-{
-    dryRun_ = isDry;
-}
-
-bool Environment::isDryRun(void) const
-{
-    return dryRun_;
-}
-
-// trajectory number ///////////////////////////////////////////////////////////
-void Environment::setTrajectory(const unsigned int traj)
-{
-    traj_ = traj;
-}
-
-unsigned int Environment::getTrajectory(void) const
-{
-    return traj_;
-}
-
-// grids ///////////////////////////////////////////////////////////////////////
-void Environment::createGrid(const unsigned int Ls)
-{
-    if (grid5d_.find(Ls) == grid5d_.end())
-    {
-        auto g = getGrid();
-        
-        grid5d_[Ls].reset(SpaceTimeGrid::makeFiveDimGrid(Ls, g));
-        gridRb5d_[Ls].reset(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, g));
-    }
-}
-
-GridCartesian * Environment::getGrid(const unsigned int Ls) const
-{
-    try
-    {
-        if (Ls == 1)
-        {
-            return grid4d_.get();
-        }
-        else
-        {
-            return grid5d_.at(Ls).get();
-        }
-    }
-    catch(std::out_of_range &)
-    {
-        HADRON_ERROR("no grid with Ls= " << Ls);
-    }
-}
-
-GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const
-{
-    try
-    {
-        if (Ls == 1)
-        {
-            return gridRb4d_.get();
-        }
-        else
-        {
-            return gridRb5d_.at(Ls).get();
-        }
-    }
-    catch(std::out_of_range &)
-    {
-        HADRON_ERROR("no red-black 5D grid with Ls= " << Ls);
-    }
-}
-
-unsigned int Environment::getNd(void) const
-{
-    return nd_;
-}
-
-// random number generator /////////////////////////////////////////////////////
-void Environment::setSeed(const std::vector<int> &seed)
-{
-    rng4d_->SeedFixedIntegers(seed);
-}
-
-GridParallelRNG * Environment::get4dRng(void) const
-{
-    return rng4d_.get();
-}
-
-// module management ///////////////////////////////////////////////////////////
-void Environment::pushModule(Environment::ModPt &pt)
-{
-    std::string name = pt->getName();
-    
-    if (!hasModule(name))
-    {
-        std::vector<unsigned int> inputAddress;
-        unsigned int              address;
-        ModuleInfo                m;
-        
-        m.data = std::move(pt);
-        m.type = typeIdPt(*m.data.get());
-        m.name = name;
-        auto input  = m.data->getInput();
-        for (auto &in: input)
-        {
-            if (!hasObject(in))
-            {
-                addObject(in , -1);
-            }
-            m.input.push_back(objectAddress_[in]);
-        }
-        auto output = m.data->getOutput();
-        module_.push_back(std::move(m));
-        address              = static_cast<unsigned int>(module_.size() - 1);
-        moduleAddress_[name] = address;
-        for (auto &out: output)
-        {
-            if (!hasObject(out))
-            {
-                addObject(out, address);
-            }
-            else
-            {
-                if (object_[objectAddress_[out]].module < 0)
-                {
-                    object_[objectAddress_[out]].module = address;
-                }
-                else
-                {
-                    HADRON_ERROR("object '" + out
-                                 + "' is already produced by module '"
-                                 + module_[object_[getObjectAddress(out)].module].name
-                                 + "' (while pushing module '" + name + "')");
-                }
-            }
-        }
-    }
-    else
-    {
-        HADRON_ERROR("module '" + name + "' already exists");
-    }
-}
-
-unsigned int Environment::getNModule(void) const
-{
-    return module_.size();
-}
-
-void Environment::createModule(const std::string name, const std::string type,
-                               XmlReader &reader)
-{
-    auto &factory = ModuleFactory::getInstance();
-    auto pt       = factory.create(type, name);
-    
-    pt->parseParameters(reader, "options");
-    pushModule(pt);
-}
-
-ModuleBase * Environment::getModule(const unsigned int address) const
-{
-    if (hasModule(address))
-    {
-        return module_[address].data.get();
-    }
-    else
-    {
-        HADRON_ERROR("no module with address " + std::to_string(address));
-    }
-}
-
-ModuleBase * Environment::getModule(const std::string name) const
-{
-    return getModule(getModuleAddress(name));
-}
-
-unsigned int Environment::getModuleAddress(const std::string name) const
-{
-    if (hasModule(name))
-    {
-        return moduleAddress_.at(name);
-    }
-    else
-    {
-        HADRON_ERROR("no module with name '" + name + "'");
-    }
-}
-
-std::string Environment::getModuleName(const unsigned int address) const
-{
-    if (hasModule(address))
-    {
-        return module_[address].name;
-    }
-    else
-    {
-        HADRON_ERROR("no module with address " + std::to_string(address));
-    }
-}
-
-std::string Environment::getModuleType(const unsigned int address) const
-{
-    if (hasModule(address))
-    {
-        return typeName(module_[address].type);
-    }
-    else
-    {
-        HADRON_ERROR("no module with address " + std::to_string(address));
-    }
-}
-
-std::string Environment::getModuleType(const std::string name) const
-{
-    return getModuleType(getModuleAddress(name));
-}
-
-bool Environment::hasModule(const unsigned int address) const
-{
-    return (address < module_.size());
-}
-
-bool Environment::hasModule(const std::string name) const
-{
-    return (moduleAddress_.find(name) != moduleAddress_.end());
-}
-
-Graph<unsigned int> Environment::makeModuleGraph(void) const
-{
-    Graph<unsigned int> moduleGraph;
-    
-    for (unsigned int i = 0; i < module_.size(); ++i)
-    {
-        moduleGraph.addVertex(i);
-        for (auto &j: module_[i].input)
-        {
-            moduleGraph.addEdge(object_[j].module, i);
-        }
-    }
-    
-    return moduleGraph;
-}
-
-#define BIG_SEP "==============="
-#define SEP     "---------------"
-#define MEM_MSG(size)\
-sizeString((size)*locVol_) << " (" << sizeString(size)  << "/site)"
-
-Environment::Size
-Environment::executeProgram(const std::vector<unsigned int> &p)
-{
-    Size                                memPeak = 0, sizeBefore, sizeAfter;
-    std::vector<std::set<unsigned int>> freeProg;
-    bool                                continueCollect, nothingFreed;
-    
-    // build garbage collection schedule
-    freeProg.resize(p.size());
-    for (unsigned int i = 0; i < object_.size(); ++i)
-    {
-        auto pred = [i, this](const unsigned int j)
-        {
-            auto &in = module_[j].input;
-            auto it  = std::find(in.begin(), in.end(), i);
-            
-            return (it != in.end()) or (j == object_[i].module);
-        };
-        auto it = std::find_if(p.rbegin(), p.rend(), pred);
-        if (it != p.rend())
-        {
-            freeProg[p.rend() - it - 1].insert(i);
-        }
-    }
-    
-    // program execution
-    for (unsigned int i = 0; i < p.size(); ++i)
-    {
-        // execute module
-        if (!isDryRun())
-        {
-            LOG(Message) << SEP << " Measurement step " << i+1 << "/"
-                         << p.size() << " (module '" << module_[p[i]].name
-                         << "') " << SEP << std::endl;
-        }
-        (*module_[p[i]].data)();
-        sizeBefore = getTotalSize();
-        // print used memory after execution
-        if (!isDryRun())
-        {
-            LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore)
-                         << std::endl;
-        }
-        if (sizeBefore > memPeak)
-        {
-            memPeak = sizeBefore;
-        }
-        // garbage collection for step i
-        if (!isDryRun())
-        {
-            LOG(Message) << "Garbage collection..." << std::endl;
-        }
-        nothingFreed = true;
-        do
-        {
-            continueCollect = false;
-            auto toFree = freeProg[i];
-            for (auto &j: toFree)
-            {
-                // continue garbage collection while there are still
-                // objects without owners
-                continueCollect = continueCollect or !hasOwners(j);
-                if(freeObject(j))
-                {
-                    // if an object has been freed, remove it from
-                    // the garbage collection schedule
-                    freeProg[i].erase(j);
-                    nothingFreed = false;
-                }
-            }
-        } while (continueCollect);
-        // any remaining objects in step i garbage collection schedule
-        // is scheduled for step i + 1
-        if (i + 1 < p.size())
-        {
-            for (auto &j: freeProg[i])
-            {
-                freeProg[i + 1].insert(j);
-            }
-        }
-        // print used memory after garbage collection if necessary
-        if (!isDryRun())
-        {
-            sizeAfter = getTotalSize();
-            if (sizeBefore != sizeAfter)
-            {
-                LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter)
-                             << std::endl;
-            }
-            else
-            {
-                LOG(Message) << "Nothing to free" << std::endl;
-            }
-        }
-    }
-    
-    return memPeak;
-}
-
-Environment::Size Environment::executeProgram(const std::vector<std::string> &p)
-{
-    std::vector<unsigned int> pAddress;
-    
-    for (auto &n: p)
-    {
-        pAddress.push_back(getModuleAddress(n));
-    }
-    
-    return executeProgram(pAddress);
-}
-
-// general memory management ///////////////////////////////////////////////////
-void Environment::addObject(const std::string name, const int moduleAddress)
-{
-    if (!hasObject(name))
-    {
-        ObjInfo info;
-        
-        info.name   = name;
-        info.module = moduleAddress;
-        object_.push_back(std::move(info));
-        objectAddress_[name] = static_cast<unsigned int>(object_.size() - 1);
-    }
-    else
-    {
-        HADRON_ERROR("object '" + name + "' already exists");
-    }
-}
-
-void Environment::registerObject(const unsigned int address,
-                                 const unsigned int size, const unsigned int Ls)
-{
-    if (!hasRegisteredObject(address))
-    {
-        if (hasObject(address))
-        {
-            object_[address].size         = size;
-            object_[address].Ls           = Ls;
-            object_[address].isRegistered = true;
-        }
-        else
-        {
-            HADRON_ERROR("no object with address " + std::to_string(address));
-        }
-    }
-    else
-    {
-        HADRON_ERROR("object with address " + std::to_string(address)
-                     + " already registered");
-    }
-}
-
-void Environment::registerObject(const std::string name,
-                                 const unsigned int size, const unsigned int Ls)
-{
-    if (!hasObject(name))
-    {
-        addObject(name);
-    }
-    registerObject(getObjectAddress(name), size, Ls);
-}
-
-unsigned int Environment::getObjectAddress(const std::string name) const
-{
-    if (hasObject(name))
-    {
-        return objectAddress_.at(name);
-    }
-    else
-    {
-        HADRON_ERROR("no object with name '" + name + "'");
-    }
-}
-
-std::string Environment::getObjectName(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].name;
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-std::string Environment::getObjectType(const unsigned int address) const
-{
-    if (hasRegisteredObject(address))
-    {
-        return typeName(object_[address].type);
-    }
-    else if (hasObject(address))
-    {
-        HADRON_ERROR("object with address " + std::to_string(address)
-                     + " exists but is not registered");
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-std::string Environment::getObjectType(const std::string name) const
-{
-    return getObjectType(getObjectAddress(name));
-}
-
-Environment::Size Environment::getObjectSize(const unsigned int address) const
-{
-    if (hasRegisteredObject(address))
-    {
-        return object_[address].size;
-    }
-    else if (hasObject(address))
-    {
-        HADRON_ERROR("object with address " + std::to_string(address)
-                     + " exists but is not registered");
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-Environment::Size Environment::getObjectSize(const std::string name) const
-{
-    return getObjectSize(getObjectAddress(name));
-}
-
-unsigned int Environment::getObjectLs(const unsigned int address) const
-{
-    if (hasRegisteredObject(address))
-    {
-        return object_[address].Ls;
-    }
-    else if (hasObject(address))
-    {
-        HADRON_ERROR("object with address " + std::to_string(address)
-                     + " exists but is not registered");
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-unsigned int Environment::getObjectLs(const std::string name) const
-{
-    return getObjectLs(getObjectAddress(name));
-}
-
-bool Environment::hasObject(const unsigned int address) const
-{
-    return (address < object_.size());
-}
-
-bool Environment::hasObject(const std::string name) const
-{
-    auto it = objectAddress_.find(name);
-    
-    return ((it != objectAddress_.end()) and hasObject(it->second));
-}
-
-bool Environment::hasRegisteredObject(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return object_[address].isRegistered;
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::hasRegisteredObject(const std::string name) const
-{
-    if (hasObject(name))
-    {
-        return hasRegisteredObject(getObjectAddress(name));
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::hasCreatedObject(const unsigned int address) const
-{
-    if (hasObject(address))
-    {
-        return (object_[address].data != nullptr);
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::hasCreatedObject(const std::string name) const
-{
-    if (hasObject(name))
-    {
-        return hasCreatedObject(getObjectAddress(name));
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::isObject5d(const unsigned int address) const
-{
-    return (getObjectLs(address) > 1);
-}
-
-bool Environment::isObject5d(const std::string name) const
-{
-    return (getObjectLs(name) > 1);
-}
-
-Environment::Size Environment::getTotalSize(void) const
-{
-    Environment::Size size = 0;
-    
-    for (auto &o: object_)
-    {
-        if (o.isRegistered)
-        {
-            size += o.size;
-        }
-    }
-    
-    return size;
-}
-
-void Environment::addOwnership(const unsigned int owner,
-                               const unsigned int property)
-{
-    if (hasObject(property))
-    {
-        object_[property].owners.insert(owner);
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(property));
-    }
-    if (hasObject(owner))
-    {
-        object_[owner].properties.insert(property);
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(owner));
-    }
-}
-
-void Environment::addOwnership(const std::string owner,
-                               const std::string property)
-{
-    addOwnership(getObjectAddress(owner), getObjectAddress(property));
-}
-
-bool Environment::hasOwners(const unsigned int address) const
-{
-    
-    if (hasObject(address))
-    {
-        return (!object_[address].owners.empty());
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-bool Environment::hasOwners(const std::string name) const
-{
-    return hasOwners(getObjectAddress(name));
-}
-
-bool Environment::freeObject(const unsigned int address)
-{
-    if (!hasOwners(address))
-    {
-        if (!isDryRun() and object_[address].isRegistered)
-        {
-            LOG(Message) << "Destroying object '" << object_[address].name
-                         << "'" << std::endl;
-        }
-        for (auto &p: object_[address].properties)
-        {
-            object_[p].owners.erase(address);
-        }
-        object_[address].size         = 0;
-        object_[address].Ls           = 0;
-        object_[address].isRegistered = false;
-        object_[address].type         = nullptr;
-        object_[address].owners.clear();
-        object_[address].properties.clear();
-        object_[address].data.reset(nullptr);
-        
-        return true;
-    }
-    else
-    {
-        return false;
-    }
-}
-
-bool Environment::freeObject(const std::string name)
-{
-    return freeObject(getObjectAddress(name));
-}
-
-void Environment::freeAll(void)
-{
-    for (unsigned int i = 0; i < object_.size(); ++i)
-    {
-        freeObject(i);
-    }
-}
-
-void Environment::printContent(void)
-{
-    LOG(Message) << "Modules: " << std::endl;
-    for (unsigned int i = 0; i < module_.size(); ++i)
-    {
-        LOG(Message) << std::setw(4) << i << ": "
-                     << getModuleName(i) << std::endl;
-    }
-    LOG(Message) << "Objects: " << std::endl;
-    for (unsigned int i = 0; i < object_.size(); ++i)
-    {
-        LOG(Message) << std::setw(4) << i << ": "
-                     << getObjectName(i) << std::endl;
-    }
-}
@@ -1,385 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Environment.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Environment_hpp_
-#define Hadrons_Environment_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Graph.hpp>
-
-#ifndef SITE_SIZE_TYPE
-#define SITE_SIZE_TYPE unsigned int
-#endif
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Global environment                                 *
- ******************************************************************************/
-// forward declaration of Module
-class ModuleBase;
-
-class Object
-{
-public:
-    Object(void) = default;
-    virtual ~Object(void) = default;
-};
-
-template <typename T>
-class Holder: public Object
-{
-public:
-    Holder(void) = default;
-    Holder(T *pt);
-    virtual ~Holder(void) = default;
-    T &       get(void) const;
-    T *       getPt(void) const;
-    void      reset(T *pt);
-private:
-    std::unique_ptr<T> objPt_{nullptr};
-};
-
-class Environment
-{
-    SINGLETON(Environment);
-public:
-    typedef SITE_SIZE_TYPE                         Size;
-    typedef std::unique_ptr<ModuleBase>            ModPt;
-    typedef std::unique_ptr<GridCartesian>         GridPt;
-    typedef std::unique_ptr<GridRedBlackCartesian> GridRbPt;
-    typedef std::unique_ptr<GridParallelRNG>       RngPt;
-    typedef std::unique_ptr<LatticeBase>           LatticePt;
-private:
-    struct ModuleInfo
-    {
-        const std::type_info      *type{nullptr};
-        std::string               name;
-        ModPt                     data{nullptr};
-        std::vector<unsigned int> input;
-    };
-    struct ObjInfo
-    {
-        Size                    size{0};
-        unsigned int            Ls{0};
-        bool                    isRegistered{false};
-        const std::type_info    *type{nullptr};
-        std::string             name;
-        int                     module{-1};
-        std::set<unsigned int>  owners, properties;
-        std::unique_ptr<Object> data{nullptr};
-    };
-public:
-    // dry run
-    void                    dryRun(const bool isDry);
-    bool                    isDryRun(void) const;
-    // trajectory number
-    void                    setTrajectory(const unsigned int traj);
-    unsigned int            getTrajectory(void) const;
-    // grids
-    void                    createGrid(const unsigned int Ls);
-    GridCartesian *         getGrid(const unsigned int Ls = 1) const;
-    GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
-    unsigned int            getNd(void) const;
-    // random number generator
-    void                    setSeed(const std::vector<int> &seed);
-    GridParallelRNG *       get4dRng(void) const;
-    // module management
-    void                    pushModule(ModPt &pt);
-    template <typename M>
-    void                    createModule(const std::string name);
-    template <typename M>
-    void                    createModule(const std::string name,
-                                         const typename M::Par &par);
-    void                    createModule(const std::string name,
-                                         const std::string type,
-                                         XmlReader &reader);
-    unsigned int            getNModule(void) const;
-    ModuleBase *            getModule(const unsigned int address) const;
-    ModuleBase *            getModule(const std::string name) const;
-    template <typename M>
-    M *                     getModule(const unsigned int address) const;
-    template <typename M>
-    M *                     getModule(const std::string name) const;
-    unsigned int            getModuleAddress(const std::string name) const;
-    std::string             getModuleName(const unsigned int address) const;
-    std::string             getModuleType(const unsigned int address) const;
-    std::string             getModuleType(const std::string name) const;
-    bool                    hasModule(const unsigned int address) const;
-    bool                    hasModule(const std::string name) const;
-    Graph<unsigned int>     makeModuleGraph(void) const;
-    Size                    executeProgram(const std::vector<unsigned int> &p);
-    Size                    executeProgram(const std::vector<std::string> &p);
-    // general memory management
-    void                    addObject(const std::string name,
-                                      const int moduleAddress = -1);
-    void                    registerObject(const unsigned int address,
-                                           const unsigned int size,
-                                           const unsigned int Ls = 1);
-    void                    registerObject(const std::string name,
-                                           const unsigned int size,
-                                           const unsigned int Ls = 1);
-    template <typename T>
-    unsigned int            lattice4dSize(void) const;
-    template <typename T>
-    void                    registerLattice(const unsigned int address,
-                                            const unsigned int Ls = 1);
-    template <typename T>
-    void                    registerLattice(const std::string name,
-                                            const unsigned int Ls = 1);
-    template <typename T>
-    void                    setObject(const unsigned int address, T *object);
-    template <typename T>
-    void                    setObject(const std::string name, T *object);
-    template <typename T>
-    T *                     getObject(const unsigned int address) const;
-    template <typename T>
-    T *                     getObject(const std::string name) const;
-    template <typename T>
-    T *                     createLattice(const unsigned int address);
-    template <typename T>
-    T *                     createLattice(const std::string name);
-    unsigned int            getObjectAddress(const std::string name) const;
-    std::string             getObjectName(const unsigned int address) const;
-    std::string             getObjectType(const unsigned int address) const;
-    std::string             getObjectType(const std::string name) const;
-    Size                    getObjectSize(const unsigned int address) const;
-    Size                    getObjectSize(const std::string name) const;
-    unsigned int            getObjectLs(const unsigned int address) const;
-    unsigned int            getObjectLs(const std::string name) const;
-    bool                    hasObject(const unsigned int address) const;
-    bool                    hasObject(const std::string name) const;
-    bool                    hasRegisteredObject(const unsigned int address) const;
-    bool                    hasRegisteredObject(const std::string name) const;
-    bool                    hasCreatedObject(const unsigned int address) const;
-    bool                    hasCreatedObject(const std::string name) const;
-    bool                    isObject5d(const unsigned int address) const;
-    bool                    isObject5d(const std::string name) const;
-    Environment::Size       getTotalSize(void) const;
-    void                    addOwnership(const unsigned int owner,
-                                         const unsigned int property);
-    void                    addOwnership(const std::string owner,
-                                         const std::string property);
-    bool                    hasOwners(const unsigned int address) const;
-    bool                    hasOwners(const std::string name) const;
-    bool                    freeObject(const unsigned int address);
-    bool                    freeObject(const std::string name);
-    void                    freeAll(void);
-    void                    printContent(void);
-private:
-    // general
-    bool                                   dryRun_{false};
-    unsigned int                           traj_, locVol_;
-    // grids
-    GridPt                                 grid4d_;
-    std::map<unsigned int, GridPt>         grid5d_;
-    GridRbPt                               gridRb4d_;
-    std::map<unsigned int, GridRbPt>       gridRb5d_;
-    unsigned int                           nd_;
-    // random number generator
-    RngPt                                  rng4d_;
-    // module and related maps
-    std::vector<ModuleInfo>                module_;
-    std::map<std::string, unsigned int>    moduleAddress_;
-    // lattice store
-    std::map<unsigned int, LatticePt>      lattice_;
-    // object store
-    std::vector<ObjInfo>                   object_;
-    std::map<std::string, unsigned int>    objectAddress_;
-};
-
-/******************************************************************************
- *                       Holder template implementation                       *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T>
-Holder<T>::Holder(T *pt)
-: objPt_(pt)
-{}
-
-// access //////////////////////////////////////////////////////////////////////
-template <typename T>
-T & Holder<T>::get(void) const
-{
-    return &objPt_.get();
-}
-
-template <typename T>
-T * Holder<T>::getPt(void) const
-{
-    return objPt_.get();
-}
-
-template <typename T>
-void Holder<T>::reset(T *pt)
-{
-    objPt_.reset(pt);
-}
-
-/******************************************************************************
- *                     Environment template implementation                    *
- ******************************************************************************/
-// module management ///////////////////////////////////////////////////////////
-template <typename M>
-void Environment::createModule(const std::string name)
-{
-    ModPt pt(new M(name));
-    
-    pushModule(pt);
-}
-
-template <typename M>
-void Environment::createModule(const std::string name,
-                               const typename M::Par &par)
-{
-    ModPt pt(new M(name));
-    
-    static_cast<M *>(pt.get())->setPar(par);
-    pushModule(pt);
-}
-
-template <typename M>
-M * Environment::getModule(const unsigned int address) const
-{
-    if (auto *pt = dynamic_cast<M *>(getModule(address)))
-    {
-        return pt;
-    }
-    else
-    {
-        HADRON_ERROR("module '" + module_[address].name
-                     + "' does not have type " + typeid(M).name()
-                     + "(object type: " + getModuleType(address) + ")");
-    }
-}
-
-template <typename M>
-M * Environment::getModule(const std::string name) const
-{
-    return getModule<M>(getModuleAddress(name));
-}
-
-template <typename T>
-unsigned int Environment::lattice4dSize(void) const
-{
-    return sizeof(typename T::vector_object)/getGrid()->Nsimd();
-}
-
-template <typename T>
-void Environment::registerLattice(const unsigned int address,
-                                  const unsigned int Ls)
-{
-    createGrid(Ls);
-    registerObject(address, Ls*lattice4dSize<T>(), Ls);
-}
-
-template <typename T>
-void Environment::registerLattice(const std::string name, const unsigned int Ls)
-{
-    createGrid(Ls);
-    registerObject(name, Ls*lattice4dSize<T>(), Ls);
-}
-
-template <typename T>
-void Environment::setObject(const unsigned int address, T *object)
-{
-    if (hasRegisteredObject(address))
-    {
-        object_[address].data.reset(new Holder<T>(object));
-        object_[address].type = &typeid(T);
-    }
-    else if (hasObject(address))
-    {
-        HADRON_ERROR("object with address " + std::to_string(address) +
-                     " exists but is not registered");
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-template <typename T>
-void Environment::setObject(const std::string name, T *object)
-{
-    setObject(getObjectAddress(name), object);
-}
-
-template <typename T>
-T * Environment::getObject(const unsigned int address) const
-{
-    if (hasRegisteredObject(address))
-    {
-        if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
-        {
-            return h->getPt();
-        }
-        else
-        {
-            HADRON_ERROR("object with address " + std::to_string(address) +
-                         " does not have type '" + typeid(T).name() +
-                         "' (has type '" + getObjectType(address) + "')");
-        }
-    }
-    else if (hasObject(address))
-    {
-        HADRON_ERROR("object with address " + std::to_string(address) +
-                     " exists but is not registered");
-    }
-    else
-    {
-        HADRON_ERROR("no object with address " + std::to_string(address));
-    }
-}
-
-template <typename T>
-T * Environment::getObject(const std::string name) const
-{
-    return getObject<T>(getObjectAddress(name));
-}
-
-template <typename T>
-T * Environment::createLattice(const unsigned int address)
-{
-    GridCartesian *g = getGrid(getObjectLs(address));
-    
-    setObject(address, new T(g));
-    
-    return getObject<T>(address);
-}
-
-template <typename T>
-T * Environment::createLattice(const std::string name)
-{
-    return createLattice<T>(getObjectAddress(name));
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Environment_hpp_
@@ -1,106 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Factory.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Factory_hpp_
-#define Hadrons_Factory_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                        abstract factory class                              *
- ******************************************************************************/
-template <typename T>
-class Factory
-{
-public:
-    typedef std::function<std::unique_ptr<T>(const std::string)> Func;
-public:
-    // constructor
-    Factory(void) = default;
-    // destructor
-    virtual ~Factory(void) = default;
-    // registration
-    void registerBuilder(const std::string type, const Func &f);
-    // get builder list
-    std::vector<std::string> getBuilderList(void) const;
-    // factory
-    std::unique_ptr<T> create(const std::string type,
-                              const std::string name) const;
-private:
-    std::map<std::string, Func> builder_;
-};
-
-/******************************************************************************
- *                         template implementation                            *
- ******************************************************************************/
-// registration ////////////////////////////////////////////////////////////////
-template <typename T>
-void Factory<T>::registerBuilder(const std::string type, const Func &f)
-{
-    builder_[type] = f;
-}
-
-// get module list /////////////////////////////////////////////////////////////
-template <typename T>
-std::vector<std::string> Factory<T>::getBuilderList(void) const
-{
-    std::vector<std::string> list;
-    
-    for (auto &b: builder_)
-    {
-        list.push_back(b.first);
-    }
-    
-    return list;
-}
-
-// factory /////////////////////////////////////////////////////////////////////
-template <typename T>
-std::unique_ptr<T> Factory<T>::create(const std::string type,
-                                      const std::string name) const
-{
-    Func func;
-    
-    try
-    {
-        func = builder_.at(type);
-    }
-    catch (std::out_of_range &)
-    {
-        HADRON_ERROR("object of type '" + type + "' unknown");
-    }
-    
-    return func(name);
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Factory_hpp_
@@ -1,329 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/GeneticScheduler.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_GeneticScheduler_hpp_
-#define Hadrons_GeneticScheduler_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Graph.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                   Scheduler based on a genetic algorithm                   *
- ******************************************************************************/
-template <typename T>
-class GeneticScheduler
-{
-public:
-    typedef std::vector<T>                   Gene;
-    typedef std::pair<Gene *, Gene *>        GenePair;
-    typedef std::function<int(const Gene &)> ObjFunc;
-    struct Parameters
-    {
-        double       mutationRate;
-        unsigned int popSize, seed;
-    };
-public:
-    // constructor
-    GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
-                     const Parameters &par);
-    // destructor
-    virtual ~GeneticScheduler(void) = default;
-    // access
-    const Gene & getMinSchedule(void);
-    int          getMinValue(void);
-    // breed a new generation
-    void nextGeneration(void);
-    // heuristic benchmarks
-    void benchmarkCrossover(const unsigned int nIt);
-    // print population
-    friend std::ostream & operator<<(std::ostream &out,
-                                     const GeneticScheduler<T> &s)
-    {
-        out << "[";
-        for (auto &p: s.population_)
-        {
-            out << p.first << ", ";
-        }
-        out << "\b\b]";
-        
-        return out;
-    }
-private:
-    // evolution steps
-    void initPopulation(void);
-    void doCrossover(void);
-    void doMutation(void);
-    // genetic operators
-    GenePair selectPair(void);
-    void     crossover(Gene &c1, Gene &c2, const Gene &p1, const Gene &p2);
-    void     mutation(Gene &m, const Gene &c);
-    
-private:
-    Graph<T>                 &graph_;
-    const ObjFunc            &func_;
-    const Parameters         par_;
-    std::multimap<int, Gene> population_;
-    std::mt19937             gen_;
-};
-
-/******************************************************************************
- *                       template implementation                              *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T>
-GeneticScheduler<T>::GeneticScheduler(Graph<T> &graph, const ObjFunc &func,
-                                      const Parameters &par)
-: graph_(graph)
-, func_(func)
-, par_(par)
-{
-    gen_.seed(par_.seed);
-}
-
-// access //////////////////////////////////////////////////////////////////////
-template <typename T>
-const typename GeneticScheduler<T>::Gene &
-GeneticScheduler<T>::getMinSchedule(void)
-{
-    return population_.begin()->second;
-}
-
-template <typename T>
-int GeneticScheduler<T>::getMinValue(void)
-{
-    return population_.begin()->first;
-}
-
-// breed a new generation //////////////////////////////////////////////////////
-template <typename T>
-void GeneticScheduler<T>::nextGeneration(void)
-{
-    // random initialization of the population if necessary
-    if (population_.size() != par_.popSize)
-    {
-        initPopulation();
-    }
-    LOG(Debug) << "Starting population:\n" << *this << std::endl;
-    
-    // random mutations
-    //PARALLEL_FOR_LOOP
-    for (unsigned int i = 0; i < par_.popSize; ++i)
-    {
-        doMutation();
-    }
-    LOG(Debug) << "After mutations:\n" << *this << std::endl;
-    
-    // mating
-    //PARALLEL_FOR_LOOP
-    for (unsigned int i = 0; i < par_.popSize/2; ++i)
-    {
-        doCrossover();
-    }
-    LOG(Debug) << "After mating:\n" << *this << std::endl;
-    
-    // grim reaper
-    auto it = population_.begin();
-    
-    std::advance(it, par_.popSize);
-    population_.erase(it, population_.end());
-    LOG(Debug) << "After grim reaper:\n" << *this << std::endl;
-}
-
-// evolution steps /////////////////////////////////////////////////////////////
-template <typename T>
-void GeneticScheduler<T>::initPopulation(void)
-{
-    population_.clear();
-    for (unsigned int i = 0; i < par_.popSize; ++i)
-    {
-        auto p = graph_.topoSort(gen_);
-        
-        population_.insert(std::make_pair(func_(p), p));
-    }
-}
-
-template <typename T>
-void GeneticScheduler<T>::doCrossover(void)
-{
-    auto p = selectPair();
-    Gene &p1 = *(p.first), &p2 = *(p.second);
-    Gene c1, c2;
-    
-    crossover(c1, c2, p1, p2);
-    PARALLEL_CRITICAL
-    {
-        population_.insert(std::make_pair(func_(c1), c1));
-        population_.insert(std::make_pair(func_(c2), c2));
-    }
-}
-
-template <typename T>
-void GeneticScheduler<T>::doMutation(void)
-{
-    std::uniform_real_distribution<double>      mdis(0., 1.);
-    std::uniform_int_distribution<unsigned int> pdis(0, population_.size() - 1);
-    
-    if (mdis(gen_) < par_.mutationRate)
-    {
-        Gene m;
-        auto it = population_.begin();
-        
-        std::advance(it, pdis(gen_));
-        mutation(m, it->second);
-        PARALLEL_CRITICAL
-        {
-            population_.insert(std::make_pair(func_(m), m));
-        }
-    }
-}
-
-// genetic operators ///////////////////////////////////////////////////////////
-template <typename T>
-typename GeneticScheduler<T>::GenePair GeneticScheduler<T>::selectPair(void)
-{
-    std::vector<double> prob;
-    unsigned int        ind;
-    Gene                *p1, *p2;
-    
-    for (auto &c: population_)
-    {
-        prob.push_back(1./c.first);
-    }
-    do
-    {
-        double probCpy;
-        
-        std::discrete_distribution<unsigned int> dis1(prob.begin(), prob.end());
-        auto rIt = population_.begin();
-        ind = dis1(gen_);
-        std::advance(rIt, ind);
-        p1 = &(rIt->second);
-        probCpy   = prob[ind];
-        prob[ind] = 0.;
-        std::discrete_distribution<unsigned int> dis2(prob.begin(), prob.end());
-        rIt = population_.begin();
-        std::advance(rIt, dis2(gen_));
-        p2 = &(rIt->second);
-        prob[ind] = probCpy;
-    } while (p1 == p2);
-    
-    return std::make_pair(p1, p2);
-}
-
-template <typename T>
-void GeneticScheduler<T>::crossover(Gene &c1, Gene &c2, const Gene &p1,
-                                    const Gene &p2)
-{
-    Gene                                        buf;
-    std::uniform_int_distribution<unsigned int> dis(0, p1.size() - 1);
-    unsigned int                                cut = dis(gen_);
-    
-    c1.clear();
-    buf = p2;
-    for (unsigned int i = 0; i < cut; ++i)
-    {
-        c1.push_back(p1[i]);
-        buf.erase(std::find(buf.begin(), buf.end(), p1[i]));
-    }
-    for (unsigned int i = 0; i < buf.size(); ++i)
-    {
-        c1.push_back(buf[i]);
-    }
-    c2.clear();
-    buf = p2;
-    for (unsigned int i = cut; i < p1.size(); ++i)
-    {
-        buf.erase(std::find(buf.begin(), buf.end(), p1[i]));
-    }
-    for (unsigned int i = 0; i < buf.size(); ++i)
-    {
-        c2.push_back(buf[i]);
-    }
-    for (unsigned int i = cut; i < p1.size(); ++i)
-    {
-        c2.push_back(p1[i]);
-    }
-}
-
-template <typename T>
-void GeneticScheduler<T>::mutation(Gene &m, const Gene &c)
-{
-    Gene                                        buf;
-    std::uniform_int_distribution<unsigned int> dis(0, c.size() - 1);
-    unsigned int                                cut = dis(gen_);
-    Graph<T>                                    g1 = graph_, g2 = graph_;
-    
-    for (unsigned int i = 0; i < cut; ++i)
-    {
-        g1.removeVertex(c[i]);
-    }
-    for (unsigned int i = cut; i < c.size(); ++i)
-    {
-        g2.removeVertex(c[i]);
-    }
-    if (g1.size() > 0)
-    {
-        buf = g1.topoSort(gen_);
-    }
-    if (g2.size() > 0)
-    {
-        m = g2.topoSort(gen_);
-    }
-    for (unsigned int i = cut; i < c.size(); ++i)
-    {
-        m.push_back(buf[i - cut]);
-    }
-}
-
-template <typename T>
-void GeneticScheduler<T>::benchmarkCrossover(const unsigned int nIt)
-{
-    Gene   p1, p2, c1, c2;
-    double neg = 0., eq = 0., pos = 0., total;
-    int    improvement;
-    
-    LOG(Message) << "Benchmarking crossover..." << std::endl;
-    for (unsigned int i = 0; i < nIt; ++i)
-    {
-        p1 = graph_.topoSort(gen_);
-        p2 = graph_.topoSort(gen_);
-        crossover(c1, c2, p1, p2);
-        improvement = (func_(c1) + func_(c2) - func_(p1) - func_(p2))/2;
-        if (improvement < 0) neg++; else if (improvement == 0) eq++; else pos++;
-    }
-    total = neg + eq + pos;
-    LOG(Message) << "  -: " << neg/total << "  =: " << eq/total
-                 << "  +: " << pos/total << std::endl;
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_GeneticScheduler_hpp_
@@ -1,82 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Global.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Global.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-HadronsLogger Hadrons::HadronsLogError(1,"Error");
-HadronsLogger Hadrons::HadronsLogWarning(1,"Warning");
-HadronsLogger Hadrons::HadronsLogMessage(1,"Message");
-HadronsLogger Hadrons::HadronsLogIterative(1,"Iterative");
-HadronsLogger Hadrons::HadronsLogDebug(1,"Debug");
-
-// pretty size formatting //////////////////////////////////////////////////////
-std::string Hadrons::sizeString(long unsigned int bytes)
-
-{
-    constexpr unsigned int bufSize = 256;
-    const char             *suffixes[7] = {"", "K", "M", "G", "T", "P", "E"};
-    char                   buf[256];
-    long unsigned int      s     = 0;
-    double                 count = bytes;
-    
-    while (count >= 1024 && s < 7)
-    {
-        s++;
-        count /= 1024;
-    }
-    if (count - floor(count) == 0.0)
-    {
-        snprintf(buf, bufSize, "%d %sB", (int)count, suffixes[s]);
-    }
-    else
-    {
-        snprintf(buf, bufSize, "%.1f %sB", count, suffixes[s]);
-    }
-    
-    return std::string(buf);
-}
-
-// type utilities //////////////////////////////////////////////////////////////
-constexpr unsigned int maxNameSize = 1024u;
-
-std::string Hadrons::typeName(const std::type_info *info)
-{
-    char        *buf;
-    std::string name;
-    
-    buf  = abi::__cxa_demangle(info->name(), nullptr, nullptr, nullptr);
-    name = buf;
-    free(buf);
-    
-    return name;
-}
@@ -1,150 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Global.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Global_hpp_
-#define Hadrons_Global_hpp_
-
-#include <set>
-#include <stack>
-#include <Grid/Grid.h>
-#include <cxxabi.h>
-
-#define BEGIN_HADRONS_NAMESPACE \
-namespace Grid {\
-using namespace QCD;\
-namespace Hadrons {\
-using Grid::operator<<;
-#define END_HADRONS_NAMESPACE }}
-
-#define BEGIN_MODULE_NAMESPACE(name)\
-namespace name {\
-using Grid::operator<<;
-#define END_MODULE_NAMESPACE }
-
-/* the 'using Grid::operator<<;' statement prevents a very nasty compilation
- * error with GCC 5 (clang & GCC 6 compile fine without it).
- */
-
-// FIXME: find a way to do that in a more general fashion
-#ifndef FIMPL
-#define FIMPL WilsonImplR
-#endif
-
-BEGIN_HADRONS_NAMESPACE
-
-// type aliases
-#define TYPE_ALIASES(FImpl, suffix)\
-typedef FermionOperator<FImpl>                       FMat##suffix;             \
-typedef typename FImpl::FermionField                 FermionField##suffix;     \
-typedef typename FImpl::PropagatorField              PropagatorField##suffix;  \
-typedef typename FImpl::SitePropagator               SitePropagator##suffix;   \
-typedef typename FImpl::DoubledGaugeField            DoubledGaugeField##suffix;\
-typedef std::function<void(FermionField##suffix &,                             \
-                      const FermionField##suffix &)> SolverFn##suffix;
-
-// logger
-class HadronsLogger: public Logger
-{
-public:
-    HadronsLogger(int on, std::string nm): Logger("Hadrons", on, nm,
-                                                  GridLogColours, "BLACK"){};
-};
-
-#define LOG(channel) std::cout << HadronsLog##channel
-#define HADRON_ERROR(msg)\
-LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\
-           << __LINE__ << ")" << std::endl;\
-abort();
-
-#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl;
-
-extern HadronsLogger HadronsLogError;
-extern HadronsLogger HadronsLogWarning;
-extern HadronsLogger HadronsLogMessage;
-extern HadronsLogger HadronsLogIterative;
-extern HadronsLogger HadronsLogDebug;
-
-// singleton pattern
-#define SINGLETON(name)\
-public:\
-    name(const name &e) = delete;\
-    void operator=(const name &e) = delete;\
-    static name & getInstance(void)\
-    {\
-        static name e;\
-        return e;\
-    }\
-private:\
-    name(void);
-
-#define SINGLETON_DEFCTOR(name)\
-public:\
-    name(const name &e) = delete;\
-    void operator=(const name &e) = delete;\
-    static name & getInstance(void)\
-    {\
-        static name e;\
-        return e;\
-    }\
-private:\
-    name(void) = default;
-
-// pretty size formating
-std::string sizeString(long unsigned int bytes);
-
-// type utilities
-template <typename T>
-const std::type_info * typeIdPt(const T &x)
-{
-    return &typeid(x);
-}
-
-std::string typeName(const std::type_info *info);
-
-template <typename T>
-const std::type_info * typeIdPt(void)
-{
-    return &typeid(T);
-}
-
-template <typename T>
-std::string typeName(const T &x)
-{
-    return typeName(typeIdPt(x));
-}
-
-template <typename T>
-std::string typeName(void)
-{
-    return typeName(typeIdPt<T>());
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Global_hpp_
@@ -1,760 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Graph.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Graph_hpp_
-#define Hadrons_Graph_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                          Oriented graph class                              *
- ******************************************************************************/
-// I/O for edges
-template <typename T>
-std::ostream & operator<<(std::ostream &out, const std::pair<T, T> &e)
-{
-    out << "\""  << e.first << "\" -> \"" << e.second << "\"";
-    
-    return out;
-}
-
-// main class
-template <typename T>
-class Graph
-{
-public:
-    typedef std::pair<T, T> Edge;
-public:
-    // constructor
-    Graph(void);
-    // destructor
-    virtual ~Graph(void) = default;
-    // access
-    void           addVertex(const T &value);
-    void           addEdge(const Edge &e);
-    void           addEdge(const T &start, const T &end);
-    std::vector<T> getVertices(void) const;
-    void           removeVertex(const T &value);
-    void           removeEdge(const Edge &e);
-    void           removeEdge(const T &start, const T &end);
-    unsigned int   size(void) const;
-    // tests
-    bool gotValue(const T &value) const;
-    // graph topological manipulations
-    std::vector<T>              getAdjacentVertices(const T &value) const;
-    std::vector<T>              getChildren(const T &value) const;
-    std::vector<T>              getParents(const T &value) const;
-    std::vector<T>              getRoots(void) const;
-    std::vector<Graph<T>>       getConnectedComponents(void) const;
-    std::vector<T>              topoSort(void);
-    template <typename Gen>
-    std::vector<T>              topoSort(Gen &gen);
-    std::vector<std::vector<T>> allTopoSort(void);
-    // I/O
-    friend std::ostream & operator<<(std::ostream &out, const Graph<T> &g)
-    {
-        out << "{";
-        for (auto &e: g.edgeSet_)
-        {
-            out << e << ", ";
-        }
-        if (g.edgeSet_.size() != 0)
-        {
-            out << "\b\b";
-        }
-        out << "}";
-        
-        return out;
-    }
-private:
-    // vertex marking
-    void      mark(const T &value, const bool doMark = true);
-    void      markAll(const bool doMark = true);
-    void      unmark(const T &value);
-    void      unmarkAll(void);
-    bool      isMarked(const T &value) const;
-    const T * getFirstMarked(const bool isMarked = true) const;
-    template <typename Gen>
-    const T * getRandomMarked(const bool isMarked, Gen &gen);
-    const T * getFirstUnmarked(void) const;
-    template <typename Gen>
-    const T * getRandomUnmarked(Gen &gen);
-    // prune marked/unmarked vertices
-    void removeMarked(const bool isMarked = true);
-    void removeUnmarked(void);
-    // depth-first search marking
-    void depthFirstSearch(void);
-    void depthFirstSearch(const T &root);
-private:
-    std::map<T, bool>  isMarked_;
-    std::set<Edge>     edgeSet_;
-};
-
-// build depedency matrix from topological sorts
-template <typename T>
-std::map<T, std::map<T, bool>>
-makeDependencyMatrix(const std::vector<std::vector<T>> &topSort);
-
-/******************************************************************************
- *                       template implementation                              *
- ******************************************************************************
- * in all the following V is the number of vertex and E is the number of edge
- * in the worst case E = V^2
- */
-
-// constructor /////////////////////////////////////////////////////////////////
-template <typename T>
-Graph<T>::Graph(void)
-{}
-
-// access //////////////////////////////////////////////////////////////////////
-// complexity: log(V)
-template <typename T>
-void Graph<T>::addVertex(const T &value)
-{
-    isMarked_[value] = false;
-}
-
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::addEdge(const Edge &e)
-{
-    addVertex(e.first);
-    addVertex(e.second);
-    edgeSet_.insert(e);
-}
-
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::addEdge(const T &start, const T &end)
-{
-    addEdge(Edge(start, end));
-}
-
-template <typename T>
-std::vector<T> Graph<T>::getVertices(void) const
-{
-    std::vector<T> vertex;
-    
-    for (auto &v: isMarked_)
-    {
-        vertex.push_back(v.first);
-    }
-    
-    return vertex;
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-void Graph<T>::removeVertex(const T &value)
-{
-    // remove vertex from the mark table
-    auto vIt = isMarked_.find(value);
-    
-    if (vIt != isMarked_.end())
-    {
-        isMarked_.erase(vIt);
-    }
-    else
-    {
-        HADRON_ERROR("vertex " << value << " does not exists");
-    }
-
-    // remove all edges containing the vertex
-    auto pred = [&value](const Edge &e)
-    {
-        return ((e.first == value) or (e.second == value));
-    };
-    auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
-    
-    while (eIt != edgeSet_.end())
-    {
-        edgeSet_.erase(eIt);
-        eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::removeEdge(const Edge &e)
-{
-    auto eIt = edgeSet_.find(e);
-    
-    if (eIt != edgeSet_.end())
-    {
-        edgeSet_.erase(eIt);
-    }
-    else
-    {
-        HADRON_ERROR("edge "  << e << " does not exists");
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::removeEdge(const T &start, const T &end)
-{
-    removeEdge(Edge(start, end));
-}
-
-// complexity: O(1)
-template <typename T>
-unsigned int Graph<T>::size(void) const
-{
-    return isMarked_.size();
-}
-
-// tests ///////////////////////////////////////////////////////////////////////
-// complexity: O(log(V))
-template <typename T>
-bool Graph<T>::gotValue(const T &value) const
-{
-    auto it = isMarked_.find(value);
-    
-    if (it == isMarked_.end())
-    {
-        return false;
-    }
-    else
-    {
-        return true;
-    }
-}
-
-// vertex marking //////////////////////////////////////////////////////////////
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::mark(const T &value, const bool doMark)
-{
-    if (gotValue(value))
-    {
-        isMarked_[value] = doMark;
-    }
-    else
-    {
-        HADRON_ERROR("vertex " << value << " does not exists");
-    }
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-void Graph<T>::markAll(const bool doMark)
-{
-    for (auto &v: isMarked_)
-    {
-        mark(v.first, doMark);
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-void Graph<T>::unmark(const T &value)
-{
-    mark(value, false);
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-void Graph<T>::unmarkAll(void)
-{
-    markAll(false);
-}
-
-// complexity: O(log(V))
-template <typename T>
-bool Graph<T>::isMarked(const T &value) const
-{
-    if (gotValue(value))
-    {
-        return isMarked_.at(value);
-    }
-    else
-    {
-        HADRON_ERROR("vertex " << value << " does not exists");
-        
-        return false;
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-const T * Graph<T>::getFirstMarked(const bool isMarked) const
-{
-    auto pred = [&isMarked](const std::pair<T, bool> &v)
-    {
-        return (v.second == isMarked);
-    };
-    auto vIt = std::find_if(isMarked_.begin(), isMarked_.end(), pred);
-    
-    if (vIt != isMarked_.end())
-    {
-        return &(vIt->first);
-    }
-    else
-    {
-        return nullptr;
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-template <typename Gen>
-const T * Graph<T>::getRandomMarked(const bool isMarked, Gen &gen)
-{
-    auto pred = [&isMarked](const std::pair<T, bool> &v)
-    {
-        return (v.second == isMarked);
-    };
-    std::uniform_int_distribution<unsigned int> dis(0, size() - 1);
-    auto                                        rIt = isMarked_.begin();
-    
-    std::advance(rIt, dis(gen));
-    auto vIt = std::find_if(rIt, isMarked_.end(), pred);
-    if (vIt != isMarked_.end())
-    {
-        return &(vIt->first);
-    }
-    else
-    {
-        vIt = std::find_if(isMarked_.begin(), rIt, pred);
-        if (vIt != rIt)
-        {
-            return &(vIt->first);
-        }
-        else
-        {
-            return nullptr;
-        }
-    }
-}
-
-// complexity: O(log(V))
-template <typename T>
-const T * Graph<T>::getFirstUnmarked(void) const
-{
-    return getFirstMarked(false);
-}
-
-// complexity: O(log(V))
-template <typename T>
-template <typename Gen>
-const T * Graph<T>::getRandomUnmarked(Gen &gen)
-{
-    return getRandomMarked(false, gen);
-}
-
-// prune marked/unmarked vertices //////////////////////////////////////////////
-// complexity: O(V^2*log(V))
-template <typename T>
-void Graph<T>::removeMarked(const bool isMarked)
-{
-    auto isMarkedCopy = isMarked_;
-    
-    for (auto &v: isMarkedCopy)
-    {
-        if (v.second == isMarked)
-        {
-            removeVertex(v.first);
-        }
-    }
-}
-
-// complexity: O(V^2*log(V))
-template <typename T>
-void Graph<T>::removeUnmarked(void)
-{
-    removeMarked(false);
-}
-
-// depth-first search marking //////////////////////////////////////////////////
-// complexity: O(V*log(V))
-template <typename T>
-void Graph<T>::depthFirstSearch(void)
-{
-    depthFirstSearch(isMarked_.begin()->first);
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-void Graph<T>::depthFirstSearch(const T &root)
-{
-    std::vector<T> adjacentVertex;
-    
-    mark(root);
-    adjacentVertex = getAdjacentVertices(root);
-    for (auto &v: adjacentVertex)
-    {
-        if (!isMarked(v))
-        {
-            depthFirstSearch(v);
-        }
-    }
-}
-
-// graph topological manipulations /////////////////////////////////////////////
-// complexity: O(V*log(V))
-template <typename T>
-std::vector<T> Graph<T>::getAdjacentVertices(const T &value) const
-{
-    std::vector<T> adjacentVertex;
-    
-    auto pred = [&value](const Edge &e)
-    {
-        return ((e.first == value) or (e.second == value));
-    };
-    auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
-    
-    while (eIt != edgeSet_.end())
-    {
-        if (eIt->first == value)
-        {
-            adjacentVertex.push_back((*eIt).second);
-        }
-        else if (eIt->second == value)
-        {
-            adjacentVertex.push_back((*eIt).first);
-        }
-        eIt = find_if(++eIt, edgeSet_.end(), pred);
-    }
-    
-    return adjacentVertex;
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-std::vector<T> Graph<T>::getChildren(const T &value) const
-{
-    std::vector<T> child;
-    
-    auto pred = [&value](const Edge &e)
-    {
-        return (e.first == value);
-    };
-    auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
-    
-    while (eIt != edgeSet_.end())
-    {
-        child.push_back((*eIt).second);
-        eIt = find_if(++eIt, edgeSet_.end(), pred);
-    }
-    
-    return child;
-}
-
-// complexity: O(V*log(V))
-template <typename T>
-std::vector<T> Graph<T>::getParents(const T &value) const
-{
-    std::vector<T> parent;
-    
-    auto pred = [&value](const Edge &e)
-    {
-        return (e.second == value);
-    };
-    auto eIt = find_if(edgeSet_.begin(), edgeSet_.end(), pred);
-    
-    while (eIt != edgeSet_.end())
-    {
-        parent.push_back((*eIt).first);
-        eIt = find_if(++eIt, edgeSet_.end(), pred);
-    }
-    
-    return parent;
-}
-
-// complexity: O(V^2*log(V))
-template <typename T>
-std::vector<T> Graph<T>::getRoots(void) const
-{
-    std::vector<T> root;
-    
-    for (auto &v: isMarked_)
-    {
-        auto parent = getParents(v.first);
-        
-        if (parent.size() == 0)
-        {
-            root.push_back(v.first);
-        }
-    }
-    
-    return root;
-}
-
-// complexity: O(V^2*log(V))
-template <typename T>
-std::vector<Graph<T>> Graph<T>::getConnectedComponents(void) const
-{
-    std::vector<Graph<T>> res;
-    Graph<T>              copy(*this);
-    
-    while (copy.size() > 0)
-    {
-        copy.depthFirstSearch();
-        res.push_back(copy);
-        res.back().removeUnmarked();
-        res.back().unmarkAll();
-        copy.removeMarked();
-        copy.unmarkAll();
-    }
-    
-    return res;
-}
-
-// topological sort using a directed DFS algorithm
-// complexity: O(V*log(V))
-template <typename T>
-std::vector<T> Graph<T>::topoSort(void)
-{
-    std::stack<T>     buf;
-    std::vector<T>    res;
-    const T           *vPt;
-    std::map<T, bool> tmpMarked(isMarked_);
-
-    // visit function
-    std::function<void(const T &)> visit = [&](const T &v)
-    {
-        if (tmpMarked.at(v))
-        {
-            HADRON_ERROR("cannot topologically sort a cyclic graph");
-        }
-        if (!isMarked(v))
-        {
-            std::vector<T> child = getChildren(v);
-
-            tmpMarked[v] = true;
-            for (auto &c: child)
-            {
-                visit(c);
-            }
-            mark(v);
-            tmpMarked[v] = false;
-            buf.push(v);
-        }
-    };
-    
-    // reset temporary marks
-    for (auto &v: tmpMarked)
-    {
-        tmpMarked.at(v.first) = false;
-    }
-    
-    // loop on unmarked vertices
-    unmarkAll();
-    vPt = getFirstUnmarked();
-    while (vPt)
-    {
-        visit(*vPt);
-        vPt = getFirstUnmarked();
-    }
-    unmarkAll();
-    
-    // create result vector
-    while (!buf.empty())
-    {
-        res.push_back(buf.top());
-        buf.pop();
-    }
-    
-    return res;
-}
-
-// random version of the topological sort
-// complexity: O(V*log(V))
-template <typename T>
-template <typename Gen>
-std::vector<T> Graph<T>::topoSort(Gen &gen)
-{
-    std::stack<T>     buf;
-    std::vector<T>    res;
-    const T           *vPt;
-    std::map<T, bool> tmpMarked(isMarked_);
-    
-    // visit function
-    std::function<void(const T &)> visit = [&](const T &v)
-    {
-        if (tmpMarked.at(v))
-        {
-            HADRON_ERROR("cannot topologically sort a cyclic graph");
-        }
-        if (!isMarked(v))
-        {
-            std::vector<T> child = getChildren(v);
-            
-            tmpMarked[v] = true;
-            std::shuffle(child.begin(), child.end(), gen);
-            for (auto &c: child)
-            {
-                visit(c);
-            }
-            mark(v);
-            tmpMarked[v] = false;
-            buf.push(v);
-        }
-    };
-    
-    // reset temporary marks
-    for (auto &v: tmpMarked)
-    {
-        tmpMarked.at(v.first) = false;
-    }
-    
-    // loop on unmarked vertices
-    unmarkAll();
-    vPt = getRandomUnmarked(gen);
-    while (vPt)
-    {
-        visit(*vPt);
-        vPt = getRandomUnmarked(gen);
-    }
-    unmarkAll();
-    
-    // create result vector
-    while (!buf.empty())
-    {
-        res.push_back(buf.top());
-        buf.pop();
-    }
-    
-    return res;
-}
-
-// generate all possible topological sorts
-// Y. L. Varol & D. Rotem, Comput. J. 24(1), pp. 83–84, 1981
-// http://comjnl.oupjournals.org/cgi/doi/10.1093/comjnl/24.1.83
-// complexity: O(V*log(V)) (from the paper, but really ?)
-template <typename T>
-std::vector<std::vector<T>> Graph<T>::allTopoSort(void)
-{
-    std::vector<std::vector<T>>    res;
-    std::map<T, std::map<T, bool>> iMat;
-    
-    // create incidence matrix
-    for (auto &v1: isMarked_)
-    for (auto &v2: isMarked_)
-    {
-        iMat[v1.first][v2.first] = false;
-    }
-    for (auto &v: isMarked_)
-    {
-        auto cVec = getChildren(v.first);
-        
-        for (auto &c: cVec)
-        {
-            iMat[v.first][c] = true;
-        }
-    }
-    
-    // generate initial topological sort
-    res.push_back(topoSort());
-    
-    // generate all other topological sorts by permutation
-    std::vector<T>            p = res[0];
-    const unsigned int        n = size();
-    std::vector<unsigned int> loc(n);
-    unsigned int              i, k, k1;
-    T                         obj_k, obj_k1;
-    bool                      isFinal;
-    
-    for (unsigned int j = 0; j < n; ++j)
-    {
-        loc[j] = j;
-    }
-    i = 0;
-    while (i < n-1)
-    {
-        k      = loc[i];
-        k1     = k + 1;
-        obj_k  = p[k];
-        if (k1 >= n)
-        {
-            isFinal = true;
-            obj_k1  = obj_k;
-        }
-        else
-        {
-            isFinal = false;
-            obj_k1  = p[k1];
-        }
-        if (iMat[res[0][i]][obj_k1] or isFinal)
-        {
-            for (unsigned int l = k; l >= i + 1; --l)
-            {
-                p[l]   = p[l-1];
-            }
-            p[i]   = obj_k;
-            loc[i] = i;
-            i++;
-        }
-        else
-        {
-            p[k]   = obj_k1;
-            p[k1]  = obj_k;
-            loc[i] = k1;
-            i      = 0;
-            res.push_back(p);
-        }
-    }
-    
-    return res;
-}
-
-// build depedency matrix from topological sorts ///////////////////////////////
-// complexity: something like O(V^2*log(V!))
-template <typename T>
-std::map<T, std::map<T, bool>>
-makeDependencyMatrix(const std::vector<std::vector<T>> &topSort)
-{
-    std::map<T, std::map<T, bool>> m;
-    const std::vector<T>           &vList = topSort[0];
-    
-    for (auto &v1: vList)
-    for (auto &v2: vList)
-    {
-        bool dep = true;
-        
-        for (auto &t: topSort)
-        {
-            auto i1 = std::find(t.begin(), t.end(), v1);
-            auto i2 = std::find(t.begin(), t.end(), v2);
-            
-            dep = dep and (i1 - i2 > 0);
-            if (!dep) break;
-        }
-        m[v1][v2] = dep;
-    }
-    
-    return m;
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Graph_hpp_
@@ -1,80 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/HadronsXmlRun.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Application.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-int main(int argc, char *argv[])
-{
-    // command line: <parameter file> [<precomputed schedule>] [Grid options]
-    if (argc < 2)
-    {
-        std::cerr << "usage: " << argv[0] << " <parameter file> [<precomputed schedule>] [Grid options]";
-        std::cerr << std::endl;
-        std::exit(EXIT_FAILURE);
-    }
-    
-    std::string parameterFileName = argv[1];
-    std::string scheduleFileName;
-    
-    // the second argument is a schedule file unless it starts with '-'
-    if ((argc > 2) and (argv[2][0] != '-'))
-    {
-        scheduleFileName = argv[2];
-    }
-    
-    // start Grid and align the Hadrons loggers on the Grid ones
-    Grid_init(&argc, &argv);
-    HadronsLogError.Active(GridLogError.isActive());
-    HadronsLogWarning.Active(GridLogWarning.isActive());
-    HadronsLogMessage.Active(GridLogMessage.isActive());
-    HadronsLogIterative.Active(GridLogIterative.isActive());
-    HadronsLogDebug.Active(GridLogDebug.isActive());
-    LOG(Message) << "Grid initialized" << std::endl;
-    
-    // build the application, optionally load a schedule, then run
-    Application application(parameterFileName);
-    
-    application.parseParameterFile(parameterFileName);
-    if (scheduleFileName.empty() == false)
-    {
-        application.loadSchedule(scheduleFileName);
-    }
-    application.run();
-    
-    // shut Grid down
-    LOG(Message) << "Grid is finalizing now" << std::endl;
-    Grid_finalize();
-    
-    return EXIT_SUCCESS;
-}
@@ -1,72 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/HadronsXmlSchedule.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Application.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-int main(int argc, char *argv[])
-{
-    // command line: <parameter file> <schedule output> [Grid options]
-    if (argc < 3)
-    {
-        std::cerr << "usage: " << argv[0] << " <parameter file> <schedule output> [Grid options]";
-        std::cerr << std::endl;
-        std::exit(EXIT_FAILURE);
-    }
-    
-    std::string parameterFileName = argv[1];
-    std::string scheduleFileName  = argv[2];
-    
-    // start Grid and align the Hadrons loggers on the Grid ones
-    Grid_init(&argc, &argv);
-    HadronsLogError.Active(GridLogError.isActive());
-    HadronsLogWarning.Active(GridLogWarning.isActive());
-    HadronsLogMessage.Active(GridLogMessage.isActive());
-    HadronsLogIterative.Active(GridLogIterative.isActive());
-    HadronsLogDebug.Active(GridLogDebug.isActive());
-    LOG(Message) << "Grid initialized" << std::endl;
-    
-    // parse the parameters, compute the schedule, print it and save it
-    Application application;
-    
-    application.parseParameterFile(parameterFileName);
-    application.schedule();
-    application.printSchedule();
-    application.saveSchedule(scheduleFileName);
-    
-    // shut Grid down
-    LOG(Message) << "Grid is finalizing now" << std::endl;
-    Grid_finalize();
-    
-    return EXIT_SUCCESS;
-}
@@ -1,29 +0,0 @@
-# Automake rules for the Hadrons static library and its two executables.
-lib_LIBRARIES = libHadrons.a
-bin_PROGRAMS  = HadronsXmlRun HadronsXmlSchedule
-
-# presumably defines the modules_cc and modules_hpp lists used below -- confirm
-include modules.inc
-
-# sources compiled into libHadrons.a
-libHadrons_a_SOURCES = \
-    $(modules_cc)      \
-    Application.cc     \
-    Environment.cc     \
-    Global.cc          \
-    Module.cc
-# headers are installed under $(pkgincludedir)/Hadrons; the nobase_ prefix
-# keeps their relative sub-directories at install time
-libHadrons_adir = $(pkgincludedir)/Hadrons
-nobase_libHadrons_a_HEADERS = \
-	$(modules_hpp)            \
-	Application.hpp           \
-	Environment.hpp           \
-	Factory.hpp               \
-	GeneticScheduler.hpp      \
-	Global.hpp                \
-	Graph.hpp                 \
-	Module.hpp                \
-	Modules.hpp               \
-	ModuleFactory.hpp
-
-# both programs link against the local libHadrons.a and against Grid
-HadronsXmlRun_SOURCES = HadronsXmlRun.cc
-HadronsXmlRun_LDADD   = libHadrons.a -lGrid
-
-HadronsXmlSchedule_SOURCES = HadronsXmlSchedule.cc
-HadronsXmlSchedule_LDADD   = libHadrons.a -lGrid
@@ -1,71 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Module.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Module.hpp>
-
-using namespace Grid;
-using namespace QCD;
-using namespace Hadrons;
-
-/******************************************************************************
- *                       ModuleBase implementation                            *
- ******************************************************************************/
-// constructor: store the module name and bind the Environment instance ////////
-ModuleBase::ModuleBase(const std::string name)
-: name_(name), env_(Environment::getInstance())
-{
-}
-
-// access: module name (returned by value) /////////////////////////////////////
-std::string ModuleBase::getName() const
-{
-    return name_;
-}
-
-// access: environment reference bound at construction /////////////////////////
-Environment & ModuleBase::env() const
-{
-    return env_;
-}
-
-// get factory registration name if available
-// Base implementation: reports, through HADRON_ERROR, that the module type is
-// not registered in the factory. The classes generated by MODULE_REGISTER and
-// MODULE_REGISTER_NS override this function to return their registered name.
-// NOTE(review): there is no return statement; this is only well-defined if
-// HADRON_ERROR never returns (e.g. throws or exits) -- confirm against its
-// definition.
-std::string ModuleBase::getRegisteredName(void)
-{
-    HADRON_ERROR("module '" + getName() + "' has a type not registered"
-                 + " in the factory");
-}
-
-// execution: setup always runs, execute() is skipped on a dry run /////////////
-void ModuleBase::operator()()
-{
-    setup();
-    if (env().isDryRun())
-    {
-        return;
-    }
-    execute();
-}
@@ -1,198 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Module.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Module_hpp_
-#define Hadrons_Module_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Environment.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-// module registration macros
-// MODULE_REGISTER(mod, base) declares a class `mod` deriving publicly from
-// `base` and inheriting its constructors; its getRegisteredName() returns the
-// string "mod". It also declares a registrar class and one static instance of
-// it: the registrar constructor registers, in the ModuleFactory singleton, a
-// builder named "mod" that returns a std::unique_ptr<mod> built from the module
-// name. Wrap a `base` argument containing commas in ARG(...).
-#define MODULE_REGISTER(mod, base)\
-class mod: public base\
-{\
-public:\
-    typedef base Base;\
-    using Base::Base;\
-    virtual std::string getRegisteredName(void)\
-    {\
-        return std::string(#mod);\
-    }\
-};\
-class mod##ModuleRegistrar\
-{\
-public:\
-    mod##ModuleRegistrar(void)\
-    {\
-        ModuleFactory &modFac = ModuleFactory::getInstance();\
-        modFac.registerBuilder(#mod, [&](const std::string name)\
-                              {\
-                                  return std::unique_ptr<mod>(new mod(name));\
-                              });\
-    }\
-};\
-static mod##ModuleRegistrar mod##ModuleRegistrarInstance;
-
-// Namespaced variant of MODULE_REGISTER: the registered name and the value
-// returned by getRegisteredName() are "ns::mod", the registrar identifiers are
-// prefixed with `ns`, and the builder creates an `ns::mod`.
-// NOTE(review): the builder refers to ns::mod, so the macro is presumably meant
-// to be expanded inside namespace `ns` -- confirm at the call sites.
-#define MODULE_REGISTER_NS(mod, base, ns)\
-class mod: public base\
-{\
-public:\
-    typedef base Base;\
-    using Base::Base;\
-    virtual std::string getRegisteredName(void)\
-    {\
-        return std::string(#ns "::" #mod);\
-    }\
-};\
-class ns##mod##ModuleRegistrar\
-{\
-public:\
-    ns##mod##ModuleRegistrar(void)\
-    {\
-        ModuleFactory &modFac = ModuleFactory::getInstance();\
-        modFac.registerBuilder(#ns "::" #mod, [&](const std::string name)\
-                              {\
-                                  return std::unique_ptr<ns::mod>(new ns::mod(name));\
-                              });\
-    }\
-};\
-static ns##mod##ModuleRegistrar ns##mod##ModuleRegistrarInstance;
-
-// Expands to its arguments unchanged; lets a macro argument containing commas
-// (e.g. a template type with several parameters) be passed as one argument.
-#define ARG(...) __VA_ARGS__
-
-/******************************************************************************
- *                            Module class                                    *
- ******************************************************************************/
-// base class
-// Abstract interface common to all modules: a name, a reference to the
-// environment, the input/output object names, parameter (de)serialisation and
-// the setup/execute steps.
-class ModuleBase
-{
-public:
-    // construction and destruction
-    ModuleBase(const std::string name);
-    virtual ~ModuleBase() = default;
-    // module name and environment reference
-    std::string getName() const;
-    Environment &env() const;
-    // name under which the module type is registered in the factory, if any
-    virtual std::string getRegisteredName();
-    // names of the objects the module consumes and produces
-    virtual std::vector<std::string> getInput() = 0;
-    virtual std::vector<std::string> getOutput() = 0;
-    // read/write the module parameters
-    virtual void parseParameters(XmlReader &reader, const std::string name) = 0;
-    virtual void saveParameters(XmlWriter &writer, const std::string name) = 0;
-    // optional setup step, does nothing by default
-    virtual void setup() {}
-    // operator() is the entry point; execute() is implemented by each module
-    void operator()();
-    virtual void execute() = 0;
-private:
-    std::string name_;
-    Environment &env_;
-};
-
-// derived class, templating the parameter class
-// Holds one instance of the parameter type P and implements the parameter
-// (de)serialisation declared by ModuleBase.
-template <typename P>
-class Module: public ModuleBase
-{
-public:
-    using Par = P;
-public:
-    // construction and destruction
-    Module(const std::string name);
-    virtual ~Module() = default;
-    // read/write the stored parameters
-    virtual void parseParameters(XmlReader &reader, const std::string name);
-    virtual void saveParameters(XmlWriter &writer, const std::string name);
-    // read-only access to the parameters, and replacement of their value
-    const P & par() const;
-    void      setPar(const P &par);
-private:
-    P par_;
-};
-
-// no parameter type
-// NoPar is an empty tag class; Module<NoPar> is the specialisation used by
-// modules that take no parameters.
-class NoPar {};
-
-template <>
-class Module<NoPar>: public ModuleBase
-{
-public:
-    // construction and destruction
-    Module(const std::string name): ModuleBase(name) {}
-    virtual ~Module() = default;
-    // nothing to read: parsing is a no-op
-    virtual void parseParameters(XmlReader &reader, const std::string name) {}
-    // writes an "options" element with no content
-    virtual void saveParameters(XmlWriter &writer, const std::string name)
-    {
-        push(writer, "options");
-        pop(writer);
-    }
-};
-
-/******************************************************************************
- *                           Template implementation                          *
- ******************************************************************************/
-// constructor: forwards the module name to ModuleBase
-template <typename P>
-Module<P>::Module(const std::string name): ModuleBase(name)
-{
-}
-
-// read the parameters from the element called `name`
-template <typename P>
-void Module<P>::parseParameters(XmlReader &reader, const std::string name)
-{
-    read(reader, name, par_);
-}
-
-// write the parameters to an element called `name`
-template <typename P>
-void Module<P>::saveParameters(XmlWriter &writer, const std::string name)
-{
-    write(writer, name, par_);
-}
-
-// read-only access to the stored parameters
-template <typename P>
-const P & Module<P>::par() const
-{
-    return par_;
-}
-
-// replace the stored parameters by a copy of `par`
-template <typename P>
-void Module<P>::setPar(const P &par)
-{
-    par_ = par;
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Module_hpp_
@@ -1,49 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/ModuleFactory.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_ModuleFactory_hpp_
-#define Hadrons_ModuleFactory_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Factory.hpp>
-#include <Grid/Hadrons/Module.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                            ModuleFactory                                   *
- ******************************************************************************/
-// Factory specialised for ModuleBase objects.
-// SINGLETON_DEFCTOR is defined elsewhere; presumably it makes the class a
-// singleton with a default constructor and provides getInstance() -- confirm.
-class ModuleFactory: public Factory<ModuleBase>
-{
-    SINGLETON_DEFCTOR(ModuleFactory)
-};
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_ModuleFactory_hpp_
@@ -1,40 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
-#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
-#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
-#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
-#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
-#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
-#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
-#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
-#include <Grid/Hadrons/Modules/MSource/Point.hpp>
-#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
-#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
-#include <Grid/Hadrons/Modules/Quark.hpp>
@@ -1,134 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MAction/DWF.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_DWF_hpp_
-#define Hadrons_DWF_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                     Domain wall quark action                               *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-// Parameters of the domain wall action module: name of the gauge field object
-// (gauge), Ls, mass and M5.
-// GRID_SERIALIZABLE_CLASS_MEMBERS is defined elsewhere; presumably it declares
-// the listed members together with their serialisation code -- confirm.
-class DWFPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(DWFPar,
-                                    std::string, gauge,
-                                    unsigned int, Ls,
-                                    double      , mass,
-                                    double      , M5);
-};
-
-// Module creating a domain wall fermion matrix for the implementation FImpl.
-template <typename FImpl>
-class TDWF: public Module<DWFPar>
-{
-public:
-    TYPE_ALIASES(FImpl,);
-public:
-    // construction and destruction
-    TDWF(const std::string name);
-    virtual ~TDWF() = default;
-    // names of the objects consumed and produced
-    virtual std::vector<std::string> getInput();
-    virtual std::vector<std::string> getOutput();
-    // object registration
-    virtual void setup();
-    // creation of the fermion matrix
-    virtual void execute();
-};
-
-// registered in the factory as "MAction::DWF"
-MODULE_REGISTER_NS(DWF, TDWF<FIMPL>, MAction);
-
-/******************************************************************************
- *                        DWF template implementation                         *
- ******************************************************************************/
-// constructor: forwards the module name to Module<DWFPar> /////////////////////
-template <typename FImpl>
-TDWF<FImpl>::TDWF(const std::string name): Module<DWFPar>(name)
-{
-}
-
-// dependencies: the gauge field named in the parameters ///////////////////////
-template <typename FImpl>
-std::vector<std::string> TDWF<FImpl>::getInput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(par().gauge);
-    
-    return names;
-}
-
-// products: one object carrying the module name ///////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TDWF<FImpl>::getOutput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(getName());
-    
-    return names;
-}
-
-// setup: register the output object with its size and Ls //////////////////////
-template <typename FImpl>
-void TDWF<FImpl>::setup()
-{
-    // twice the 4d lattice size of the doubled gauge field type
-    unsigned int size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
-    
-    env().registerObject(getName(), size, par().Ls);
-}
-
-// execution: build the domain wall matrix and store it under the module name //
-template <typename FImpl>
-void TDWF<FImpl>::execute()
-{
-    LOG(Message) << "Setting up domain wall fermion matrix with m= "
-                 << par().mass << ", M5= " << par().M5 << " and Ls= "
-                 << par().Ls << " using gauge field '" << par().gauge << "'"
-                 << std::endl;
-    
-    // the grids for this Ls are created before they are fetched below
-    env().createGrid(par().Ls);
-    
-    auto &gaugeField = *env().template getObject<LatticeGaugeField>(par().gauge);
-    auto &grid4d     = *env().getGrid();
-    auto &rbGrid4d   = *env().getRbGrid();
-    auto &grid5d     = *env().getGrid(par().Ls);
-    auto &rbGrid5d   = *env().getRbGrid(par().Ls);
-    
-    // the pointer is handed over to the environment
-    FMat *matrix = new DomainWallFermion<FImpl>(gaugeField, grid5d, rbGrid5d,
-                                                grid4d, rbGrid4d,
-                                                par().mass, par().M5);
-    
-    env().setObject(getName(), matrix);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_DWF_hpp_
@@ -1,126 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MAction/Wilson.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Wilson_hpp_
-#define Hadrons_Wilson_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                            TWilson quark action                            *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MAction)
-
-// Parameters of the Wilson action module: name of the gauge field object
-// (gauge) and mass.
-// GRID_SERIALIZABLE_CLASS_MEMBERS is defined elsewhere; presumably it declares
-// the listed members together with their serialisation code -- confirm.
-class WilsonPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
-                                    std::string, gauge,
-                                    double     , mass);
-};
-
-// Module creating a Wilson fermion matrix for the implementation FImpl.
-template <typename FImpl>
-class TWilson: public Module<WilsonPar>
-{
-public:
-    TYPE_ALIASES(FImpl,);
-public:
-    // construction and destruction
-    TWilson(const std::string name);
-    virtual ~TWilson() = default;
-    // names of the objects consumed and produced
-    virtual std::vector<std::string> getInput();
-    virtual std::vector<std::string> getOutput();
-    // object registration
-    virtual void setup();
-    // creation of the fermion matrix
-    virtual void execute();
-};
-
-// registered in the factory as "MAction::Wilson"
-MODULE_REGISTER_NS(Wilson, TWilson<FIMPL>, MAction);
-
-/******************************************************************************
- *                     TWilson template implementation                        *
- ******************************************************************************/
-// constructor: forwards the module name to Module<WilsonPar> //////////////////
-template <typename FImpl>
-TWilson<FImpl>::TWilson(const std::string name): Module<WilsonPar>(name)
-{
-}
-
-// dependencies: the gauge field named in the parameters ///////////////////////
-template <typename FImpl>
-std::vector<std::string> TWilson<FImpl>::getInput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(par().gauge);
-    
-    return names;
-}
-
-// products: one object carrying the module name ///////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TWilson<FImpl>::getOutput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(getName());
-    
-    return names;
-}
-
-// setup: register the output object with its size /////////////////////////////
-template <typename FImpl>
-void TWilson<FImpl>::setup()
-{
-    // twice the 4d lattice size of the doubled gauge field type
-    unsigned int size = 2*env().template lattice4dSize<typename FImpl::DoubledGaugeField>();
-    
-    env().registerObject(getName(), size);
-}
-
-// execution: build the Wilson matrix and store it under the module name ///////
-template <typename FImpl>
-void TWilson<FImpl>::execute()
-{
-    LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
-                 << " using gauge field '" << par().gauge << "'" << std::endl;
-    
-    auto &gaugeField = *env().template getObject<LatticeGaugeField>(par().gauge);
-    auto &fullGrid   = *env().getGrid();
-    auto &rbGrid     = *env().getRbGrid();
-    
-    // the pointer is handed over to the environment
-    FMat *matrix = new WilsonFermion<FImpl>(gaugeField, fullGrid, rbGrid,
-                                            par().mass);
-    
-    env().setObject(getName(), matrix);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Wilson_hpp_
@@ -1,131 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MContraction/Baryon.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Baryon_hpp_
-#define Hadrons_Baryon_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                               Baryon                                       *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-// Parameters of the baryon contraction module: names of the three quark
-// propagator objects (q1, q2, q3) and the output file name (output).
-// GRID_SERIALIZABLE_CLASS_MEMBERS is defined elsewhere; presumably it declares
-// the listed members together with their serialisation code -- confirm.
-class BaryonPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(BaryonPar,
-                                    std::string, q1,
-                                    std::string, q2,
-                                    std::string, q3,
-                                    std::string, output);
-};
-
-// Baryon contraction module; each of the three quarks has its own fermion
-// implementation type.
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-class TBaryon: public Module<BaryonPar>
-{
-public:
-    TYPE_ALIASES(FImpl1, 1);
-    TYPE_ALIASES(FImpl2, 2);
-    TYPE_ALIASES(FImpl3, 3);
-    // serialisable container for the correlator written by execute()
-    class Result: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                        std::vector<std::vector<std::vector<Complex>>>, corr);
-    };
-public:
-    // construction and destruction
-    TBaryon(const std::string name);
-    virtual ~TBaryon() = default;
-    // names of the objects consumed and produced
-    virtual std::vector<std::string> getInput();
-    virtual std::vector<std::string> getOutput();
-    // contraction and output
-    virtual void execute();
-};
-
-// registered in the factory as "MContraction::Baryon"; ARG keeps the commas of
-// the template argument list inside a single macro argument
-MODULE_REGISTER_NS(Baryon, ARG(TBaryon<FIMPL, FIMPL, FIMPL>), MContraction);
-
-/******************************************************************************
- *                         TBaryon implementation                             *
- ******************************************************************************/
-// constructor: forwards the module name to Module<BaryonPar> //////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-TBaryon<FImpl1, FImpl2, FImpl3>::TBaryon(const std::string name)
-: Module<BaryonPar>(name)
-{
-}
-
-// dependencies: the three quark propagators, in the order q1, q2, q3 //////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getInput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(par().q1);
-    names.push_back(par().q2);
-    names.push_back(par().q3);
-    
-    return names;
-}
-
-// products: one object carrying the module name ///////////////////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getOutput()
-{
-    std::vector<std::string> names;
-    
-    names.push_back(getName());
-    
-    return names;
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-// Fetches the three quark propagators named in the parameters and writes the
-// (currently empty) result to the XML file par().output. The contraction
-// itself is not implemented yet.
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
-{
-    LOG(Message) << "Computing baryon contractions '" << getName() << "' using"
-                 << " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
-                 << par().q3 << "'" << std::endl;
-    
-    XmlWriter             writer(par().output);
-    PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1);
-    PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2);
-    // third quark: look up q3 (it was fetched under the name of q2 before)
-    PropagatorField3      &q3 = *env().template getObject<PropagatorField3>(par().q3);
-    LatticeComplex        c(env().getGrid());
-    Result                result;
-    
-    // FIXME: do contractions
-    
-    // NOTE(review): the tag "meson" looks copied from the meson module; it is
-    // left unchanged here because it defines the output file layout.
-    write(writer, "meson", result);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Baryon_hpp_
@@ -1,137 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Meson_hpp_
-#define Hadrons_Meson_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                                TMeson                                       *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class MesonPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
-                                    std::string,    q1,          // name of the first propagator object
-                                    std::string,    q2,          // name of the second propagator object
-                                    std::string,    output,      // handed to XmlWriter in execute()
-                                    Gamma::Algebra, gammaSource, // builds gSrc in execute()
-                                    Gamma::Algebra, gammaSink);  // builds gSnk in execute()
-};
-
-template <typename FImpl1, typename FImpl2>
-class TMeson: public Module<MesonPar>
-{
-public:
-    TYPE_ALIASES(FImpl1, 1); // presumably provides PropagatorField1 - see Global.hpp
-    TYPE_ALIASES(FImpl2, 2); // presumably provides PropagatorField2 - see Global.hpp
-    class Result: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr); // one entry per slice from sliceSum
-    };
-public:
-    // constructor (name is forwarded to Module<MesonPar>)
-    TMeson(const std::string name);
-    // destructor
-    virtual ~TMeson(void) = default;
-    // dependencies/products: input {q1, q2}, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // execution: computes the correlator and writes it through XmlWriter
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Meson, ARG(TMeson<FIMPL, FIMPL>), MContraction);
-
-/******************************************************************************
- *                           TMeson implementation                            *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2>
-TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
-: Module<MesonPar>(name) // the name is only forwarded to the Module base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2>
-std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
-{
-    // the module depends on the two quark objects named in its parameters
-    std::vector<std::string> deps{par().q1, par().q2};
-    return deps;
-}
-
-template <typename FImpl1, typename FImpl2>
-std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
-{
-    // the only declared output is the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2>
-void TMeson<FImpl1, FImpl2>::execute(void)
-{
-    LOG(Message) << "Computing meson contractions '" << getName() << "' using"
-                 << " quarks '" << par().q1 << "' and '" << par().q2 << "'"
-                 << std::endl;
-    // output writer, the two input propagators and the gamma matrices
-    XmlWriter             writer(par().output);
-    PropagatorField1      &q1 = *env().template getObject<PropagatorField1>(par().q1);
-    PropagatorField2      &q2 = *env().template getObject<PropagatorField2>(par().q2);
-    LatticeComplex        c(env().getGrid());
-    Gamma                 gSrc(par().gammaSource), gSnk(par().gammaSink);
-    Gamma                 g5(Gamma::Algebra::Gamma5);
-    std::vector<TComplex> buf;
-    Result                result;
-    // trace of the contraction, then sliceSum along Tp; each slice goes into corr
-    c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
-    sliceSum(c, buf, Tp);
-    result.corr.resize(buf.size());
-    for (unsigned int t = 0; t < buf.size(); ++t)
-    {
-        result.corr[t] = TensorRemove(buf[t]);
-    }
-    write(writer, "meson", result);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Meson_hpp_
@@ -1,78 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Load.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MGauge;
-
-/******************************************************************************
-*                           TLoad implementation                               *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-TLoad::TLoad(const std::string name)
-: Module<LoadPar>(name) // the name is only forwarded to the Module base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> TLoad::getInput(void)
-{
-    // the configuration comes from a file, so no other object is needed:
-    // the dependency list is empty
-    return std::vector<std::string>();
-}
-
-std::vector<std::string> TLoad::getOutput(void)
-{
-    // the gauge field is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void TLoad::setup(void)
-{
-    env().registerLattice<LatticeGaugeField>(getName()); // registered here, created in execute()
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void TLoad::execute(void)
-{
-    NerscField  header;
-    std::string fileName = par().file + "."
-                           + std::to_string(env().getTrajectory());
-    // the file read is "<file>.<trajectory>"; its header is logged after the read
-    LOG(Message) << "Loading NERSC configuration from file '" << fileName
-                 << "'" << std::endl;
-    LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
-    NerscIO::readConfiguration(U, header, fileName);
-    LOG(Message) << "NERSC header:" << std::endl;
-    dump_nersc_header(header, LOG(Message));
-}
@@ -1,73 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Load.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Load_hpp_
-#define Hadrons_Load_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         Load a NERSC configuration                         *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MGauge)
-
-class LoadPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(LoadPar,
-                                    std::string, file); // prefix; execute() appends "." + trajectory number
-};
-
-class TLoad: public Module<LoadPar>
-{
-public:
-    // constructor (name is forwarded to Module<LoadPar>)
-    TLoad(const std::string name);
-    // destructor
-    virtual ~TLoad(void) = default;
-    // dependencies/products: no input, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers a LatticeGaugeField under the module name
-    virtual void setup(void);
-    // execution: reads the NERSC file "<file>.<trajectory>" into that field
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Load, TLoad, MGauge);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Load_hpp_
@@ -1,69 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Random.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MGauge;
-
-/******************************************************************************
-*                           TRandom implementation                            *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-TRandom::TRandom(const std::string name)
-: Module<NoPar>(name) // no parameters; the name is only forwarded to the base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> TRandom::getInput(void)
-{
-    return {}; // a random configuration depends on no other object
-}
-
-std::vector<std::string> TRandom::getOutput(void)
-{
-    // the gauge field is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void TRandom::setup(void)
-{
-    env().registerLattice<LatticeGaugeField>(getName()); // registered here, created in execute()
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void TRandom::execute(void)
-{
-    LOG(Message) << "Generating random gauge configuration" << std::endl;
-    LatticeGaugeField &gauge = *env().createLattice<LatticeGaugeField>(getName());
-    SU3::HotConfiguration(*env().get4dRng(), gauge); // filled using the 4d RNG
-}
@@ -1,66 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Random.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Random_hpp_
-#define Hadrons_Random_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                             Random gauge                                   *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MGauge)
-
-class TRandom: public Module<NoPar>
-{
-public:
-    // constructor (name is forwarded to Module<NoPar>)
-    TRandom(const std::string name);
-    // destructor
-    virtual ~TRandom(void) = default;
-    // dependencies/products: no input, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers a LatticeGaugeField under the module name
-    virtual void setup(void);
-    // execution: fills that field with SU3::HotConfiguration and the 4d RNG
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Random, TRandom, MGauge);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Random_hpp_
@@ -1,69 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Unit.cc
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MGauge;
-
-/******************************************************************************
-*                            TUnit implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-TUnit::TUnit(const std::string name)
-: Module<NoPar>(name) // no parameters; the name is only forwarded to the base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> TUnit::getInput(void)
-{
-    return {}; // a unit configuration depends on no other object
-}
-
-std::vector<std::string> TUnit::getOutput(void)
-{
-    // the gauge field is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void TUnit::setup(void)
-{
-    env().registerLattice<LatticeGaugeField>(getName()); // registered here, created in execute()
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void TUnit::execute(void)
-{
-    LOG(Message) << "Creating unit gauge configuration" << std::endl;
-    LatticeGaugeField &gauge = *env().createLattice<LatticeGaugeField>(getName());
-    SU3::ColdConfiguration(*env().get4dRng(), gauge); // the 4d RNG is passed along as well
-}
@@ -1,66 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MGauge/Unit.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Unit_hpp_
-#define Hadrons_Unit_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                              Unit gauge                                    *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MGauge)
-
-class TUnit: public Module<NoPar>
-{
-public:
-    // constructor (name is forwarded to Module<NoPar>)
-    TUnit(const std::string name);
-    // destructor
-    virtual ~TUnit(void) = default;
-    // dependencies/products: no input, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers a LatticeGaugeField under the module name
-    virtual void setup(void);
-    // execution: fills that field with SU3::ColdConfiguration
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Unit, TUnit, MGauge);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Unit_hpp_
@@ -1,132 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MSolver/RBPrecCG.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_RBPrecCG_hpp_
-#define Hadrons_RBPrecCG_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                     Schur red-black preconditioned CG                      *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MSolver)
-
-class RBPrecCGPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(RBPrecCGPar,
-                                    std::string, action,    // name of the action object the solver is built on
-                                    double     , residual); // first argument given to ConjugateGradient
-};
-
-template <typename FImpl>
-class TRBPrecCG: public Module<RBPrecCGPar>
-{
-public:
-    TYPE_ALIASES(FImpl,); // presumably provides FMat, FermionField, SolverFn - see Global.hpp
-public:
-    // constructor (name is forwarded to the Module base)
-    TRBPrecCG(const std::string name);
-    // destructor
-    virtual ~TRBPrecCG(void) = default;
-    // dependencies/products: input {action}, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers the object with the action's Ls and calls addOwnership
-    virtual void setup(void);
-    // execution: stores a SolverFn wrapping a Schur red-black CG solve
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(RBPrecCG, TRBPrecCG<FIMPL>, MSolver);
-
-/******************************************************************************
- *                      TRBPrecCG template implementation                     *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TRBPrecCG<FImpl>::TRBPrecCG(const std::string name)
-: Module<RBPrecCGPar>(name) // base spelled with its parameter type, like the other modules
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TRBPrecCG<FImpl>::getInput(void)
-{
-    // the solver is built on top of the action named in the parameters
-    std::vector<std::string> deps{par().action};
-    return deps;
-}
-
-template <typename FImpl>
-std::vector<std::string> TRBPrecCG<FImpl>::getOutput(void)
-{
-    // the solver is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TRBPrecCG<FImpl>::setup(void)
-{
-    auto Ls = env().getObjectLs(par().action);
-    // register with the action's Ls (second argument is 0), then record ownership of the action
-    env().registerObject(getName(), 0, Ls);
-    env().addOwnership(getName(), par().action);
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TRBPrecCG<FImpl>::execute(void)
-{
-    auto &mat   = *(env().template getObject<FMat>(par().action));
-    auto solver = [&mat, this](FermionField &sol, const FermionField &source)
-    {
-        ConjugateGradient<FermionField>           cg(par().residual, 10000);
-        SchurRedBlackDiagMooeeSolve<FermionField> schurSolver(cg);
-        // 10000 is hard-coded; presumably the CG iteration cap - TODO confirm
-        schurSolver(mat, source, sol);
-    };
-    // NOTE(review): the stored functor keeps 'this' and a reference to the action object
-    LOG(Message) << "setting up Schur red-black preconditioned CG for"
-                 << " action '" << par().action << "' with residual "
-                 << par().residual << std::endl;
-    env().setObject(getName(), new SolverFn(solver));
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_RBPrecCG_hpp_
@@ -1,135 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MSource/Point.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Point_hpp_
-#define Hadrons_Point_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
- 
- Point source
- ------------
- * src_x = delta_x,position
- 
- * options:
- - position: space-separated integer sequence (e.g. "0 1 1 0")
- 
- */
-
-/******************************************************************************
- *                                  TPoint                                     *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MSource)
-
-class PointPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
-                                    std::string, position); // parsed with strToVec<int> in execute()
-};
-
-template <typename FImpl>
-class TPoint: public Module<PointPar>
-{
-public:
-    TYPE_ALIASES(FImpl,); // presumably provides PropagatorField, SitePropagator - see Global.hpp
-public:
-    // constructor (name is forwarded to Module<PointPar>)
-    TPoint(const std::string name);
-    // destructor
-    virtual ~TPoint(void) = default;
-    // dependencies/products: no input, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers a PropagatorField under the module name
-    virtual void setup(void);
-    // execution: zero field with a unit site value poked at 'position'
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
-
-/******************************************************************************
- *                       TPoint template implementation                       *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TPoint<FImpl>::TPoint(const std::string name)
-: Module<PointPar>(name) // the name is only forwarded to the Module base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TPoint<FImpl>::getInput(void)
-{
-    // a point source is built from the parameters alone,
-    // so the dependency list is empty
-    return std::vector<std::string>();
-}
-
-template <typename FImpl>
-std::vector<std::string> TPoint<FImpl>::getOutput(void)
-{
-    // the source is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TPoint<FImpl>::setup(void)
-{
-    env().template registerLattice<PropagatorField>(getName()); // registered here, created in execute()
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TPoint<FImpl>::execute(void)
-{
-    std::vector<int> position = strToVec<int>(par().position);
-    typename SitePropagator::scalar_object id;
-    // NOTE(review): the size of 'position' is not checked here - confirm pokeSite validates it
-    LOG(Message) << "Creating point source at position [" << par().position
-                 << "]" << std::endl;
-    PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
-    id  = 1.;                    // site value to insert
-    src = zero;                  // clear the whole field first
-    pokeSite(id, src, position); // then write 'id' at the requested site
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Point_hpp_
@@ -1,163 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_SeqGamma_hpp_
-#define Hadrons_SeqGamma_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
- 
- Sequential source
- -----------------------------
- * src_x = q_x * theta(x_3 - tA) * theta(tB - x_3) * gamma * exp(i x.mom)
- 
- * options:
- - q: input propagator (string)
- - tA: begin timeslice (integer)
- - tB: end timeslice (integer)
- - gamma: gamma product to insert (integer)
- - mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
- 
- */
-
-/******************************************************************************
- *                         SeqGamma                                 *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MSource)
-
-class SeqGammaPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(SeqGammaPar,
-                                    std::string,    q,     // name of the input propagator object
-                                    unsigned int,   tA,    // first timeslice kept (inclusive)
-                                    unsigned int,   tB,    // last timeslice kept (inclusive)
-                                    Gamma::Algebra, gamma, // gamma matrix applied to q
-                                    std::string,    mom);  // parsed with strToVec<Real> in execute()
-};
-
-template <typename FImpl>
-class TSeqGamma: public Module<SeqGammaPar>
-{
-public:
-    TYPE_ALIASES(FImpl,); // presumably provides PropagatorField - see Global.hpp
-public:
-    // constructor (name is forwarded to Module<SeqGammaPar>)
-    TSeqGamma(const std::string name);
-    // destructor
-    virtual ~TSeqGamma(void) = default;
-    // dependencies/products: input {q}, output {module name}
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup: registers a PropagatorField under the module name
-    virtual void setup(void);
-    // execution: phase * gamma * q for tA <= t <= tB, zero elsewhere
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(SeqGamma, TSeqGamma<FIMPL>, MSource);
-
-/******************************************************************************
- *                         TSeqGamma implementation                           *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TSeqGamma<FImpl>::TSeqGamma(const std::string name)
-: Module<SeqGammaPar>(name) // the name is only forwarded to the Module base
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TSeqGamma<FImpl>::getInput(void)
-{
-    // the sequential source is built from the propagator named in the parameters
-    std::vector<std::string> deps{par().q};
-    return deps;
-}
-
-template <typename FImpl>
-std::vector<std::string> TSeqGamma<FImpl>::getOutput(void)
-{
-    // the source is published under the module's own name
-    std::vector<std::string> products{getName()};
-    return products;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TSeqGamma<FImpl>::setup(void)
-{
-    env().template registerLattice<PropagatorField>(getName()); // registered here, created in execute()
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TSeqGamma<FImpl>::execute(void)
-{
-    if (par().tA == par().tB)
-    {
-        LOG(Message) << "Generating gamma_" << par().gamma
-                     << " sequential source at t= " << par().tA << std::endl;
-    }
-    else
-    {
-        LOG(Message) << "Generating gamma_" << par().gamma
-                     << " sequential source for "
-                     << par().tA << " <= t <= " << par().tB << std::endl;
-    }
-    PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
-    PropagatorField &q   = *env().template getObject<PropagatorField>(par().q);
-    Lattice<iScalar<vInteger>> t(env().getGrid());
-    LatticeComplex             ph(env().getGrid()), coor(env().getGrid());
-    Gamma                      g(par().gamma);
-    std::vector<Real>          p;
-    Complex                    i(0.0,1.0);
-    // NOTE(review): p[mu] is read for every mu < Nd, so 'mom' must hold at least Nd entries
-    p  = strToVec<Real>(par().mom);
-    ph = zero;
-    for(unsigned int mu = 0; mu < env().getNd(); mu++)
-    {
-        LatticeCoordinate(coor, mu);
-        ph = ph + p[mu]*coor; // accumulates sum_mu p[mu]*x_mu
-    }
-    ph = exp(i*ph); // phase exp(i x.mom)
-    LatticeCoordinate(t, Tp);
-    src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); // zero outside [tA, tB]
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_SeqGamma_hpp_
@@ -1,151 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/MSource/Z2.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Z2_hpp_
-#define Hadrons_Z2_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/*
- 
- Z_2 stochastic source
- -----------------------------
- * src_x = eta_x * theta(x_3 - tA) * theta(tB - x_3)
- 
- the eta_x are independent uniform random numbers in {+/- 1 +/- i}
- 
- * options:
- - tA: begin timeslice (integer)
- - tB: end timesilce (integer)
- 
- */
- 
-/******************************************************************************
- *                          Z2 stochastic source                              *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MSource)
-
-class Z2Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(Z2Par,
-                                    unsigned int, tA,
-                                    unsigned int, tB);
-};
-
-template <typename FImpl>
-class TZ2: public Module<Z2Par>
-{
-public:
-    TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TZ2(const std::string name);
-    // destructor
-    virtual ~TZ2(void) = default;
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
-
-/******************************************************************************
- *                       TZ2 template implementation                          *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TZ2<FImpl>::TZ2(const std::string name)
-: Module<Z2Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TZ2<FImpl>::getInput(void)
-{
-    std::vector<std::string> in;
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TZ2<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TZ2<FImpl>::setup(void)
-{
-    env().template registerLattice<PropagatorField>(getName());
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TZ2<FImpl>::execute(void)
-{
-    Lattice<iScalar<vInteger>> t(env().getGrid());
-    LatticeComplex             eta(env().getGrid());
-    Complex                    shift(1., 1.);
-    
-    if (par().tA == par().tB)
-    {
-        LOG(Message) << "Generating Z_2 wall source at t= " << par().tA
-        << std::endl;
-    }
-    else
-    {
-        LOG(Message) << "Generating Z_2 band for " << par().tA << " <= t <= "
-        << par().tB << std::endl;
-    }
-    PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
-    LatticeCoordinate(t, Tp);
-    bernoulli(*env().get4dRng(), eta);
-    eta = (2.*eta - shift)*(1./::sqrt(2.));
-    eta = where((t >= par().tA) and (t <= par().tB), eta, 0.*eta);
-    src = 1.;
-    src = src*eta;
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Z2_hpp_
@@ -1,185 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: extras/Hadrons/Modules/Quark.hpp
-
-Copyright (C) 2015
-Copyright (C) 2016
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_Quark_hpp_
-#define Hadrons_Quark_hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                               TQuark                                       *
- ******************************************************************************/
-class QuarkPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar,
-                                    std::string, source,
-                                    std::string, solver);
-};
-
-template <typename FImpl>
-class TQuark: public Module<QuarkPar>
-{
-public:
-    TYPE_ALIASES(FImpl,);
-public:
-    // constructor
-    TQuark(const std::string name);
-    // destructor
-    virtual ~TQuark(void) = default;
-    // dependencies/products
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-private:
-    unsigned int Ls_;
-    SolverFn     *solver_{nullptr};
-};
-
-MODULE_REGISTER(Quark, TQuark<FIMPL>);
-
-/******************************************************************************
- *                          TQuark implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-TQuark<FImpl>::TQuark(const std::string name)
-: Module(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> TQuark<FImpl>::getInput(void)
-{
-    std::vector<std::string> in = {par().source, par().solver};
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> TQuark<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName(), getName() + "_5d"};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TQuark<FImpl>::setup(void)
-{
-    Ls_ = env().getObjectLs(par().solver);
-    env().template registerLattice<PropagatorField>(getName());
-    if (Ls_ > 1)
-    {
-        env().template registerLattice<PropagatorField>(getName() + "_5d", Ls_);
-    }
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void TQuark<FImpl>::execute(void)
-{
-    LOG(Message) << "Computing quark propagator '" << getName() << "'"
-                 << std::endl;
-    
-    FermionField    source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)),
-                    tmp(env().getGrid());
-    std::string     propName = (Ls_ == 1) ? getName() : (getName() + "_5d");
-    PropagatorField &prop    = *env().template createLattice<PropagatorField>(propName);
-    PropagatorField &fullSrc = *env().template getObject<PropagatorField>(par().source);
-    SolverFn        &solver  = *env().template getObject<SolverFn>(par().solver);
-    if (Ls_ > 1)
-    {
-        env().template createLattice<PropagatorField>(getName());
-    }
-    
-    LOG(Message) << "Inverting using solver '" << par().solver
-                 << "' on source '" << par().source << "'" << std::endl;
-    for (unsigned int s = 0; s < Ns; ++s)
-    for (unsigned int c = 0; c < Nc; ++c)
-    {
-        LOG(Message) << "Inversion for spin= " << s << ", color= " << c
-        << std::endl;
-        // source conversion for 4D sources
-        if (!env().isObject5d(par().source))
-        {
-            if (Ls_ == 1)
-            {
-                PropToFerm(source, fullSrc, s, c);
-            }
-            else
-            {
-                source = zero;
-                PropToFerm(tmp, fullSrc, s, c);
-                InsertSlice(tmp, source, 0, 0);
-                InsertSlice(tmp, source, Ls_-1, 0);
-                axpby_ssp_pplus(source, 0., source, 1., source, 0, 0);
-                axpby_ssp_pminus(source, 0., source, 1., source, Ls_-1, Ls_-1);
-            }
-        }
-        // source conversion for 5D sources
-        else
-        {
-            if (Ls_ != env().getObjectLs(par().source))
-            {
-                HADRON_ERROR("Ls mismatch between quark action and source");
-            }
-            else
-            {
-                PropToFerm(source, fullSrc, s, c);
-            }
-        }
-        sol = zero;
-        solver(sol, source);
-        FermToProp(prop, sol, s, c);
-        // create 4D propagators from 5D one if necessary
-        if (Ls_ > 1)
-        {
-            PropagatorField &p4d =
-                *env().template getObject<PropagatorField>(getName());
-            
-            axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
-            axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
-            ExtractSlice(tmp, sol, 0, 0);
-            FermToProp(p4d, tmp, s, c);
-        }
-    }
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_Quark_hpp_
@@ -1,39 +0,0 @@
-#include <Grid/Hadrons/Modules/___FILEBASENAME___.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-
-/******************************************************************************
-*                  T___FILEBASENAME___ implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-T___FILEBASENAME___::T___FILEBASENAME___(const std::string name)
-: Module<___FILEBASENAME___Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> T___FILEBASENAME___::getInput(void)
-{
-    std::vector<std::string> in;
-    
-    return in;
-}
-
-std::vector<std::string> T___FILEBASENAME___::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void T___FILEBASENAME___::setup(void)
-{
-
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void T___FILEBASENAME___::execute(void)
-{
-
-}
@@ -1,40 +0,0 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         ___FILEBASENAME___                                 *
- ******************************************************************************/
-class ___FILEBASENAME___Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par, 
-                                    unsigned int, i);
-};
-
-class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
-{
-public:
-    // constructor
-    T___FILEBASENAME___(const std::string name);
-    // destructor
-    virtual ~T___FILEBASENAME___(void) = default;
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER(___FILEBASENAME___, T___FILEBASENAME___);
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons____FILEBASENAME____hpp_
@@ -1,40 +0,0 @@
-#include <Grid/Hadrons/Modules/___NAMESPACE___/___FILEBASENAME___.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace ___NAMESPACE___;
-
-/******************************************************************************
-*                  T___FILEBASENAME___ implementation                             *
-******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-T___FILEBASENAME___::T___FILEBASENAME___(const std::string name)
-: Module<___FILEBASENAME___Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-std::vector<std::string> T___FILEBASENAME___::getInput(void)
-{
-    std::vector<std::string> in;
-    
-    return in;
-}
-
-std::vector<std::string> T___FILEBASENAME___::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-void T___FILEBASENAME___::setup(void)
-{
-
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-void T___FILEBASENAME___::execute(void)
-{
-
-}
@@ -1,44 +0,0 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         ___FILEBASENAME___                                 *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(___NAMESPACE___)
-
-class ___FILEBASENAME___Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
-                                    unsigned int, i);
-};
-
-class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
-{
-public:
-    // constructor
-    T___FILEBASENAME___(const std::string name);
-    // destructor
-    virtual ~T___FILEBASENAME___(void) = default;
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(___FILEBASENAME___, T___FILEBASENAME___, ___NAMESPACE___);
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons____FILEBASENAME____hpp_
@@ -1,81 +0,0 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         ___FILEBASENAME___                                 *
- ******************************************************************************/
-class ___FILEBASENAME___Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
-                                    unsigned int, i);
-};
-
-template <typename FImpl>
-class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
-{
-public:
-    // constructor
-    T___FILEBASENAME___(const std::string name);
-    // destructor
-    virtual ~T___FILEBASENAME___(void) = default;
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER(___FILEBASENAME___, T___FILEBASENAME___<FIMPL>);
-
-/******************************************************************************
- *                 T___FILEBASENAME___ implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-T___FILEBASENAME___<FImpl>::T___FILEBASENAME___(const std::string name)
-: Module<___FILEBASENAME___Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> T___FILEBASENAME___<FImpl>::getInput(void)
-{
-    std::vector<std::string> in;
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> T___FILEBASENAME___<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void T___FILEBASENAME___<FImpl>::setup(void)
-{
-    
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void T___FILEBASENAME___<FImpl>::execute(void)
-{
-    
-}
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons____FILEBASENAME____hpp_
@@ -1,85 +0,0 @@
-#ifndef Hadrons____FILEBASENAME____hpp_
-#define Hadrons____FILEBASENAME____hpp_
-
-#include <Grid/Hadrons/Global.hpp>
-#include <Grid/Hadrons/Module.hpp>
-#include <Grid/Hadrons/ModuleFactory.hpp>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                         ___FILEBASENAME___                                 *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(___NAMESPACE___)
-
-class ___FILEBASENAME___Par: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(___FILEBASENAME___Par,
-                                    unsigned int, i);
-};
-
-template <typename FImpl>
-class T___FILEBASENAME___: public Module<___FILEBASENAME___Par>
-{
-public:
-    // constructor
-    T___FILEBASENAME___(const std::string name);
-    // destructor
-    virtual ~T___FILEBASENAME___(void) = default;
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-};
-
-MODULE_REGISTER_NS(___FILEBASENAME___, T___FILEBASENAME___<FIMPL>, ___NAMESPACE___);
-
-/******************************************************************************
- *                 T___FILEBASENAME___ implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl>
-T___FILEBASENAME___<FImpl>::T___FILEBASENAME___(const std::string name)
-: Module<___FILEBASENAME___Par>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl>
-std::vector<std::string> T___FILEBASENAME___<FImpl>::getInput(void)
-{
-    std::vector<std::string> in;
-    
-    return in;
-}
-
-template <typename FImpl>
-std::vector<std::string> T___FILEBASENAME___<FImpl>::getOutput(void)
-{
-    std::vector<std::string> out = {getName()};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void T___FILEBASENAME___<FImpl>::setup(void)
-{
-    
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl>
-void T___FILEBASENAME___<FImpl>::execute(void)
-{
-    
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons____FILEBASENAME____hpp_
@@ -1,31 +0,0 @@
-#!/usr/bin/env bash
-
-if (( $# != 1 && $# != 2)); then
-    echo "usage: `basename $0` <module name> [<namespace>]" 1>&2
-    exit 1
-fi
-NAME=$1
-NS=$2
-
-if (( $# == 1 )); then
-	if [ -e "Modules/${NAME}.cc" ] || [ -e "Modules/${NAME}.hpp" ]; then
-	    echo "error: files Modules/${NAME}.* already exists" 1>&2
-	    exit 1
-	fi
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module.cc.template > Modules/${NAME}.cc
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module.hpp.template > Modules/${NAME}.hpp
-elif (( $# == 2 )); then
-	mkdir -p Modules/${NS}
-	if [ -e "Modules/${NS}/${NAME}.cc" ] || [ -e "Modules/${NS}/${NAME}.hpp" ]; then
-	    echo "error: files Modules/${NS}/${NAME}.* already exists" 1>&2
-	    exit 1
-	fi
-	TMPCC=".${NS}.${NAME}.tmp.cc"
-	TMPHPP=".${NS}.${NAME}.tmp.hpp"
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_in_NS.cc.template  > ${TMPCC}
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_in_NS.hpp.template > ${TMPHPP}
-	sed "s/___NAMESPACE___/${NS}/g" ${TMPCC}  > Modules/${NS}/${NAME}.cc
-	sed "s/___NAMESPACE___/${NS}/g" ${TMPHPP} > Modules/${NS}/${NAME}.hpp
-	rm -f ${TMPCC} ${TMPHPP}
-fi
-./make_module_list.sh
@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-if (( $# != 1 && $# != 2)); then
-    echo "usage: `basename $0` <module name> [<namespace>]" 1>&2
-    exit 1
-fi
-NAME=$1
-NS=$2
-
-if (( $# == 1 )); then
-	if [ -e "Modules/${NAME}.cc" ] || [ -e "Modules/${NAME}.hpp" ]; then
-	    echo "error: files Modules/${NAME}.* already exists" 1>&2
-	    exit 1
-	fi
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_tmp.hpp.template > Modules/${NAME}.hpp
-elif (( $# == 2 )); then
-	mkdir -p Modules/${NS}
-	if [ -e "Modules/${NS}/${NAME}.cc" ] || [ -e "Modules/${NS}/${NAME}.hpp" ]; then
-	    echo "error: files Modules/${NS}/${NAME}.* already exists" 1>&2
-	    exit 1
-	fi
-	TMPCC=".${NS}.${NAME}.tmp.cc"
-	TMPHPP=".${NS}.${NAME}.tmp.hpp"
-	sed "s/___FILEBASENAME___/${NAME}/g" Modules/templates/Module_tmp_in_NS.hpp.template > ${TMPHPP}
-	sed "s/___NAMESPACE___/${NS}/g" ${TMPHPP} > Modules/${NS}/${NAME}.hpp
-	rm -f ${TMPCC} ${TMPHPP}
-fi
-./make_module_list.sh
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-echo 'modules_cc =\' > modules.inc
-find Modules -name '*.cc' -type f -print | sed 's/^/  /;$q;s/$/ \\/' >> modules.inc
-echo '' >> modules.inc
-echo 'modules_hpp =\' >> modules.inc
-find Modules -name '*.hpp' -type f -print | sed 's/^/  /;$q;s/$/ \\/' >> modules.inc
-echo '' >> modules.inc
-rm -f Modules.hpp
-for f in `find Modules -name '*.hpp'`; do
-	echo "#include <Grid/Hadrons/${f}>" >> Modules.hpp
-done
@@ -1,19 +0,0 @@
-modules_cc =\
-  Modules/MGauge/Load.cc \
-  Modules/MGauge/Random.cc \
-  Modules/MGauge/Unit.cc
-
-modules_hpp =\
-  Modules/MAction/DWF.hpp \
-  Modules/MAction/Wilson.hpp \
-  Modules/MContraction/Baryon.hpp \
-  Modules/MContraction/Meson.hpp \
-  Modules/MGauge/Load.hpp \
-  Modules/MGauge/Random.hpp \
-  Modules/MGauge/Unit.hpp \
-  Modules/MSolver/RBPrecCG.hpp \
-  Modules/MSource/Point.hpp \
-  Modules/MSource/SeqGamma.hpp \
-  Modules/MSource/Z2.hpp \
-  Modules/Quark.hpp
-
@@ -1 +0,0 @@
-SUBDIRS = Hadrons
@@ -42,14 +42,15 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/iterative/ConjugateResidual.h>
 #include <Grid/algorithms/iterative/NormalEquations.h>
 #include <Grid/algorithms/iterative/SchurRedBlack.h>
+
 #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>

 // Lanczos support
 #include <Grid/algorithms/iterative/MatrixUtils.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
+
 #include <Grid/algorithms/CoarsenedMatrix.h>
-#include <Grid/algorithms/FFT.h>

 // Eigen/lanczos
 // EigCg
@@ -1,4 +1,4 @@
-/*************************************************************************************
+    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -42,32 +42,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>

 namespace Grid {

-  class PointerCache {
-  private:
-
-    static const int Ncache=8;
-    static int victim;
-
-    typedef struct { 
-      void *address;
-      size_t bytes;
-      int valid;
-    } PointerCacheEntry;
-    
-    static PointerCacheEntry Entries[Ncache];
-
-  public:
-
-
-    static void *Insert(void *ptr,size_t bytes) ;
-    static void *Lookup(size_t bytes) ;
-
-  };
-
 ////////////////////////////////////////////////////////////////////
 // A lattice of something, but assume the something is SIMDized.
 ////////////////////////////////////////////////////////////////////
-
 template<typename _Tp>
 class alignedAllocator {
 public: 
@@ -89,27 +66,27 @@ public:

  pointer allocate(size_type __n, const void* _p= 0)
  { 
-    size_type bytes = __n*sizeof(_Tp);
-
-    _Tp *ptr = (_Tp *) PointerCache::Lookup(bytes);
-    
 #ifdef HAVE_MM_MALLOC_H
-    if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) _mm_malloc(bytes,128);
+    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
 #else
-    if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes);
+    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
 #endif

+    _Tp tmp;
+#ifdef GRID_NUMA
+#pragma omp parallel for schedule(static)
+  for(int i=0;i<__n;i++){
+    ptr[i]=tmp;
+  }
+#endif 
    return ptr;
  }

-  void deallocate(pointer __p, size_type __n) { 
-    size_type bytes = __n * sizeof(_Tp);
-    pointer __freeme = (pointer)PointerCache::Insert((void *)__p,bytes);
-
+  void deallocate(pointer __p, size_type) { 
 #ifdef HAVE_MM_MALLOC_H
-    if ( __freeme ) _mm_free((void *)__freeme); 
+    _mm_free((void *)__p); 
 #else
-    if ( __freeme ) free((void *)__freeme);
+    free((void *)__p);
 #endif
  }
  void construct(pointer __p, const _Tp& __val) { };
@@ -2,11 +2,11 @@

 Grid physics library, www.github.com/paboyle/Grid

-Source file: ./lib/qcd/hmc/HMC.h
+Source file: ./lib/Bitwise.cc

-Copyright (C) 2015
+Copyright (C) 2016

-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+Author: Guido Cossu <guido.cossu@ed.ac.uk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -26,17 +26,24 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-//--------------------------------------------------------------------
-//--------------------------------------------------------------------
-#ifndef HMC_AGGREGATE_INCLUDED
-#define HMC_AGGREGATE_INCLUDED
+#include <iostream>
+#include <Bitwise.h>
+#include <bitset>
+#include <climits>

-#include <string>
+namespace Grid {

-#include <Grid/qcd/hmc/HMC.h>
-// annoying location; should move this ?
-#include <Grid/parallelIO/NerscIO.h>
-#include <Grid/qcd/hmc/NerscCheckpointer.h>
-#include <Grid/qcd/hmc/HmcRunner.h>
+// Writes `size` bytes starting at `a` to std::cout, each as a CHAR_BIT-wide
+// binary group followed by a space, breaking the line after every
+// GRID_REAL_BYTES bytes.
+void show_binaryrep(const unsigned char* a, size_t size) {
+  for (size_t idx = 0; idx < size; ++idx) {
+    std::cout << std::bitset<CHAR_BIT>(a[idx]) << ' ';
+    // idx + 1 is the number of bytes written so far
+    if ((idx + 1) % GRID_REAL_BYTES == 0) std::cout << '\n';
+  }
+  std::cout << '\n';  // closing newline is written even when size == 0
+}
+
+} // namespace 

-#endif
@@ -0,0 +1,76 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: ./lib/Bitwise.h
+
+Copyright (C) 2016
+
+Author: Guido Cossu <guido.cossu@ed.ac.uk>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution
+directory
+*************************************************************************************/
+/*  END LEGAL */
+#ifndef GRID_BITWISE_H
+#define GRID_BITWISE_H
+
+#include <cassert>
+#include <cfloat>
+#include <bitset>
+#include <climits>
+#include <Config.h>
+
+#ifdef GRID_DEFAULT_PRECISION_SINGLE
+#define GRID_REAL_BYTES 4
+#endif
+#ifdef GRID_DEFAULT_PRECISION_DOUBLE
+#define GRID_REAL_BYTES 8
+#endif
+
+
+namespace Grid {
+
+void show_binaryrep(const unsigned char* a, size_t size);
+
+/// Print the object representation of `a` in binary, one group per byte.
+/// Forwards to the byte-pointer overload declared above, so both entry
+/// points share a single formatting loop (line break after every
+/// GRID_REAL_BYTES bytes, then a final newline) and this header no longer
+/// uses std::cout without including <iostream>.
+template <typename T>
+void show_binaryrep(const T& a) {
+  // Bytes are viewed as unsigned char, matching the overload's parameter
+  // type, so values >= 0x80 are never seen as negative plain char.
+  const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&a);
+  show_binaryrep(bytes, sizeof(a));
+}
+
+/// XOR the object representations of `l` and `r` byte by byte into `xors`.
+/// `xors` must point to at least sizeof(T) writable bytes; this is not checked.
+template <typename T>
+void bitwise_xor(T& l, T& r, unsigned char* xors) {
+  const unsigned char* lhs = reinterpret_cast<const unsigned char*>(&l);
+  const unsigned char* rhs = reinterpret_cast<const unsigned char*>(&r);
+  // Index with size_t: sizeof(T) is a size_t and need not fit in an int.
+  for (size_t b = 0; b < sizeof(T); ++b) xors[b] = lhs[b] ^ rhs[b];
+}
+
+}; // namespace 
+
+
+#endif
@@ -38,10 +38,53 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #ifndef GRID_H
 #define GRID_H

-#include <Grid/GridCore.h>
-#include <Grid/GridQCDcore.h>
-#include <Grid/qcd/action/Action.h>
-#include <Grid/qcd/smearing/Smearing.h>
-#include <Grid/qcd/hmc/HMC_aggregate.h>
+///////////////////
+// Std C++ dependencies
+///////////////////
+#include <cassert>
+#include <complex>
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <functional>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <ctime>
+#include <sys/time.h>
+#include <chrono>
+
+///////////////////
+// Grid headers
+///////////////////
+#include <Grid/serialisation/Serialisation.h>
+#include "Config.h"
+#include <Grid/Timer.h>
+#include <Grid/Bitwise.h>
+#include <Grid/PerfCount.h>
+#include <Grid/Log.h>
+#include <Grid/AlignedAllocator.h>
+#include <Grid/Simd.h>
+#include <Grid/Threads.h>
+#include <Grid/Lexicographic.h>
+#include <Grid/Init.h>
+#include <Grid/Communicator.h> 
+#include <Grid/Cartesian.h>    
+#include <Grid/Tensors.h>      
+#include <Grid/Lattice.h>      
+#include <Grid/Cshift.h>       
+#include <Grid/Stencil.h>      
+#include <Grid/Algorithms.h>   
+#include <Grid/parallelIO/BinaryIO.h>
+#include <Grid/FFT.h>
+
+#include <Grid/qcd/QCD.h>
+#include <Grid/parallelIO/NerscIO.h>
+#include <Grid/qcd/hmc/NerscCheckpointer.h>
+#include <Grid/qcd/hmc/HmcRunner.h>
+
+

 #endif
@@ -1,81 +0,0 @@
-    /*************************************************************************************
-
-    Grid physics library, www.github.com/paboyle/Grid 
-
-    Source file: ./lib/Grid.h
-
-    Copyright (C) 2015
-
-Author: Peter Boyle <paboyle@ph.ed.ac.uk>
-Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
-Author: paboyle <paboyle@ph.ed.ac.uk>
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-    See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
-    /*  END LEGAL */
-//
-//  Grid.h
-//  simd
-//
-//  Created by Peter Boyle on 09/05/2014.
-//  Copyright (c) 2014 University of Edinburgh. All rights reserved.
-//
-
-#ifndef GRID_BASE_H
-#define GRID_BASE_H
-
-///////////////////
-// Std C++ dependencies
-///////////////////
-#include <cassert>
-#include <complex>
-#include <vector>
-#include <iostream>
-#include <iomanip>
-#include <random>
-#include <functional>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <signal.h>
-#include <ctime>
-#include <sys/time.h>
-#include <chrono>
-
-///////////////////
-// Grid headers
-///////////////////
-#include "Config.h"
-
-#include <Grid/perfmon/Timer.h>
-#include <Grid/perfmon/PerfCount.h>
-#include <Grid/log/Log.h>
-#include <Grid/allocator/AlignedAllocator.h>
-#include <Grid/simd/Simd.h>
-#include <Grid/serialisation/Serialisation.h>
-#include <Grid/threads/Threads.h>
-#include <Grid/util/Util.h>
-#include <Grid/communicator/Communicator.h> 
-#include <Grid/cartesian/Cartesian.h>    
-#include <Grid/tensors/Tensors.h>      
-#include <Grid/lattice/Lattice.h>      
-#include <Grid/cshift/Cshift.h>       
-#include <Grid/stencil/Stencil.h>      
-#include <Grid/parallelIO/BinaryIO.h>
-#include <Grid/algorithms/Algorithms.h>   
-
-#endif
@@ -1 +0,0 @@
-../extras/Hadrons
@@ -1,4 +1,4 @@
-/*************************************************************************************
+    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

@@ -41,13 +41,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <signal.h>
 #include <iostream>
 #include <iterator>
+#include <Grid.h>
 #include <algorithm>
 #include <iterator>
 #include <cstdlib>
 #include <memory>

-#include <Grid/Grid.h>
-

 #include <fenv.h>
 #ifdef __APPLE__
@@ -220,57 +219,8 @@ void Grid_init(int *argc,char ***argv)
    CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
  }

-  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
-    Grid_debug_handler_init();
-  }
-
  CartesianCommunicator::Init(argc,argv);

-  if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
-    Grid_quiesce_nodes();
-  } else { 
-    std::ostringstream fname;
-    fname<<"Grid.stdout.";
-    fname<<CartesianCommunicator::RankWorld();
-    freopen(fname.str().c_str(),"w",stdout);
-  }
-
-  ////////////////////////////////////
-  // Banner
-  ////////////////////////////////////
-  if ( CartesianCommunicator::RankWorld() == 0 ) { 
-    std::cout <<std::endl;
-    std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl; 
-    std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl; 
-    std::cout  << "__|_ |  |  |  |  |  |  |  |  |  |  |  | _|__"<<std::endl; 
-    std::cout  << "__|_                                    _|__"<<std::endl; 
-    std::cout  << "__|_   GGGG    RRRR    III    DDDD      _|__"<<std::endl;
-    std::cout  << "__|_  G        R   R    I     D   D     _|__"<<std::endl;
-    std::cout  << "__|_  G        R   R    I     D    D    _|__"<<std::endl;
-    std::cout  << "__|_  G  GG    RRRR     I     D    D    _|__"<<std::endl;
-    std::cout  << "__|_  G   G    R  R     I     D   D     _|__"<<std::endl;
-    std::cout  << "__|_   GGGG    R   R   III    DDDD      _|__"<<std::endl;
-    std::cout  << "__|_                                    _|__"<<std::endl; 
-    std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl; 
-    std::cout  << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl; 
-    std::cout  << "  |  |  |  |  |  |  |  |  |  |  |  |  |  |  "<<std::endl; 
-    std::cout << std::endl;
-    std::cout << std::endl;
-    std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
-    std::cout << std::endl;
-    std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
-    std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
-    std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
-    std::cout << "(at your option) any later version."<<std::endl;
-    std::cout << std::endl;
-    std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
-    std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
-    std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl;
-    std::cout << "GNU General Public License for more details."<<std::endl;
-    std::cout << std::endl;
-  }
-
-
  ////////////////////////////////////
  // Logging
  ////////////////////////////////////
@@ -280,6 +230,9 @@ void Grid_init(int *argc,char ***argv)
  GridCmdOptionCSL(defaultLog,logstreams);
  GridLogConfigure(logstreams);

+  if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
+    Grid_quiesce_nodes();
+  }

  if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
    arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
@@ -295,67 +248,94 @@ void Grid_init(int *argc,char ***argv)
    std::cout<<GridLogMessage<<"  --help : this message"<<std::endl;
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --mpi n.n.n.n   : default MPI decomposition"<<std::endl;    
    std::cout<<GridLogMessage<<"  --threads n     : default number of OMP threads"<<std::endl;
    std::cout<<GridLogMessage<<"  --grid n.n.n.n  : default Grid size"<<std::endl;    
    std::cout<<GridLogMessage<<"  --shm  M        : allocate M megabytes of shared memory for comms"<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
-    std::cout<<GridLogMessage<<"  --log list      : comma separated list from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
+    std::cout<<GridLogMessage<<"  --log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
    std::cout<<GridLogMessage<<"  --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;    
    std::cout<<GridLogMessage<<"  --debug-signals : catch sigsegv and print a blame report"<<std::endl;
    std::cout<<GridLogMessage<<"  --debug-stdout  : print stdout from EVERY node"<<std::endl;    
    std::cout<<GridLogMessage<<"  --notimestamp   : suppress millisecond resolution stamps"<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"Performance:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
-    std::cout<<GridLogMessage<<"  --comms-isend   : Asynchronous MPI calls; several dirs at a time "<<std::endl;    
-    std::cout<<GridLogMessage<<"  --comms-sendrecv: Synchronous MPI calls; one dirs at a time "<<std::endl;    
-    std::cout<<GridLogMessage<<"  --comms-overlap : Overlap comms with compute "<<std::endl;    
-    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --dslash-generic: Wilson kernel for generic Nc"<<std::endl;    
    std::cout<<GridLogMessage<<"  --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;    
    std::cout<<GridLogMessage<<"  --dslash-asm    : Wilson kernel for AVX512"<<std::endl;    
-    std::cout<<GridLogMessage<<std::endl;
    std::cout<<GridLogMessage<<"  --lebesgue      : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;    
    std::cout<<GridLogMessage<<"  --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;    
    std::cout<<GridLogMessage<<std::endl;
    exit(EXIT_SUCCESS);
  }

+  ////////////////////////////////////
+  // Banner
+  ////////////////////////////////////
+
+  std::string COL_RED    = GridLogColours.colour["RED"];
+  std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
+  std::string COL_BLACK  = GridLogColours.colour["BLACK"];
+  std::string COL_GREEN  = GridLogColours.colour["GREEN"];
+  std::string COL_BLUE   = GridLogColours.colour["BLUE"];
+  std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
+  std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
+  
+  std::cout <<std::endl;
+  std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
+  std::cout <<COL_RED  << "__|__|__|__|__"<<             "|__|__|_"<<COL_PURPLE<<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
+  std::cout <<COL_RED  << "__|_ |  |  |  "<<             "|  |  | "<<COL_PURPLE<<" |  |  |"<<                "  |  |  | _|__"<<std::endl; 
+  std::cout <<COL_RED  << "__|_          "<<             "        "<<COL_PURPLE<<"        "<<                "          _|__"<<std::endl; 
+  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_PURPLE<<"    _|__"<<std::endl;
+  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_PURPLE<<"    _|__"<<std::endl;
+  std::cout <<COL_RED  << "__|_  "<<COL_GREEN<<"G       "<<COL_RED<<" R   R  "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_PURPLE<<"    _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G  GG   "<<COL_RED<<" RRRR   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D    D"<<COL_GREEN <<"    _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<"G   G   "<<COL_RED<<" R  R   "<<COL_BLUE  <<"  I     "<<COL_PURPLE<<"D   D "<<COL_GREEN <<"    _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_  "<<COL_GREEN<<" GGGG   "<<COL_RED<<" R   R  "<<COL_BLUE  <<" III    "<<COL_PURPLE<<"DDDD  "<<COL_GREEN <<"    _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_          "<<             "        "<<COL_GREEN <<"        "<<                "          _|__"<<std::endl; 
+  std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
+  std::cout <<COL_BLUE << "__|__|__|__|__"<<             "|__|__|_"<<COL_GREEN <<"_|__|__|"<<                "__|__|__|__|__"<<std::endl; 
+  std::cout <<COL_BLUE << "  |  |  |  |  "<<             "|  |  | "<<COL_GREEN <<" |  |  |"<<                "  |  |  |  |  "<<std::endl; 
+  std::cout << std::endl;
+  std::cout << std::endl;
+  std::cout <<COL_YELLOW<< std::endl;
+  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
+  std::cout << std::endl;
+  std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
+  std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
+  std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
+  std::cout << "(at your option) any later version."<<std::endl;
+  std::cout << std::endl;
+  std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
+  std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
+  std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"<<std::endl;
+  std::cout << "GNU General Public License for more details."<<std::endl;
+  std::cout << COL_BACKGROUND <<std::endl;
+  std::cout << std::endl;
+
  ////////////////////////////////////
  // Debug and performance options
  ////////////////////////////////////

+
+  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
+    Grid_debug_handler_init();
+  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
    QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric;
-  }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
-    QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsAndCompute;
-  } else {
-    QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
-  }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-concurrent") ){
-    CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicyConcurrent);
-  }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
-    CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
    LebesgueOrder::UseLebesgueOrder=1;
  }

+
  if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
    arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
    GridCmdOptionIntVector(arg,LebesgueOrder::Block);
@@ -393,25 +373,23 @@ void Grid_finalize(void)
  MPI_Finalize();
  Grid_unquiesce_nodes();
 #endif
-#if defined (GRID_COMMS_SHMEM)
-  shmem_finalize();
-#endif
 }

 void * Grid_backtrace_buffer[_NBACKTRACE];

 void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
 {
-  fprintf(stderr,"Caught signal %d\n",si->si_signo);
-  fprintf(stderr,"  mem address %llx\n",(unsigned long long)si->si_addr);
-  fprintf(stderr,"         code %d\n",si->si_code);
+  printf("Caught signal %d\n",si->si_signo);
+  printf("  mem address %llx\n",(unsigned long long)si->si_addr);
+  printf("         code %d\n",si->si_code);
+
  // Linux/Posix
 #ifdef __linux__
  // And x86 64bit
 #ifdef __x86_64__
  ucontext_t * uc= (ucontext_t *)ptr;
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
-  fprintf(stderr,"  instruction %llx\n",(unsigned long long)sc->rip);
+  printf("  instruction %llx\n",(unsigned long long)sc->rip);
 #define REG(A)  printf("  %s %lx\n",#A,sc-> A);
  REG(rdi);
  REG(rsi);
@@ -434,11 +412,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
  REG(r15);
 #endif
 #endif
-  fflush(stderr);
-  BACKTRACEFP(stderr);
-  fprintf(stderr,"Called backtrace\n");
-  fflush(stdout);
-  fflush(stderr);
+  BACKTRACE();
  exit(0);
  return;
 };
@@ -451,11 +425,9 @@ void Grid_debug_handler_init(void)
  sa.sa_flags    = SA_SIGINFO;
  sigaction(SIGSEGV,&sa,NULL);
  sigaction(SIGTRAP,&sa,NULL);
-  sigaction(SIGBUS,&sa,NULL);

  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);

  sigaction(SIGFPE,&sa,NULL);
-  sigaction(SIGKILL,&sa,NULL);
 }
 }
@@ -29,10 +29,9 @@ See the full license in the file "LICENSE" in the top level distribution
 directory
 *************************************************************************************/
 /*  END LEGAL */
-#include <Grid/GridCore.h>
+#include <Grid.h>

 #include <cxxabi.h>
-#include <memory>

 namespace Grid {

@@ -101,7 +100,7 @@ void Grid_quiesce_nodes(void) {
  me = shmem_my_pe();
 #endif
  if (me) {
-    std::cout.setstate(std::ios::badbit);
+    std::cout.setstate(std::ios::badbit);// mute all nodes except 0
  }
 }

@@ -110,8 +110,8 @@ public:
  friend std::ostream& operator<< (std::ostream& stream, Logger& log){

    if ( log.active ) {
-      stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : ";
-      stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : ";
+      stream << log.background()<< log.topName << log.background()<< " : ";
+      stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
      if ( log.timestamp ) {
 	StopWatch.Stop();
 	GridTime now = StopWatch.Elapsed();
@@ -1,5 +1,4 @@
 extra_sources=
-extra_headers=
 if BUILD_COMMS_MPI
  extra_sources+=communicator/Communicator_mpi.cc
  extra_sources+=communicator/Communicator_base.cc
@@ -25,12 +24,6 @@ if BUILD_COMMS_NONE
  extra_sources+=communicator/Communicator_base.cc
 endif

-if BUILD_HDF5
-  extra_sources+=serialisation/Hdf5IO.cc 
-  extra_headers+=serialisation/Hdf5IO.h
-  extra_headers+=serialisation/Hdf5Type.h
-endif
-
 #
 # Libraries
 #
@@ -39,9 +32,6 @@ include Eigen.inc

 lib_LIBRARIES = libGrid.a

-CCFILES += $(extra_sources)
-HFILES  += $(extra_headers)
-
-libGrid_a_SOURCES              = $(CCFILES)
+libGrid_a_SOURCES              = $(CCFILES) $(extra_sources)
 libGrid_adir                   = $(pkgincludedir)
 nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h
@@ -0,0 +1,154 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/Old/Tensor_peek.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef GRID_MATH_PEEK_H
+#define GRID_MATH_PEEK_H
+namespace Grid {
+
+//////////////////////////////////////////////////////////////////////////////
+// Peek on a specific index; returns a scalar in that index, tensor inherits rest
+//////////////////////////////////////////////////////////////////////////////
+// If we hit the right index, return scalar with no further recursion
+
+//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
+//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
+//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
+//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
+#if 0
+// Level matches at a scalar and no index is supplied: the peek is the argument itself.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
+{
+  return arg;
+}
+// Level matches at a vector: element i is copied out and returned wrapped in a scalar.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline auto peekIndex(const iVector<vtype,N> &arg, int i) -> iScalar<vtype>
+{
+  iScalar<vtype> element;
+  element._internal = arg._internal[i];   // i is used as given, no range check
+  return element;
+}
+// Level matches at a matrix: entry (i,j) is copied out and returned wrapped in a scalar.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline auto peekIndex(const iMatrix<vtype,N> &arg, int i, int j) -> iScalar<vtype>
+{
+  iScalar<vtype> entry;
+  entry._internal = arg._internal[i][j];   // i and j are used as given, no range check
+  return entry;
+}
+
+/////////////
+// Level does not match: peek on a scalar, vector or matrix must forward 0, 1 or 2 index arguments to the contained object, giving nine overloads, each guarded by TensorLevel != Level.
+/////////////
+// Scalar, level mismatch, no index: peek the wrapped object and re-wrap the result in a scalar.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))>
+{
+  typedef decltype(peekIndex<Level>(arg._internal)) inner_t;
+  iScalar<inner_t> wrapped;
+  wrapped._internal = peekIndex<Level>(arg._internal);
+  return wrapped;
+}
+// Scalar, level mismatch, one index: pass i down to the wrapped object and re-wrap the result.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iScalar<vtype> &arg, int i) -> iScalar<decltype(peekIndex<Level>(arg._internal,i))>
+{
+  typedef decltype(peekIndex<Level>(arg._internal,i)) inner_t;
+  iScalar<inner_t> wrapped;
+  wrapped._internal = peekIndex<Level>(arg._internal, i);
+  return wrapped;
+}
+// Scalar, level mismatch, two indices: pass (i,j) down to the wrapped object and re-wrap the result.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iScalar<vtype> &arg, int i, int j) -> iScalar<decltype(peekIndex<Level>(arg._internal,i,j))>
+{
+  typedef decltype(peekIndex<Level>(arg._internal,i,j)) inner_t;
+  iScalar<inner_t> wrapped;
+  wrapped._internal = peekIndex<Level>(arg._internal, i, j);
+  return wrapped;
+}
+// Vector, level mismatch, no index: peek each of the N elements and collect the results in a vector.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iVector<vtype,N> &arg) -> iVector<decltype(peekIndex<Level>(arg._internal[0])),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0])) inner_t;
+  iVector<inner_t,N> gathered;
+  for(int n=0; n<N; ++n){
+    gathered._internal[n] = peekIndex<Level>(arg._internal[n]);
+  }
+  return gathered;
+}
+// Vector, level mismatch, one index: peek each of the N elements with index i and collect the results.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iVector<vtype,N> &arg, int i) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0],i)) inner_t;
+  iVector<inner_t,N> gathered;
+  for(int n=0; n<N; ++n){
+    gathered._internal[n] = peekIndex<Level>(arg._internal[n], i);
+  }
+  return gathered;
+}
+// Vector, level mismatch, two indices: peek each of the N elements with (i,j) and collect the results.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iVector<vtype,N> &arg, int i, int j) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0],i,j)) inner_t;
+  iVector<inner_t,N> gathered;
+  for(int n=0; n<N; ++n){
+    gathered._internal[n] = peekIndex<Level>(arg._internal[n], i, j);
+  }
+  return gathered;
+}
+
+// Matrix, level mismatch, no index: peek each of the N x N entries and collect the results in a matrix.
+// (Original author's remark: this zero-index recursion could be avoided.)
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0][0])) inner_t;
+  iMatrix<inner_t,N> gathered;
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      gathered._internal[row][col] = peekIndex<Level>(arg._internal[row][col]);
+    }
+  }
+  return gathered;
+}
+// Matrix, level mismatch, one index: peek each of the N x N entries with index i and collect the results.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iMatrix<vtype,N> &arg, int i) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0][0],i)) inner_t;
+  iMatrix<inner_t,N> gathered;
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      gathered._internal[row][col] = peekIndex<Level>(arg._internal[row][col], i);
+    }
+  }
+  return gathered;
+}
+// Matrix, level mismatch, two indices: peek each of the N x N entries with (i,j) and collect the results.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline auto peekIndex(const iMatrix<vtype,N> &arg, int i, int j) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N>
+{
+  typedef decltype(peekIndex<Level>(arg._internal[0][0],i,j)) inner_t;
+  iMatrix<inner_t,N> gathered;
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      gathered._internal[row][col] = peekIndex<Level>(arg._internal[row][col], i, j);
+    }
+  }
+  return gathered;
+}
+#endif
+
+
+}
+#endif
@@ -0,0 +1,127 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/Old/Tensor_poke.h
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#ifndef GRID_MATH_POKE_H
+#define GRID_MATH_POKE_H
+namespace Grid {
+
+//////////////////////////////////////////////////////////////////////////////
+// Poke a specific index; 
+//////////////////////////////////////////////////////////////////////////////
+#if 0
+// Level matches at a scalar and no index is supplied: overwrite the wrapped value of ret with that of arg.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
+{
+  ret._internal = arg._internal;
+}
+// Level matches at a vector: store the scalar's wrapped value into element i of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg, int i)
+{
+  ret._internal[i] = arg._internal;   // i is used as given, no range check
+}
+// Level matches at a matrix: store the scalar's wrapped value into entry (i,j) of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * = nullptr>
+inline void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg, int i, int j)
+{
+  ret._internal[i][j] = arg._internal;   // i and j are used as given, no range check
+}
+
+/////////////
+// Level does not match: poke on a scalar, vector or matrix must forward 0, 1 or 2 index arguments to the contained object, giving nine overloads, each guarded by TensorLevel != Level.
+/////////////
+// Scalar, level mismatch, no index: hand the poke on to the object wrapped by ret.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iScalar<vtype> &ret,
+                      const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
+{
+  pokeIndex<Level>(ret._internal, arg._internal);
+}
+// Scalar, level mismatch, one index: hand the poke and index i on to the object wrapped by ret.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iScalar<vtype> &ret,
+                      const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg,
+                      int i)
+{
+  pokeIndex<Level>(ret._internal, arg._internal, i);
+}
+// Scalar, level mismatch, two indices: hand the poke and (i,j) on to the object wrapped by ret.
+template<int Level, class vtype,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iScalar<vtype> &ret,
+                      const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,
+                      int i, int j)
+{
+  pokeIndex<Level>(ret._internal, arg._internal, i, j);
+}
+
+// Vector, level mismatch, no index: forward the poke to each of the N elements of ret,
+// pairing element ii of ret with element ii of arg.
+// arg is read only, so it is taken by const reference like every other pokeIndex overload here.
+// NOTE(review): the decltype below peeks ret._internal unindexed, whereas the loop body
+// accesses ret._internal[ii] — confirm the parameter type is what was intended.
+template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline 
+  void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal)),N>  &arg)
+{
+  for(int ii=0;ii<N;ii++){
+    pokeIndex<Level>(ret._internal[ii],arg._internal[ii]);
+  }
+}
+// Vector, level mismatch, one index: forward the poke with index i to each of the N elements of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iVector<vtype,N> &ret,
+                      const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,
+                      int i)
+{
+  for(int n=0; n<N; ++n){
+    pokeIndex<Level>(ret._internal[n], arg._internal[n], i);
+  }
+}
+// Vector, level mismatch, two indices: forward the poke with (i,j) to each of the N elements of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iVector<vtype,N> &ret,
+                      const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,
+                      int i, int j)
+{
+  for(int n=0; n<N; ++n){
+    pokeIndex<Level>(ret._internal[n], arg._internal[n], i, j);
+  }
+}
+
+// Matrix, level mismatch, no index: forward the poke to each of the N x N entries of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iMatrix<vtype,N> &ret,
+                      const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)
+{
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      pokeIndex<Level>(ret._internal[row][col], arg._internal[row][col]);
+    }
+  }
+}
+// Matrix, level mismatch, one index: forward the poke with index i to each of the N x N entries of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iMatrix<vtype,N> &ret,
+                      const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,
+                      int i)
+{
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      pokeIndex<Level>(ret._internal[row][col], arg._internal[row][col], i);
+    }
+  }
+}
+// Matrix, level mismatch, two indices: forward the poke with (i,j) to each of the N x N entries of ret.
+template<int Level, class vtype, int N,
+         typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * = nullptr>
+inline void pokeIndex(iMatrix<vtype,N> &ret,
+                      const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,
+                      int i, int j)
+{
+  for(int row=0; row<N; ++row){
+    for(int col=0; col<N; ++col){
+      pokeIndex<Level>(ret._internal[row][col], arg._internal[row][col], i, j);
+    }
+  }
+}
+#endif
+
+}
+#endif
@@ -26,8 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
    *************************************************************************************/
    /*  END LEGAL */

-#include <Grid/GridCore.h>
-#include <Grid/perfmon/PerfCount.h>
+#include <Grid.h>
+#include <PerfCount.h>

 namespace Grid {

@@ -172,7 +172,7 @@ public:
    const char * name = PerformanceCounterConfigs[PCT].name;
    fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
    if (fd == -1) {
-      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
+      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
      perror("Error is");
    }
    int norm = PerformanceCounterConfigs[PCT].normalisation;
@@ -181,7 +181,7 @@ public:
    name = PerformanceCounterConfigs[norm].name;
    cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
    if (cyclefd == -1) {
-      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
+      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
      perror("Error is");
    }
 #endif
@@ -205,13 +205,12 @@ public:
  void Stop(void) {
    count=0;
    cycles=0;
-    size_t ign;
 #ifdef __linux__
    if ( fd!= -1) {
      ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
      ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
-      ign=::read(fd, &count, sizeof(long long));
-      ign=::read(cyclefd, &cycles, sizeof(long long));
+      ::read(fd, &count, sizeof(long long));
+      ::read(cyclefd, &cycles, sizeof(long long));
    }
    elapsed = cyclecount() - begin;
 #else
@@ -172,8 +172,8 @@ namespace Grid {

 };

-#include <Grid/simd/Grid_vector_types.h>
-#include <Grid/simd/Grid_vector_unops.h>
+#include "simd/Grid_vector_types.h"
+#include "simd/Grid_vector_unops.h"

 namespace Grid {
  // Default precision
@@ -1,9 +1,11 @@
-#include <Grid/GridCore.h>
-#include <Grid/perfmon/PerfCount.h>
-#include <Grid/perfmon/Stat.h>
+#include <Grid.h>
+#include <PerfCount.h>
+#include <Stat.h>
+

 namespace Grid { 

+
 bool PmuStat::pmu_initialized=false;


@@ -1,4 +1,4 @@
-/*************************************************************************************
+   /*************************************************************************************

     Grid physics library, www.github.com/paboyle/Grid 

@@ -25,11 +25,13 @@
     See the full license in the file "LICENSE" in the top level distribution directory
     *************************************************************************************/
     /*  END LEGAL */
-#ifndef GRID_STENCIL_H
-#define GRID_STENCIL_H
+ #ifndef GRID_STENCIL_H
+ #define GRID_STENCIL_H
+
+ #include <thread>
+
+ #include <Grid/stencil/Lebesgue.h>   // subdir aggregate

-#include <Grid/stencil/Lebesgue.h>   // subdir aggregate
-#define NEW_XYZT_GATHER
 //////////////////////////////////////////////////////////////////////////////////////////
 // Must not lose sight that goal is to be able to construct really efficient
 // gather to a point stencil code. CSHIFT is not the best way, so need
@@ -68,49 +70,51 @@

 namespace Grid {

-///////////////////////////////////////////////////////////////////
-// Gather for when there *is* need to SIMD split with compression
-///////////////////////////////////////////////////////////////////
-void Gather_plane_table_compute (GridBase *grid,int dimension,int plane,int cbmask,
-					int off,std::vector<std::pair<int,int> > & table);
-
-template<class vobj,class cobj,class compressor> 
-void Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)   __attribute__((noinline));
-
-template<class vobj,class cobj,class compressor> 
-void Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)
+inline void Gather_plane_simple_table_compute (GridBase *grid,int dimension,int plane,int cbmask, // Rebuilds table as (buffer slot, site offset) pairs: every one of the e1*e2 sites, or only those selected by cbmask
+					       int off,std::vector<std::pair<int,int> > & table)
 {
-  int num=table.size();
-  parallel_for(int i=0;i<num;i++){
-    vstream(buffer[off+table[i].first],compress(rhs._odata[so+table[i].second]));
+  table.resize(0); // discard whatever the caller's table held
+  int rd = grid->_rdimensions[dimension]; // NOTE(review): rd is not referenced again in this function
+
+  if ( !grid->CheckerBoarded(dimension) ) {
+    cbmask = 0x3; // dimension is not checkerboarded: take every site
+  }
+  int so= plane*grid->_ostride[dimension]; // base offset for start of plane (not added to the table entries below)
+  int e1=grid->_slice_nblock[dimension];
+  int e2=grid->_slice_block[dimension];
+
+  int stride=grid->_slice_stride[dimension];
+  if ( cbmask == 0x3 ) { // all sites: slot n*e2+b maps to offset n*stride+b
+    table.resize(e1*e2);
+    for(int n=0;n<e1;n++){
+      for(int b=0;b<e2;b++){
+	int o  = n*stride;
+	int bo = n*e2;
+	table[bo+b]=std::pair<int,int>(bo+b,o+b);
+      }
+    }
+  } else { // only sites whose checkerboard bit is set in cbmask, packed into consecutive slots
+     int bo=0;
+     table.resize(e1*e2/2); // NOTE(review): presumes exactly half of the e1*e2 sites match cbmask - confirm
+     for(int n=0;n<e1;n++){
+       for(int b=0;b<e2;b++){
+	 int o  = n*stride;
+	 int ocb=1<<grid->CheckerBoardFromOindexTable(o+b); // checkerboard of this site expressed as a one-bit mask
+	 if ( ocb &cbmask ) {
+	   table[bo]=std::pair<int,int>(bo,o+b); bo++;
+	 }
+       }
+     }
  }
 }

-///////////////////////////////////////////////////////////////////
-// Gather for when there *is* need to SIMD split with compression
-///////////////////////////////////////////////////////////////////
-template<class cobj,class vobj,class compressor>
-void Gather_plane_exchange_table(const Lattice<vobj> &rhs,
-				 std::vector<cobj *> pointers,int dimension,int plane,int cbmask,compressor &compress,int type) __attribute__((noinline));
-
-template<class cobj,class vobj,class compressor>
-void Gather_plane_exchange_table(std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,
-				 std::vector<cobj *> pointers,int dimension,int plane,int cbmask,
-				 compressor &compress,int type)
+template<class vobj,class cobj,class compressor> void // For every table entry: buffer[off+first] = compress(rhs._odata[so+second])
+Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,cobj *buffer,compressor &compress, int off,int so)
 {
-  assert( (table.size()&0x1)==0);
-  int num=table.size()/2;
-  int so  = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
-  parallel_for(int j=0;j<num;j++){
-    //    buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
-    cobj temp1 =compress(rhs._odata[so+table[2*j].second]);
-    cobj temp2 =compress(rhs._odata[so+table[2*j+1].second]);
-    cobj temp3;
-    cobj temp4;
-    exchange(temp3,temp4,temp1,temp2,type);
-    vstream(pointers[0][j],temp3);
-    vstream(pointers[1][j],temp4);
-  }
+PARALLEL_FOR_LOOP     // an OpenMP parallel-for pragma when GRID_OMP is defined, otherwise empty
+     for(int i=0;i<table.size();i++){
+       buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); // first = destination slot, second = source site offset
+     }
 }

 struct StencilEntry { 
@@ -121,8 +125,6 @@ struct StencilEntry {
  uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline
 };

-//extern int dump;
-
 template<class vobj,class cobj>
 class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in.
 public:
@@ -157,6 +159,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    p.to_rank  = to;
    p.from_rank= from;
    p.bytes    = bytes;
+    comms_bytes+=2.0*bytes;
    Packets.push_back(p);
  }

@@ -165,45 +168,36 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    reqs.resize(Packets.size());
    commtime-=usecond();
    for(int i=0;i<Packets.size();i++){
-      comms_bytes+=_grid->StencilSendToRecvFromBegin(reqs[i],
+	_grid->StencilSendToRecvFromBegin(reqs[i],
 					  Packets[i].send_buf,
 					  Packets[i].to_rank,
 					  Packets[i].recv_buf,
 					  Packets[i].from_rank,
 					  Packets[i].bytes);
+	/*
+      }else{
+	_grid->SendToRecvFromBegin(reqs[i],
+				   Packets[i].send_buf,
+				   Packets[i].to_rank,
+				   Packets[i].recv_buf,
+				   Packets[i].from_rank,
+				   Packets[i].bytes);
+      }
+	*/
    }
    commtime+=usecond();
  }
  void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
  {
    commtime-=usecond();
+
    for(int i=0;i<Packets.size();i++){
-      _grid->StencilSendToRecvFromComplete(reqs[i]);
+      //      if( ShmDirectCopy ) 
+	_grid->StencilSendToRecvFromComplete(reqs[i]);
+	//      else 
+	//	_grid->SendToRecvFromComplete(reqs[i]);
    }
-    _grid->StencilBarrier();// Synch shared memory on a single nodes
    commtime+=usecond();
-    /*
-    int dump=1;
-    if(dump){
-      for(int i=0;i<Packets.size();i++){
-	cobj * ptr  = (cobj *) Packets[i].recv_buf;
-	uint64_t num=Packets[i].bytes/sizeof(cobj);
-	  std::cout << " CommunicateComplete " << i<< " / " << Packets.size()<< " num " << num <<std::endl;
-	  std::stringstream ss;
-	  ss<<"recvbuf";
-	  for(int d=0;d<_grid->_ndimension;d++){
-	    ss<<"."<<_grid->_processor_coor[d];
-	  }
-	  ss<<"_mu_"<<i;
-	  std::string fname(ss.str());
-	  std::ofstream fout(fname);
-	  for(int k=0;k<num;k++) {
-	    fout << i<<" "<<k<<" "<<ptr[k]<<std::endl;
-	  }
-      }
-    }
-    dump =0;
-*/
  }

  ///////////////////////////////////////////
@@ -212,18 +206,14 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  struct Merge {
    cobj * mpointer;
    std::vector<scalar_object *> rpointers;
-    std::vector<cobj *> vpointers;
    Integer buffer_size;
    Integer packet_id;
-    Integer exchange;
-    Integer type;
  };
  
  std::vector<Merge> Mergers;

  void AddMerge(cobj *merge_p,std::vector<scalar_object *> &rpointers,Integer buffer_size,Integer packet_id) {
    Merge m;
-    m.exchange = 0;
    m.mpointer = merge_p;
    m.rpointers= rpointers;
    m.buffer_size = buffer_size;
@@ -231,48 +221,17 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    Mergers.push_back(m);
  }

-  void AddMergeNew(cobj *merge_p,std::vector<cobj *> &rpointers,Integer buffer_size,Integer packet_id,Integer type) {
-    Merge m;
-    m.exchange = 1;
-    m.type     = type;
-    m.mpointer = merge_p;
-    m.vpointers= rpointers;
-    m.buffer_size = buffer_size;
-    m.packet_id   = packet_id;
-    Mergers.push_back(m);
-  }
-
  void CommsMerge(void ) { 

    for(int i=0;i<Mergers.size();i++){	
+      
      mergetime-=usecond();
-
-      //      std::cout << "Merge " <<i << std::endl;
-      //      std::stringstream ss;
-      //      ss<<"mergebuf";
-      //      for(int d=0;d<_grid->_ndimension;d++){
-      //	ss<<"."<<_grid->_processor_coor[d];
-      //      }
-      //      ss<<"_m_"<<i;
-      //      std::string fname(ss.str());
-      //      std::ofstream fout(fname);
-
-      if ( Mergers[i].exchange == 0 ) { 
-	parallel_for(int o=0;o<Mergers[i].buffer_size;o++){
-	  merge1(Mergers[i].mpointer[o],Mergers[i].rpointers,o);
-	  //	fout<<o<<" "<<Mergers[i].mpointer[o]<<std::endl;
-	}
-      } else { 
-	parallel_for(int o=0;o<Mergers[i].buffer_size/2;o++){
-	  exchange(Mergers[i].mpointer[2*o],Mergers[i].mpointer[2*o+1],
-		   Mergers[i].vpointers[0][o],Mergers[i].vpointers[1][o],Mergers[i].type);
-	  //	  cobj temp1,temp2;
-	  //	  exchange(temp1,temp2,Mergers[i].vpointers[0][o],Mergers[i].vpointers[1][o],Mergers[i].type);
-	  //	  vstream(Mergers[i].mpointer[2*o],temp1);
-	  //	  vstream(Mergers[i].mpointer[2*o+1],temp2);
-	}
+PARALLEL_FOR_LOOP
+      for(int o=0;o<Mergers[i].buffer_size;o++){
+	merge1(Mergers[i].mpointer[o],Mergers[i].rpointers,o);
      }
      mergetime+=usecond();
+
    }
  }

@@ -336,8 +295,6 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  // depending on comms target
  cobj* u_recv_buf_p;
  cobj* u_send_buf_p;
-  std::vector<cobj *> new_simd_send_buf;
-  std::vector<cobj *> new_simd_recv_buf;
  std::vector<scalar_object *> u_simd_send_buf;
  std::vector<scalar_object *> u_simd_recv_buf;

@@ -349,8 +306,8 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  /////////////////////////////////////////
  // Timing info; ugly; possibly temporary
  /////////////////////////////////////////
-#define TIMING_HACK
-#ifdef TIMING_HACK
+ #define TIMING_HACK
+ #ifdef TIMING_HACK
  double jointime;
  double gathertime;
  double commtime;
@@ -384,11 +341,6 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  void Report(void) {
 #define PRINTIT(A)	\
 std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
-
-    RealD NP = _grid->_Nprocessors;
-    RealD NN = _grid->NodeCount();
-
-    _grid->GlobalSum(commtime);    commtime/=NP;
    if ( calls > 0. ) {
      std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
      PRINTIT(halogtime);
@@ -398,8 +350,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      if(comms_bytes>1.0){
 	PRINTIT(comms_bytes);
 	PRINTIT(commtime);
-	std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s per rank"<<std::endl;
-	std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000.*NP/NN << " GB/s per node"<<std::endl;
+	std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s "<<std::endl;
      }
      PRINTIT(jointime);
      PRINTIT(spintime);
@@ -443,9 +394,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      
      _checkerboard = checkerboard;
      
-      //////////////////////////
      // the permute type
-      //////////////////////////
      int simd_layout     = _grid->_simd_layout[dimension];
      int comm_dim        = _grid->_processors[dimension] >1 ;
      int splice_dim      = _grid->_simd_layout[dimension]>1 && (comm_dim);
@@ -455,11 +404,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      
      int sshift[2];
      
-      //////////////////////////
      // Underlying approach. For each local site build
      // up a table containing the npoint "neighbours" and whether they 
      // live in lattice or a comms buffer.
-      //////////////////////////
      if ( !comm_dim ) {
 	sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
 	sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
@@ -470,11 +417,11 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 	  Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes
 	  Local(point,dimension,shift,0x2);// both with block stride loop iteration
 	}
-      } else { 
-	// All permute extract done in comms phase prior to Stencil application
+      } else { // All permute extract done in comms phase prior to Stencil application
 	//        So tables are the same whether comm_dim or splice_dim
 	sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even);
 	sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd);
+	
 	if ( sshift[0] == sshift[1] ) {
 	  Comms(point,dimension,shift,0x3);
 	} else {
@@ -493,21 +440,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal

    u_simd_send_buf.resize(Nsimd);
    u_simd_recv_buf.resize(Nsimd);
-    new_simd_send_buf.resize(Nsimd);
-    new_simd_recv_buf.resize(Nsimd);
+
    u_send_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
    u_recv_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
-#ifdef NEW_XYZT_GATHER
-    for(int l=0;l<2;l++){
-      new_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
-      new_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
-    }
-#else
    for(int l=0;l<Nsimd;l++){
      u_simd_recv_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object));
      u_simd_send_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object));
    }
-#endif

    PrecomputeByteOffsets();
  }
@@ -574,11 +513,9 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    assert(shift>=0);
    assert(shift<fd);
    
-    // done in reduced dims, so SIMD factored
-    int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; 
-
+    int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored
+    
    _comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and
-
    // send to one or more remote nodes.
    
    int cb= (cbmask==0x2)? Odd : Even;
@@ -741,10 +678,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    calls++;
    Mergers.resize(0);
    Packets.resize(0);
+    _grid->StencilBarrier();
    HaloGather(source,compress);
    this->CommunicateBegin(reqs);
+    _grid->StencilBarrier();
    this->CommunicateComplete(reqs);
-    CommsMerge(); 
+    _grid->StencilBarrier();
+    CommsMerge(); // spins
  }
  
  template<class compressor> void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx)
@@ -775,13 +715,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      if ( sshift[0] == sshift[1] ) {
 	if (splice_dim) {
 	  splicetime-=usecond();
-	  //	  GatherSimd(source,dimension,shift,0x3,compress,face_idx);
-	  //	  std::cout << "GatherSimdNew"<<std::endl;
-#ifdef NEW_XYZT_GATHER
-	  GatherSimdNew(source,dimension,shift,0x3,compress,face_idx);
-#else 
 	  GatherSimd(source,dimension,shift,0x3,compress,face_idx);
-#endif
 	  splicetime+=usecond();
 	} else { 
 	  nosplicetime-=usecond();
@@ -791,14 +725,8 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      } else {
 	if(splice_dim){
 	  splicetime-=usecond();
-	  //	  std::cout << "GatherSimdNew2calls"<<std::endl;
-#ifdef NEW_XYZT_GATHER
-	  GatherSimdNew(source,dimension,shift,0x1,compress,face_idx);// if checkerboard is unfavourable take two passes
-	  GatherSimdNew(source,dimension,shift,0x2,compress,face_idx);// both with block stride loop iteration
-#else 
 	  GatherSimd(source,dimension,shift,0x1,compress,face_idx);// if checkerboard is unfavourable take two passes
 	  GatherSimd(source,dimension,shift,0x2,compress,face_idx);// both with block stride loop iteration
-#endif
 	  splicetime+=usecond();
 	} else {
 	  nosplicetime-=usecond();
@@ -813,8 +741,6 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
  template<class compressor>
  void HaloGather(const Lattice<vobj> &source,compressor &compress)
  {
-    _grid->StencilBarrier();// Synch shared memory on a single nodes
-
    // conformable(source._grid,_grid);
    assert(source._grid==_grid);
    halogtime-=usecond();
@@ -875,13 +801,13 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 	if ( !face_table_computed ) {
 	  t_table-=usecond();
 	  face_table.resize(face_idx+1);
-	  Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]);
-	  //	  std::cout << " face table size "<<face_idx <<" " <<  face_table[face_idx].size() <<" computed buffer size "<< words <<
-	  //		    " bytes = " << bytes <<std::endl;
+	  Gather_plane_simple_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,
+					     face_table[face_idx]);
 	  t_table+=usecond();
 	}
 	
-      	int rank           = _grid->_processor;
+	
+	int rank           = _grid->_processor;
 	int recv_from_rank;
 	int xmit_to_rank;
 	_grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);
@@ -892,14 +818,17 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 	/////////////////////////////////////////////////////////
 	// try the direct copy if possible
 	/////////////////////////////////////////////////////////
+
+
 	cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p);
 	if ( send_buf==NULL ) { 
 	  send_buf = u_send_buf_p;
-	} 
-	
+	}
+	//	std::cout << " send_bufs  "<<std::hex<< send_buf <<" ubp "<<u_send_buf_p <<std::dec<<std::endl;
 	t_data-=usecond();
+	assert(u_send_buf_p!=NULL);
 	assert(send_buf!=NULL);
-	Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so);  face_idx++;
+	Gather_plane_simple_table         (face_table[face_idx],rhs,send_buf,compress,u_comm_offset,so);  face_idx++;
 	t_data+=usecond();
 	
 	AddPacket((void *)&send_buf[u_comm_offset],
@@ -946,9 +875,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    
    std::vector<scalar_object *> rpointers(Nsimd);
    std::vector<scalar_object *> spointers(Nsimd);
-
-    //    std::cout << "GatherSimd " << dimension << " shift "<<shift<<std::endl;
-
+    
    ///////////////////////////////////////////
    // Work out what to send where
    ///////////////////////////////////////////
@@ -960,7 +887,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
    for(int x=0;x<rd;x++){       
      
      int any_offnode = ( ((x+sshift)%fd) >= rd );
-
+      
      if ( any_offnode ) {
 	
 	for(int i=0;i<Nsimd;i++){       
@@ -975,15 +902,15 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal

 	for(int i=0;i<Nsimd;i++){
 	  
-	  // FIXME -  This logic is hard coded to simd_layout==2 and not allowing >2
-	  //	  for(int w=0;w<buffer_size;w++){
-	  //	    std::cout << "GatherSimd<"<<Nsimd<<"> : lane " << i <<" elem "<<w<<" "<< u_simd_send_buf[i ][u_comm_offset+w]<<std::endl;
-	  //	  }
+	  // FIXME 
+	  // This logic is hard coded to simd_layout ==2 and not allowing >2
+	  //		std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<<std::endl;
+	  
 	  int inner_bit = (Nsimd>>(permute_type+1));
 	  int ic= (i&inner_bit)? 1:0;
 	  
-	  int my_coor  = rd*ic + x;
-	  int nbr_coor = my_coor+sshift;
+	  int my_coor          = rd*ic + x;
+	  int nbr_coor         = my_coor+sshift;
 	  int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
 	  int nbr_lcoor= (nbr_coor%ld);
 	  int nbr_ic   = (nbr_lcoor)/rd;    // inner coord of peer
@@ -992,10 +919,10 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 	  
 	  if (nbr_ic) nbr_lane|=inner_bit;
 	  assert (sx == nbr_ox);
-
+	  
 	  auto rp = &u_simd_recv_buf[i       ][u_comm_offset];
 	  auto sp = &u_simd_send_buf[nbr_lane][u_comm_offset];
-
+	  
 	  if(nbr_proc){
 	    
 	    int recv_from_rank;
@@ -1003,17 +930,16 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
 	    
 	    _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); 
 
-	    // shm == receive pointer         if offnode
-	    // shm == Translate[send pointer] if on node -- my view of his send pointer
 	    scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp);
+	    //	    if ((ShmDirectCopy==0)||(shm==NULL)) { 
 	    if (shm==NULL) { 
 	      shm = rp;
-	    }
-
+	    } 
+	    
 	    // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node
 	    // assuming above pointer flip
 	    AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
-
+	    
 	    rpointers[i] = shm;
 	    
 	  } else { 
@@ -1029,133 +955,6 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal
      }
    }
  }
-
-
-  template<class compressor>
-  void  GatherSimdNew(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx)
-  {
-    const int Nsimd = _grid->Nsimd();
-
-    const int maxl =2;// max layout in a direction
-    int fd = _grid->_fdimensions[dimension];
-    int rd = _grid->_rdimensions[dimension];
-    int ld = _grid->_ldimensions[dimension];
-    int pd              = _grid->_processors[dimension];
-    int simd_layout     = _grid->_simd_layout[dimension];
-    int comm_dim        = _grid->_processors[dimension] >1 ;
-    assert(comm_dim==1);
-    // This will not work with a rotate dim
-    assert(simd_layout==maxl);
-    assert(shift>=0);
-    assert(shift<fd);
-
-    int permute_type=_grid->PermuteType(dimension);
-    //    std::cout << "SimdNew permute type "<<permute_type<<std::endl;
-
-    ///////////////////////////////////////////////
-    // Simd direction uses an extract/merge pair
-    ///////////////////////////////////////////////
-    int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension];
-    int words = sizeof(cobj)/sizeof(vector_type);
-    
-    assert(cbmask==0x3); // Fixme think there is a latent bug if not true
-    
-    int reduced_buffer_size = buffer_size;
-    if (cbmask != 0x3) reduced_buffer_size=buffer_size>>1;
-
-    int bytes = (reduced_buffer_size*sizeof(cobj))/simd_layout;
-    assert(bytes*simd_layout == reduced_buffer_size*sizeof(cobj));
-
-    std::vector<cobj *> rpointers(maxl);
-    std::vector<cobj *> spointers(maxl);
-
-    ///////////////////////////////////////////
-    // Work out what to send where
-    ///////////////////////////////////////////
-    
-    int cb    = (cbmask==0x2)? Odd : Even;
-    int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
-    
-    // loop over outer coord planes orthog to dim
-    for(int x=0;x<rd;x++){       
-      
-      int any_offnode = ( ((x+sshift)%fd) >= rd );
-
-      if ( any_offnode ) {
-
-	
-	for(int i=0;i<maxl;i++){       
-	  spointers[i] = (cobj *) &new_simd_send_buf[i][u_comm_offset];
-	}
-	
-	int sx   = (x+sshift)%rd;
-
-	//	if ( cbmask==0x3 ) { 
-	//	  std::vector<std::pair<int,int> > table;
-	t_table-=usecond();
-	if ( !face_table_computed ) {
-	  face_table.resize(face_idx+1);
-	  Gather_plane_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset,face_table[face_idx]);
-	  //	  std::cout << " face table size "<<face_idx <<" " <<  face_table[face_idx].size() <<" computed buffer size "<< reduced_buffer_size <<
-	  //		    " bytes = "<<bytes <<std::endl;
-	}
-	t_table+=usecond();
-	gathermtime-=usecond();
-	Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);  face_idx++;
-	gathermtime+=usecond();
-      
-	//spointers[0] -- low
-	//spointers[1] -- high
-
-	for(int i=0;i<maxl;i++){
-
-	  int my_coor  = rd*i + x;            // self explanatory
-	  int nbr_coor = my_coor+sshift;      // self explanatory
-
-	  int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors
-	  int nbr_lcoor= (nbr_coor%ld);       // local plane coor on neighbour node
-	  int nbr_ic   = (nbr_lcoor)/rd;      // inner coord of peer simd lane "i"
-	  int nbr_ox   = (nbr_lcoor%rd);      // outer coord of peer "x"
-
-	  int nbr_plane = nbr_ic;
-	  assert (sx == nbr_ox);
-
-	  auto rp = &new_simd_recv_buf[i        ][u_comm_offset];
-	  auto sp = &new_simd_send_buf[nbr_plane][u_comm_offset];
-
-	  if(nbr_proc){
-
-	    int recv_from_rank;
-	    int xmit_to_rank;
-	    
-	    _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); 
- 
-	    // shm == receive pointer         if offnode
-	    // shm == Translate[send pointer] if on node -- my view of his send pointer
-	    cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
-	    if (shm==NULL) { 
-	      shm = rp;
-	    }
-
-	    // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node
-	    // assuming above pointer flip
-	    AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes);
-
-	    rpointers[i] = shm;
-	    
-	  } else { 
-	    
-	    rpointers[i] = sp;
-	    
-	  }
-	}
-
-	AddMergeNew(&u_recv_buf_p[u_comm_offset],rpointers,reduced_buffer_size,Packets.size()-1,permute_type);
-
-	u_comm_offset     +=buffer_size;
-      }
-    }
-  }
  
 };
 }
@@ -37,23 +37,24 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

 #ifdef GRID_OMP
 #include <omp.h>
-
-#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
-#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
-#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
-#define PARALLEL_REGION       _Pragma("omp parallel")
-#define PARALLEL_CRITICAL     _Pragma("omp critical")
+#ifdef GRID_NUMA
+  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
+  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
+  #else
+  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
+  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
+#endif
+#define PARALLEL_NESTED_LOOP2    _Pragma("omp parallel for collapse(2)")
+#define PARALLEL_REGION          _Pragma("omp parallel")
+#define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)")
 #else
 #define PARALLEL_FOR_LOOP
 #define PARALLEL_FOR_LOOP_INTERN
 #define PARALLEL_NESTED_LOOP2
 #define PARALLEL_REGION
-#define PARALLEL_CRITICAL
+#define PARALLEL_FOR_LOOP_STATIC
 #endif

-#define parallel_for       PARALLEL_FOR_LOOP for
-#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
-
 namespace Grid {

  // Introduce a class to gain deterministic bit reproducible reduction.
@@ -267,7 +267,8 @@ namespace Grid {
      SimpleCompressor<siteVector> compressor;
      Stencil.HaloExchange(in,compressor);

-      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+PARALLEL_FOR_LOOP
+      for(int ss=0;ss<Grid()->oSites();ss++){
        siteVector res = zero;
 	siteVector nbr;
 	int ptype;
@@ -379,7 +380,8 @@ namespace Grid {
 	  Subspace.ProjectToSubspace(oProj,oblock);
 	  //	  blockProject(iProj,iblock,Subspace.subspace);
 	  //	  blockProject(oProj,oblock,Subspace.subspace);
-	  parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+PARALLEL_FOR_LOOP
+	  for(int ss=0;ss<Grid()->oSites();ss++){
 	    for(int j=0;j<nbasis;j++){
 	      if( disp!= 0 ) {
 		A[p]._odata[ss](j,i) = oProj._odata[ss](j);
@@ -425,7 +427,7 @@ namespace Grid {
 	A[p]=zero;
      }

-      GridParallelRNG  RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
+      GridParallelRNG  RNG(Grid()); RNG.SeedRandomDevice();
      Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);

      Complex one(1.0);
@@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include <Grid/GridCore.h>
+#include <Grid.h>

 namespace Grid {
 double MultiShiftFunction::approx(double x)
@@ -20,7 +20,7 @@
 #include<iomanip>
 #include<cassert>

-#include<Grid/algorithms/approx/Remez.h>
+#include<algorithms/approx/Remez.h>

 // Constructor
 AlgRemez::AlgRemez(double lower, double upper, long precision) 
@@ -9,6 +9,7 @@ Copyright (C) 2015
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Guido Cossu <guido.cossu@ed.ac.uk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -33,6 +34,21 @@ directory

 namespace Grid {

+struct CG_state { // Bookkeeping used by ConjugateGradient to compare residual norms between two runs of the same solve
+  bool do_repro; // false: each iteration's residual norm is appended to residuals; true: it is compared against the stored value
+  std::vector<RealD> residuals; // one entry per CG iteration, indexed by k-1
+
+  CG_state() {reset();} // starts in the recording state with no stored residuals
+
+  void reset(){ // back to the recording state; drops all stored residuals
+    do_repro = false;
+    residuals.clear();
+  }
+};
+
+
+enum CGexec_mode{ Default, ReproducibilityTest }; // mode selector taken by the ConjugateGradient constructor (its Mode argument defaults to Default)
+
 /////////////////////////////////////////////////////////////
 // Base classes for iterative processes based on operators
 // single input vec, single output vec.
@@ -45,12 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> {
                           // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
-  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
-  
-  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
-      : Tolerance(tol),
-        MaxIterations(maxit),
-        ErrorOnNoConverge(err_on_no_conv){};
+
+  // Reproducibility controls
+  bool ReproTest; // set by the constructor from its CGexec_mode argument; gates the residual recording/comparison in operator()
+  CG_state CGState; // to check reproducibility by repeating the CG
+  ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner products (passed to the norm2 calls in operator())
+
+  // Constructor
+  ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
+    : Tolerance(tol),MaxIterations(maxit){
+    // Every case ends in break: without it Default fell through into the
+    // ReproducibilityTest branch and silently ran with the test settings.
+    switch(Mode) {
+      case ReproducibilityTest :  // ErrorOnNoConverge off, residual repro check on
+        ErrorOnNoConverge = false; ReproTest = true;  break;
+      case Default :              // normal solve
+      default :                   // any other value also gets the normal settings, so neither flag is left unset
+        ErrorOnNoConverge = true;  ReproTest = false; break;
+    }
+  };
+
+  void set_reproducibility_interval(unsigned int interval){ // stores interval in ReprTest.interval, which operator() logs as the number of calls between full checks
+    ReprTest.interval = interval;
+  }
+

  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
                  Field &psi) {
@@ -62,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> {
    Field p(src);
    Field mmp(src);
    Field r(src);
+    Field psi_start(psi);// save for the repro test
+
+    if (CGState.do_repro && ReproTest)
+        std::cout << GridLogMessage << "Starting reproducibility test, full check every "
+                  << ReprTest.interval << " calls" << std::endl;
+
+    if(!ReprTest.do_check)
+        ReprTest.reset();
+    ReprTest.enable_reprocheck=ReproTest;
+
+

    // Initial residual computation & set up
-    RealD guess = norm2(psi);
+    RealD guess = norm2(psi, ReprTest);
    assert(std::isnan(guess) == 0);

+    Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check
    
-    Linop.HermOpAndNorm(psi, mmp, d, b);
-    
-
    r = src - mmp;
    p = r;

-    a = norm2(p);
+    a = norm2(p, ReprTest);
    cp = a;
-    ssq = norm2(src);
+    ssq = norm2(src, ReprTest);

-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:   src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:    mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:   mmp " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:  cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient:     p " << a << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient:   src " << ssq << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient:    mp " << d << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient:   mmp " << b << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient:  cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient:     p " << a << std::endl;

    RealD rsq = Tolerance * Tolerance * ssq;

@@ -109,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
    SolverTimer.Start();
    int k;
    for (k = 1; k <= MaxIterations; k++) {
-      c = cp;
+      c = cp;// old residual

      MatrixTimer.Start();
-      Linop.HermOpAndNorm(p, mmp, d, qq);
+      Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp
      MatrixTimer.Stop();

      LinalgTimer.Start();
@@ -120,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
      //  ComplexD dck  = innerProduct(p,mmp);

      a = c / d;
-      b_pred = a * (a * qq - d) / c;
+      b_pred = a * (a * qq - d) / c;// a check

-      cp = axpy_norm(r, -a, mmp, r);
+
+      axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap
+      cp = norm2(r, ReprTest); // bookkeeping this norm
+      if (ReproTest && !CGState.do_repro) {
+        CGState.residuals.push_back(cp);  // save residuals state
+                std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
+        }
+      if (ReproTest && CGState.do_repro){
+        // check that the residual agrees with the previous run
+        std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
+        if (cp != CGState.residuals[k-1]){
+                std::cout << GridLogMessage << "Failing reproducibility test";
+                std::cout << GridLogMessage << " at k=" << k << std::endl;
+                std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1] 
+                        << " cp = " << cp << std::endl;
+                exit(1);  // exit after the first failure
+        }
+      }
      b = cp / c;

      // Fuse these loops ; should be really easy
-      psi = a * p + psi;
-      p = p * b + r;
+      psi = a * p + psi; // update solution
+      p = p * b + r;  // update search direction

      LinalgTimer.Stop();
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
@@ -157,14 +211,29 @@ class ConjugateGradient : public OperatorFunction<Field> {
        std::cout << std::endl;

        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
-	IterationsToComplete = k;	
+
+        if (! (CGState.do_repro && ReproTest)){
+                CGState.do_repro = true;
+                ReprTest.do_check = true;
+                ReprTest.reset_counter();
+                this->operator()(Linop, src, psi_start);// run the repro test
+                if (ReprTest.success)
+                	std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
+                else{
+                	std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
+                	exit(1);
+                }
+        }
+
+        // Clear state
+        CGState.reset();
+        ReprTest.reset();
        return;
      }
    }
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
              << std::endl;
    if (ErrorOnNoConverge) assert(0);
-    IterationsToComplete = k;
  }
 };
 }
@@ -35,7 +35,6 @@ namespace Grid {
  class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
  public:                                                
    RealD   Tolerance;
-    RealD   InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
    Integer MaxInnerIterations;
    Integer MaxOuterIterations;
    GridBase* SinglePrecGrid; //Grid for single-precision fields
@@ -43,16 +42,12 @@ namespace Grid {
    LinearOperatorBase<FieldF> &Linop_f;
    LinearOperatorBase<FieldD> &Linop_d;

-    Integer TotalInnerIterations; //Number of inner CG iterations
-    Integer TotalOuterIterations; //Number of restarts
-    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
-
    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
    LinearFunction<FieldF> *guesser;
    
    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
      Linop_f(_Linop_f), Linop_d(_Linop_d),
-      Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
+      Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
      OuterLoopNormMult(100.), guesser(NULL){ };

    void useGuesser(LinearFunction<FieldF> &g){
@@ -60,8 +55,6 @@ namespace Grid {
    }
  
    void operator() (const FieldD &src_d_in, FieldD &sol_d){
-      TotalInnerIterations = 0;
-	
      GridStopWatch TotalTimer;
      TotalTimer.Start();
    
@@ -81,7 +74,7 @@ namespace Grid {
      FieldD src_d(DoublePrecGrid);
      src_d = src_d_in; //source for next inner iteration, computed from residual during operation
    
-      RealD inner_tol = InnerTolerance;
+      RealD inner_tol = Tolerance;
    
      FieldF src_f(SinglePrecGrid);
      src_f.checkerboard = cb;
@@ -96,9 +89,7 @@ namespace Grid {

      GridStopWatch PrecChangeTimer;
    
-      Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
-      
-      for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
+      for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
 	//Compute double precision rsd and also new RHS vector.
 	Linop_d.HermOp(sol_d, tmp_d);
 	RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
@@ -126,7 +117,6 @@ namespace Grid {
 	InnerCGtimer.Start();
 	CG_f(Linop_f, src_f, sol_f);
 	InnerCGtimer.Stop();
-	TotalInnerIterations += CG_f.IterationsToComplete;
      
 	//Convert sol back to double and add to double prec solution
 	PrecChangeTimer.Start();
@@ -141,11 +131,9 @@ namespace Grid {
    
      ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
      CG_d(Linop_d, src_d_in, sol_d);
-      TotalFinalStepIterations = CG_d.IterationsToComplete;

      TotalTimer.Stop();
-      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
-      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
+      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
    }
  };

@@ -31,16 +31,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

 #include <string.h> //memset
 #ifdef USE_LAPACK
-#ifdef USE_MKL
-#include<mkl_lapack.h>
-#else
 void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
                   double *vl, double *vu, int *il, int *iu, double *abstol,
                   int *m, double *w, double *z, int *ldz, int *isuppz,
                   double *work, int *lwork, int *iwork, int *liwork,
                   int *info);
-//#include <lapacke/lapacke.h>
-#endif
 #endif
 #include "DenseMatrix.h"
 #include "EigenSort.h"
@@ -64,16 +59,15 @@ public:

    int Nstop;   // Number of evecs checked for convergence
    int Nk;      // Number of converged sought
-    int Np;      // Np -- Number of spare vecs in kryloc space
+    int Np;      // Np -- Number of spare vecs in krylov space
    int Nm;      // Nm -- total number of vectors

-
-    RealD OrthoTime;
-
    RealD eresid;

    SortEigen<Field> _sort;

+//    GridCartesian &_fgrid;
+
    LinearOperatorBase<Field> &_Linop;

    OperatorFunction<Field>   &_poly;
@@ -130,23 +124,23 @@ public:

      GridBase *grid = evec[0]._grid;
      Field w(grid);
-      std::cout<<GridLogMessage << "RitzMatrix "<<std::endl;
+      std::cout << "RitzMatrix "<<std::endl;
      for(int i=0;i<k;i++){
 	_poly(_Linop,evec[i],w);
-	std::cout<<GridLogMessage << "["<<i<<"] ";
+	std::cout << "["<<i<<"] ";
 	for(int j=0;j<k;j++){
 	  ComplexD in = innerProduct(evec[j],w);
 	  if ( fabs((double)i-j)>1 ) { 
 	    if (abs(in) >1.0e-9 )  { 
-	      std::cout<<GridLogMessage<<"oops"<<std::endl;
+	      std::cout<<"oops"<<std::endl;
 	      abort();
 	    } else 
-	      std::cout<<GridLogMessage << " 0 ";
+	      std::cout << " 0 ";
 	  } else { 
-	    std::cout<<GridLogMessage << " "<<in<<" ";
+	    std::cout << " "<<in<<" ";
 	  }
 	}
-	std::cout<<GridLogMessage << std::endl;
+	std::cout << std::endl;
      }
    }

@@ -180,10 +174,10 @@ public:
      RealD beta = normalise(w); // 6. βk+1 := ∥wk∥2. If βk+1 = 0 then Stop
                                 // 7. vk+1 := wk/βk+1

-	std::cout<<GridLogMessage << "alpha = " << zalph << " beta "<<beta<<std::endl;
+//	std::cout << "alpha = " << zalph << " beta "<<beta<<std::endl;
      const RealD tiny = 1.0e-20;
      if ( beta < tiny ) { 
-	std::cout<<GridLogMessage << " beta is tiny "<<beta<<std::endl;
+	std::cout << " beta is tiny "<<beta<<std::endl;
     }
      lmd[k] = alph;
      lme[k]  = beta;
@@ -259,7 +253,6 @@ public:
    }

 #ifdef USE_LAPACK
-#define LAPACK_INT long long
    void diagonalize_lapack(DenseVector<RealD>& lmd,
 		     DenseVector<RealD>& lme, 
 		     int N1,
@@ -269,7 +262,7 @@ public:
  const int size = Nm;
 //  tevals.resize(size);
 //  tevecs.resize(size);
-  LAPACK_INT NN = N1;
+  int NN = N1;
  double evals_tmp[NN];
  double evec_tmp[NN][NN];
  memset(evec_tmp[0],0,sizeof(double)*NN*NN);
@@ -283,19 +276,19 @@ public:
        if (i==j) evals_tmp[i] = lmd[i];
        if (j==(i-1)) EE[j] = lme[j];
      }
-  LAPACK_INT evals_found;
-  LAPACK_INT lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
-  LAPACK_INT liwork =  3+NN*10 ;
-  LAPACK_INT iwork[liwork];
+  int evals_found;
+  int lwork = ( (18*NN) > (1+4*NN+NN*NN)? (18*NN):(1+4*NN+NN*NN)) ;
+  int liwork =  3+NN*10 ;
+  int iwork[liwork];
  double work[lwork];
-  LAPACK_INT isuppz[2*NN];
+  int isuppz[2*NN];
  char jobz = 'V'; // calculate evals & evecs
  char range = 'I'; // calculate all evals
  //    char range = 'A'; // calculate all evals
  char uplo = 'U'; // refer to upper half of original matrix
  char compz = 'I'; // Compute eigenvectors of tridiagonal matrix
  int ifail[NN];
-  long long info;
+  int info;
 //  int total = QMP_get_number_of_nodes();
 //  int node = QMP_get_node_number();
 //  GridBase *grid = evec[0]._grid;
@@ -303,18 +296,14 @@ public:
  int node = grid->_processor;
  int interval = (NN/total)+1;
  double vl = 0.0, vu = 0.0;
-  LAPACK_INT il = interval*node+1 , iu = interval*(node+1);
+  int il = interval*node+1 , iu = interval*(node+1);
  if (iu > NN)  iu=NN;
  double tol = 0.0;
    if (1) {
      memset(evals_tmp,0,sizeof(double)*NN);
      if ( il <= NN){
        printf("total=%d node=%d il=%d iu=%d\n",total,node,il,iu);
-#ifdef USE_MKL
-        dstegr(&jobz, &range, &NN,
-#else
        LAPACK_dstegr(&jobz, &range, &NN,
-#endif
            (double*)DD, (double*)EE,
            &vl, &vu, &il, &iu, // these four are ignored if second parameteris 'A'
            &tol, // tolerance
@@ -346,7 +335,6 @@ public:
      lmd [NN-1-i]=evals_tmp[i];
  }
 }
-#undef LAPACK_INT 
 #endif


@@ -377,14 +365,12 @@ public:
 //	diagonalize_lapack(lmd2,lme2,Nm2,Nm,Qt,grid);
 #endif

-      int Niter = 10000*N1;
+      int Niter = 100*N1;
      int kmin = 1;
      int kmax = N2;
      // (this should be more sophisticated)

-      for(int iter=0; ; ++iter){
-      if ( (iter+1)%(100*N1)==0) 
-      std::cout<<GridLogMessage << "[QL method] Not converged - iteration "<<iter+1<<"\n";
+      for(int iter=0; iter<Niter; ++iter){

 	// determination of 2x2 leading submatrix
 	RealD dsub = lmd[kmax-1]-lmd[kmax-2];
@@ -413,11 +399,11 @@ public:
        _sort.push(lmd3,N2);
        _sort.push(lmd2,N2);
         for(int k=0; k<N2; ++k){
-	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout<<GridLogMessage <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
-//	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout<<GridLogMessage <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
+	    if (fabs(lmd2[k] - lmd3[k]) >SMALL)  std::cout <<"lmd(qr) lmd(lapack) "<< k << ": " << lmd2[k] <<" "<< lmd3[k] <<std::endl;
+//	    if (fabs(lme2[k] - lme[k]) >SMALL)  std::cout <<"lme(qr)-lme(lapack) "<< k << ": " << lme2[k] - lme[k] <<std::endl;
 	  }
         for(int k=0; k<N1*N1; ++k){
-//	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout<<GridLogMessage <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
+//	    if (fabs(Qt2[k] - Qt[k]) >SMALL)  std::cout <<"Qt(qr)-Qt(lapack) "<< k << ": " << Qt2[k] - Qt[k] <<std::endl;
 	}
    }
 #endif
@@ -432,7 +418,7 @@ public:
 	  }
 	}
      }
-      std::cout<<GridLogMessage << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
+      std::cout << "[QL method] Error - Too many iteration: "<<Niter<<"\n";
      abort();
    }

@@ -449,7 +435,6 @@ public:
 		       DenseVector<Field>& evec,
 		       int k)
    {
-      double t0=-usecond()/1e6;
      typedef typename Field::scalar_type MyComplex;
      MyComplex ip;

@@ -468,8 +453,6 @@ public:
 	w = w - ip * evec[j];
      }
      normalise(w);
-      t0+=usecond()/1e6;
-      OrthoTime +=t0;
    }

    void setUnit_Qt(int Nm, DenseVector<RealD> &Qt) {
@@ -503,10 +486,10 @@ until convergence
 	GridBase *grid = evec[0]._grid;
 	assert(grid == src._grid);

-	std::cout<<GridLogMessage << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
-	std::cout<<GridLogMessage << " -- Nm = " << Nm << std::endl;
-	std::cout<<GridLogMessage << " -- size of eval   = " << eval.size() << std::endl;
-	std::cout<<GridLogMessage << " -- size of evec  = " << evec.size() << std::endl;
+	std::cout << " -- Nk = " << Nk << " Np = "<< Np << std::endl;
+	std::cout << " -- Nm = " << Nm << std::endl;
+	std::cout << " -- size of eval   = " << eval.size() << std::endl;
+	std::cout << " -- size of evec  = " << evec.size() << std::endl;
 	
 	assert(Nm == evec.size() && Nm == eval.size());
 	
@@ -517,7 +500,6 @@ until convergence
 	DenseVector<int>   Iconv(Nm);

 	DenseVector<Field>  B(Nm,grid); // waste of space replicating
-//	DenseVector<Field>  Btemp(Nm,grid); // waste of space replicating
 	
 	Field f(grid);
 	Field v(grid);
@@ -533,48 +515,35 @@ until convergence
 	// (uniform vector) Why not src??
 	//	evec[0] = 1.0;
 	evec[0] = src;
-	std:: cout<<GridLogMessage <<"norm2(src)= " << norm2(src)<<std::endl;
+	std:: cout <<"norm2(src)= " << norm2(src)<<std::endl;
 // << src._grid  << std::endl;
 	normalise(evec[0]);
-	std:: cout<<GridLogMessage <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
+	std:: cout <<"norm2(evec[0])= " << norm2(evec[0]) <<std::endl;
 // << evec[0]._grid << std::endl;
 	
 	// Initial Nk steps
-	OrthoTime=0.;
-	double t0=usecond()/1e6;
 	for(int k=0; k<Nk; ++k) step(eval,lme,evec,f,Nm,k);
-	double t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::Initial steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
-//	std:: cout<<GridLogMessage <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
-//	std:: cout<<GridLogMessage <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
+//	std:: cout <<"norm2(evec[1])= " << norm2(evec[1]) << std::endl;
+//	std:: cout <<"norm2(evec[2])= " << norm2(evec[2]) << std::endl;
 	RitzMatrix(evec,Nk);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 	for(int k=0; k<Nk; ++k){
-//	std:: cout<<GridLogMessage <<"eval " << k << " " <<eval[k] << std::endl;
-//	std:: cout<<GridLogMessage <<"lme " << k << " " << lme[k] << std::endl;
+//	std:: cout <<"eval " << k << " " <<eval[k] << std::endl;
+//	std:: cout <<"lme " << k << " " << lme[k] << std::endl;
 	}

 	// Restarting loop begins
 	for(int iter = 0; iter<Niter; ++iter){

-	  std::cout<<GridLogMessage<<"\n Restart iteration = "<< iter << std::endl;
+	  std::cout<<"\n Restart iteration = "<< iter << std::endl;

 	  // 
 	  // Rudy does a sort first which looks very different. Getting fed up with sorting out the algo defs.
 	  // We loop over 
 	  //
-	OrthoTime=0.;
 	  for(int k=Nk; k<Nm; ++k) step(eval,lme,evec,f,Nm,k);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL:: "<<Np <<" steps: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-	std::cout<<GridLogMessage <<"IRL::Initial steps:OrthoTime "<<OrthoTime<< "seconds"<<std::endl;
 	  f *= lme[Nm-1];

 	  RitzMatrix(evec,k2);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL:: RitzMatrix: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 	  
 	  // getting eigenvalues
 	  for(int k=0; k<Nm; ++k){
@@ -583,27 +552,18 @@ until convergence
 	  }
 	  setUnit_Qt(Nm,Qt);
 	  diagonalize(eval2,lme2,Nm,Nm,Qt,grid);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL:: diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;

 	  // sorting
 	  _sort.push(eval2,Nm);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL:: eval sorting: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 	  
 	  // Implicitly shifted QR transformations
 	  setUnit_Qt(Nm,Qt);
-	  for(int ip=0; ip<k2; ++ip){
-	std::cout<<GridLogMessage << "eval "<< ip << " "<< eval2[ip] << std::endl;
-	}
 	  for(int ip=k2; ip<Nm; ++ip){ 
-	std::cout<<GridLogMessage << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
+	std::cout << "qr_decomp "<< ip << " "<< eval2[ip] << std::endl;
 	    qr_decomp(eval,lme,Nm,Nm,Qt,eval2[ip],k1,Nm);
 		
 	}
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::qr_decomp: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-if (0) {  
+    
 	  for(int i=0; i<(Nk+1); ++i) B[i] = 0.0;
 	  
 	  for(int j=k1-1; j<k2+1; ++j){
@@ -612,38 +572,14 @@ if (0) {
 	      B[j] += Qt[k+Nm*j] * evec[k];
 	    }
 	  }
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::QR Rotate: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
-
-if (1) {
-	for(int i=0; i<(Nk+1); ++i) {
-		B[i] = 0.0;
-	  	B[i].checkerboard = evec[0].checkerboard;
-	}
-
-	int j_block = 24; int k_block=24;
-PARALLEL_FOR_LOOP
-	for(int ss=0;ss < grid->oSites();ss++){
-	for(int jj=k1-1; jj<k2+1; jj += j_block)
-	for(int kk=0; kk<Nm; kk += k_block)
-	for(int j=jj; (j<(k2+1)) && j<(jj+j_block); ++j){
-	for(int k=kk; (k<Nm) && k<(kk+k_block) ; ++k){
-	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss]; 
-	}
-	}
-	}
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::QR rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
-	for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];
+	  for(int j=k1-1; j<k2+1; ++j) evec[j] = B[j];

 	  // Compressed vector f and beta(k2)
 	  f *= Qt[Nm-1+Nm*(k2-1)];
 	  f += lme[k2-1] * evec[k2];
 	  beta_k = norm2(f);
 	  beta_k = sqrt(beta_k);
-	  std::cout<<GridLogMessage<<" beta(k) = "<<beta_k<<std::endl;
+	  std::cout<<" beta(k) = "<<beta_k<<std::endl;

 	  RealD betar = 1.0/beta_k;
 	  evec[k2] = betar * f;
@@ -656,10 +592,7 @@ PARALLEL_FOR_LOOP
 	  }
 	  setUnit_Qt(Nm,Qt);
 	  diagonalize(eval2,lme2,Nk,Nm,Qt,grid);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::diagonalize: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
 	  
-if (0) {
 	  for(int k = 0; k<Nk; ++k) B[k]=0.0;
 	  
 	  for(int j = 0; j<Nk; ++j){
@@ -667,34 +600,12 @@ if (0) {
 	    B[j].checkerboard = evec[k].checkerboard;
 	      B[j] += Qt[k+j*Nm] * evec[k];
 	    }
-	    std::cout<<GridLogMessage << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
+//	    std::cout << "norm(B["<<j<<"])="<<norm2(B[j])<<std::endl;
 	  }
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::Convergence rotation: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
-if (1) {
-	for(int i=0; i<(Nk+1); ++i) {
-		B[i] = 0.0;
-	  	B[i].checkerboard = evec[0].checkerboard;
-	}
-
-	int j_block = 24; int k_block=24;
-PARALLEL_FOR_LOOP
-	for(int ss=0;ss < grid->oSites();ss++){
-	for(int jj=0; jj<Nk; jj += j_block)
-	for(int kk=0; kk<Nk; kk += k_block)
-	for(int j=jj; (j<Nk) && j<(jj+j_block); ++j){
-	for(int k=kk; (k<Nk) && k<(kk+k_block) ; ++k){
-	    B[j]._odata[ss] +=Qt[k+Nm*j] * evec[k]._odata[ss]; 
-	}
-	}
-	}
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::convergence rotation : "<<t1-t0<< "seconds"<<std::endl; t0=t1;
-}
+//	_sort.push(eval2,B,Nk);

 	  Nconv = 0;
-	  //	  std::cout<<GridLogMessage << std::setiosflags(std::ios_base::scientific);
+	  //	  std::cout << std::setiosflags(std::ios_base::scientific);
 	  for(int i=0; i<Nk; ++i){

 //	    _poly(_Linop,B[i],v);
@@ -702,16 +613,14 @@ PARALLEL_FOR_LOOP
 	    
 	    RealD vnum = real(innerProduct(B[i],v)); // HermOp.
 	    RealD vden = norm2(B[i]);
-	    RealD vv0 = norm2(v);
 	    eval2[i] = vnum/vden;
 	    v -= eval2[i]*B[i];
 	    RealD vv = norm2(v);
 	    
 	    std::cout.precision(13);
-	    std::cout<<GridLogMessage << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
-	    std::cout<<"eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
-	    std::cout<<"|H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv;
-	    std::cout<<" "<< vnum/(sqrt(vden)*sqrt(vv0)) << std::endl;
+	    std::cout << "[" << std::setw(3)<< std::setiosflags(std::ios_base::right) <<i<<"] ";
+	    std::cout << "eval = "<<std::setw(25)<< std::setiosflags(std::ios_base::left)<< eval2[i];
+	    std::cout <<" |H B[i] - eval[i]B[i]|^2 "<< std::setw(25)<< std::setiosflags(std::ios_base::right)<< vv<< std::endl;
 	    
 	// change the criteria as evals are supposed to be sorted, all evals smaller(larger) than Nstop should have converged
 	    if((vv<eresid*eresid) && (i == Nconv) ){
@@ -720,19 +629,17 @@ PARALLEL_FOR_LOOP
 	    }

 	  }  // i-loop end
-	  //	  std::cout<<GridLogMessage << std::resetiosflags(std::ios_base::scientific);
-	t1=usecond()/1e6;
-	std::cout<<GridLogMessage <<"IRL::convergence testing: "<<t1-t0<< "seconds"<<std::endl; t0=t1;
+	  //	  std::cout << std::resetiosflags(std::ios_base::scientific);


-	  std::cout<<GridLogMessage<<" #modes converged: "<<Nconv<<std::endl;
+	  std::cout<<" #modes converged: "<<Nconv<<std::endl;

 	  if( Nconv>=Nstop ){
 	    goto converged;
 	  }
 	} // end of iter loop
 	
-	std::cout<<GridLogMessage<<"\n NOT converged.\n";
+	std::cout<<"\n NOT converged.\n";
 	abort();
 	
      converged:
@@ -745,10 +652,10 @@ PARALLEL_FOR_LOOP
       }
      _sort.push(eval,evec,Nconv);

-      std::cout<<GridLogMessage << "\n Converged\n Summary :\n";
-      std::cout<<GridLogMessage << " -- Iterations  = "<< Nconv  << "\n";
-      std::cout<<GridLogMessage << " -- beta(k)     = "<< beta_k << "\n";
-      std::cout<<GridLogMessage << " -- Nconv       = "<< Nconv  << "\n";
+      std::cout << "\n Converged\n Summary :\n";
+      std::cout << " -- Iterations  = "<< Nconv  << "\n";
+      std::cout << " -- beta(k)     = "<< beta_k << "\n";
+      std::cout << " -- Nconv       = "<< Nconv  << "\n";
     }

    /////////////////////////////////////////////////
@@ -771,25 +678,25 @@ PARALLEL_FOR_LOOP
 	}
      }

-      std::cout<<GridLogMessage<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl;
+      std::cout<<"Lanczos_Factor start/end " <<start <<"/"<<end<<std::endl;

      // Starting from scratch, bq[0] contains a random vector and |bq[0]| = 1
      int first;
      if(start == 0){

-	std::cout<<GridLogMessage << "start == 0\n"; //TESTING
+	std::cout << "start == 0\n"; //TESTING

 	_poly(_Linop,bq[0],bf);

 	alpha = real(innerProduct(bq[0],bf));//alpha =  bq[0]^dag A bq[0]

-	std::cout<<GridLogMessage << "alpha = " << alpha << std::endl;
+	std::cout << "alpha = " << alpha << std::endl;
 	
 	bf = bf - alpha * bq[0];  //bf =  A bq[0] - alpha bq[0]

 	H[0][0]=alpha;

-	std::cout<<GridLogMessage << "Set H(0,0) to " << H[0][0] << std::endl;
+	std::cout << "Set H(0,0) to " << H[0][0] << std::endl;

 	first = 1;

@@ -809,19 +716,19 @@ PARALLEL_FOR_LOOP

 	beta = 0;sqbt = 0;

-	std::cout<<GridLogMessage << "cont is true so setting beta to zero\n";
+	std::cout << "cont is true so setting beta to zero\n";

      }	else {

 	beta = norm2(bf);
 	sqbt = sqrt(beta);

-	std::cout<<GridLogMessage << "beta = " << beta << std::endl;
+	std::cout << "beta = " << beta << std::endl;
      }

      for(int j=first;j<end;j++){

-	std::cout<<GridLogMessage << "Factor j " << j <<std::endl;
+	std::cout << "Factor j " << j <<std::endl;

 	if(cont){ // switches to factoring; understand start!=0 and initial bf value is right.
 	  bq[j] = bf; cont = false;
@@ -844,7 +751,7 @@ PARALLEL_FOR_LOOP

 	beta = fnorm;
 	sqbt = sqrt(beta);
-	std::cout<<GridLogMessage << "alpha = " << alpha << " fnorm = " << fnorm << '\n';
+	std::cout << "alpha = " << alpha << " fnorm = " << fnorm << '\n';

 	///Iterative refinement of orthogonality V = [ bq[0]  bq[1]  ...  bq[M] ]
 	int re = 0;
@@ -879,8 +786,8 @@ PARALLEL_FOR_LOOP
 	  bck = sqrt( nmbex );
 	  re++;
 	}
-	std::cout<<GridLogMessage << "Iteratively refined orthogonality, changes alpha\n";
-	if(re > 1) std::cout<<GridLogMessage << "orthagonality refined " << re << " times" <<std::endl;
+	std::cout << "Iteratively refined orthogonality, changes alpha\n";
+	if(re > 1) std::cout << "orthagonality refined " << re << " times" <<std::endl;
 	H[j][j]=alpha;
      }

@@ -895,13 +802,11 @@ PARALLEL_FOR_LOOP

    void ImplicitRestart(int TM, DenseVector<RealD> &evals,  DenseVector<DenseVector<RealD> > &evecs, DenseVector<Field> &bq, Field &bf, int cont)
    {
-      std::cout<<GridLogMessage << "ImplicitRestart begin. Eigensort starting\n";
+      std::cout << "ImplicitRestart begin. Eigensort starting\n";

      DenseMatrix<RealD> H; Resize(H,Nm,Nm);

-#ifndef USE_LAPACK
      EigenSort(evals, evecs);
-#endif

      ///Assign shifts
      int K=Nk;
@@ -924,15 +829,15 @@ PARALLEL_FOR_LOOP
      /// Shifted H defines a new K step Arnoldi factorization
      RealD  beta = H[ff][ff-1]; 
      RealD  sig  = Q[TM - 1][ff - 1];
-      std::cout<<GridLogMessage << "beta = " << beta << " sig = " << real(sig) <<std::endl;
+      std::cout << "beta = " << beta << " sig = " << real(sig) <<std::endl;

-      std::cout<<GridLogMessage << "TM = " << TM << " ";
-      std::cout<<GridLogMessage << norm2(bq[0]) << " -- before" <<std::endl;
+      std::cout << "TM = " << TM << " ";
+      std::cout << norm2(bq[0]) << " -- before" <<std::endl;

      /// q -> q Q
      times_real(bq, Q, TM);

-      std::cout<<GridLogMessage << norm2(bq[0]) << " -- after " << ff <<std::endl;
+      std::cout << norm2(bq[0]) << " -- after " << ff <<std::endl;
      bf =  beta* bq[ff] + sig* bf;

      /// Do the rest of the factorization
@@ -956,7 +861,7 @@ PARALLEL_FOR_LOOP
      int ff = Lanczos_Factor(0, M, cont, bq,bf,H); // 0--M to begin with

      if(ff < M) {
-	std::cout<<GridLogMessage << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl;
+	std::cout << "Krylov: aborting ff "<<ff <<" "<<M<<std::endl;
 	abort(); // Why would this happen?
      }

@@ -965,7 +870,7 @@ PARALLEL_FOR_LOOP

      for(int it = 0; it < Niter && (converged < Nk); ++it) {

-	std::cout<<GridLogMessage << "Krylov: Iteration --> " << it << std::endl;
+	std::cout << "Krylov: Iteration --> " << it << std::endl;
 	int lock_num = lock ? converged : 0;
 	DenseVector<RealD> tevals(M - lock_num );
 	DenseMatrix<RealD> tevecs; Resize(tevecs,M - lock_num,M - lock_num);
@@ -981,7 +886,7 @@ PARALLEL_FOR_LOOP
      Wilkinson<RealD>(H, evals, evecs, small); 
      //      Check();

-      std::cout<<GridLogMessage << "Done  "<<std::endl;
+      std::cout << "Done  "<<std::endl;

    }

@@ -1046,7 +951,7 @@ PARALLEL_FOR_LOOP
 		  DenseVector<RealD> &tevals, DenseVector<DenseVector<RealD> > &tevecs, 
 		  int lock, int converged)
    {
-      std::cout<<GridLogMessage << "Converged " << converged << " so far." << std::endl;
+      std::cout << "Converged " << converged << " so far." << std::endl;
      int lock_num = lock ? converged : 0;
      int M = Nm;

@@ -1061,9 +966,7 @@ PARALLEL_FOR_LOOP
      RealD small=1.0e-16;
      Wilkinson<RealD>(AH, tevals, tevecs, small);

-#ifndef USE_LAPACK
      EigenSort(tevals, tevecs);
-#endif

      RealD resid_nrm=  norm2(bf);

@@ -1074,7 +977,7 @@ PARALLEL_FOR_LOOP
 	RealD diff = 0;
 	diff = abs( tevecs[i][Nm - 1 - lock_num] ) * resid_nrm;

-	std::cout<<GridLogMessage << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl;
+	std::cout << "residual estimate " << SS-1-i << " " << diff << " of (" << tevals[i] << ")" << std::endl;

 	if(diff < converged) {

@@ -1090,13 +993,13 @@ PARALLEL_FOR_LOOP
 	    lock_num++;
 	  }
 	  converged++;
-	  std::cout<<GridLogMessage << " converged on eval " << converged << " of " << Nk << std::endl;
+	  std::cout << " converged on eval " << converged << " of " << Nk << std::endl;
 	} else {
 	  break;
 	}
      }
 #endif
-      std::cout<<GridLogMessage << "Got " << converged << " so far " <<std::endl;	
+      std::cout << "Got " << converged << " so far " <<std::endl;	
    }

    ///Check
@@ -1105,9 +1008,7 @@ PARALLEL_FOR_LOOP

      DenseVector<RealD> goodval(this->get);

-#ifndef USE_LAPACK
      EigenSort(evals,evecs);
-#endif

      int NM = Nm;

@@ -1179,10 +1080,10 @@ say con = 2
 **/

 template<class T>
-static void Lock(DenseMatrix<T> &H, 	///Hess mtx	
-		 DenseMatrix<T> &Q, 	///Lock Transform
-		 T val, 		///value to be locked
-		 int con, 	///number already locked
+static void Lock(DenseMatrix<T> &H, 	// Hess mtx	
+		 DenseMatrix<T> &Q, 	// Lock Transform
+		 T val, 		// value to be locked
+		 int con, 	// number already locked
 		 RealD small,
 		 int dfg,
 		 bool herm)
@@ -36,7 +36,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <iomanip>
 #include <complex>
 #include <typeinfo>
-#include <Grid/Grid.h>
+#include <Grid.h>


 /** Sign function **/
@@ -141,85 +141,5 @@ namespace Grid {
    }     
  };

-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  // Take a matrix and form a Red Black solver calling a Herm solver
-  // Use of RB info prevents making SchurRedBlackSolve conform to standard interface
-  ///////////////////////////////////////////////////////////////////////////////////////////////////////
-  template<class Field> class SchurRedBlackDiagTwoSolve {
-  private:
-    OperatorFunction<Field> & _HermitianRBSolver;
-    int CBfactorise;
-  public:
-
-    /////////////////////////////////////////////////////
-    // Wrap the usual normal equations Schur trick
-    /////////////////////////////////////////////////////
-  SchurRedBlackDiagTwoSolve(OperatorFunction<Field> &HermitianRBSolver)  :
-     _HermitianRBSolver(HermitianRBSolver) 
-    { 
-      CBfactorise=0;
-    };
-
-    template<class Matrix>
-      void operator() (Matrix & _Matrix,const Field &in, Field &out){
-
-      // FIXME CGdiagonalMee not implemented virtual function
-      // FIXME use CBfactorise to control schur decomp
-      GridBase *grid = _Matrix.RedBlackGrid();
-      GridBase *fgrid= _Matrix.Grid();
-
-      SchurDiagTwoOperator<Matrix,Field> _HermOpEO(_Matrix);
- 
-      Field src_e(grid);
-      Field src_o(grid);
-      Field sol_e(grid);
-      Field sol_o(grid);
-      Field   tmp(grid);
-      Field  Mtmp(grid);
-      Field resid(fgrid);
-
-      pickCheckerboard(Even,src_e,in);
-      pickCheckerboard(Odd ,src_o,in);
-      pickCheckerboard(Even,sol_e,out);
-      pickCheckerboard(Odd ,sol_o,out);
-    
-      /////////////////////////////////////////////////////
-      // src_o = Mdag * (source_o - Moe MeeInv source_e)
-      /////////////////////////////////////////////////////
-      _Matrix.MooeeInv(src_e,tmp);     assert(  tmp.checkerboard ==Even);
-      _Matrix.Meooe   (tmp,Mtmp);      assert( Mtmp.checkerboard ==Odd);     
-      tmp=src_o-Mtmp;                  assert(  tmp.checkerboard ==Odd);     
-
-      // get the right MpcDag
-      _HermOpEO.MpcDag(tmp,src_o);     assert(src_o.checkerboard ==Odd);       
-
-      //////////////////////////////////////////////////////////////
-      // Call the red-black solver
-      //////////////////////////////////////////////////////////////
-      std::cout<<GridLogMessage << "SchurRedBlack solver calling the MpcDagMp solver" <<std::endl;
-//      _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
-      _HermitianRBSolver(_HermOpEO,src_o,tmp);  assert(tmp.checkerboard==Odd);
-      _Matrix.MooeeInv(tmp,sol_o);        assert(  sol_o.checkerboard   ==Odd);
-
-      ///////////////////////////////////////////////////
-      // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
-      ///////////////////////////////////////////////////
-      _Matrix.Meooe(sol_o,tmp);        assert(  tmp.checkerboard   ==Even);
-      src_e = src_e-tmp;               assert(  src_e.checkerboard ==Even);
-      _Matrix.MooeeInv(src_e,sol_e);   assert(  sol_e.checkerboard ==Even);
-     
-      setCheckerboard(out,sol_e); assert(  sol_e.checkerboard ==Even);
-      setCheckerboard(out,sol_o); assert(  sol_o.checkerboard ==Odd );
-
-      // Verify the unprec residual
-      _Matrix.M(out,resid); 
-      resid = resid-in;
-      RealD ns = norm2(in);
-      RealD nr = norm2(resid);
-
-      std::cout<<GridLogMessage << "SchurRedBlackDiagTwo solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
-    }     
-  };
-
 }
 #endif
@@ -1,66 +0,0 @@
-
-
-
-#include <Grid/GridCore.h>
-
-namespace Grid {
-
-int PointerCache::victim;
-
-  PointerCache::PointerCacheEntry PointerCache::Entries[PointerCache::Ncache];
-
-void *PointerCache::Insert(void *ptr,size_t bytes) {
-
-  if (bytes < 4096 ) return NULL;
-
-#ifdef GRID_OMP
-  assert(omp_in_parallel()==0);
-#endif 
-
-  void * ret = NULL;
-  int v = -1;
-
-  for(int e=0;e<Ncache;e++) {
-    if ( Entries[e].valid==0 ) {
-      v=e; 
-      break;
-    }
-  }
-
-  if ( v==-1 ) {
-    v=victim;
-    victim = (victim+1)%Ncache;
-  }
-
-  if ( Entries[v].valid ) {
-    ret = Entries[v].address;
-    Entries[v].valid = 0;
-    Entries[v].address = NULL;
-    Entries[v].bytes = 0;
-  }
-
-  Entries[v].address=ptr;
-  Entries[v].bytes  =bytes;
-  Entries[v].valid  =1;
-
-  return ret;
-}
-
-void *PointerCache::Lookup(size_t bytes) {
-
- if (bytes < 4096 ) return NULL;
-
-#ifdef _OPENMP
-  assert(omp_in_parallel()==0);
-#endif 
-
-  for(int e=0;e<Ncache;e++){
-    if ( Entries[e].valid && ( Entries[e].bytes == bytes ) ) {
-      Entries[e].valid = 0;
-      return Entries[e].address;
-    }
-  }
-  return NULL;
-}
-
-}
@@ -52,7 +52,7 @@ public:

    // Physics Grid information.
    std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
-    std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
+    std::vector<int> _fdimensions;// Global dimensions of array prior to cb removal
    std::vector<int> _gdimensions;// Global dimensions of array after cb removal
    std::vector<int> _ldimensions;// local dimensions of array with processor images removed
    std::vector<int> _rdimensions;// Reduced local dimensions with simd lane images and processor images removed 
@@ -77,7 +77,7 @@ public:
    // GridCartesian / GridRedBlackCartesian
    ////////////////////////////////////////////////////////////////
    virtual int CheckerBoarded(int dim)=0;
-    virtual int CheckerBoard(const std::vector<int> &site)=0;
+    virtual int CheckerBoard(std::vector<int> &site)=0;
    virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
    virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
@@ -121,6 +121,7 @@ public:
      Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
    }

+
    //////////////////////////////////////////////////////////
    // SIMD lane addressing
    //////////////////////////////////////////////////////////
@@ -177,11 +178,9 @@ public:
    // Global addressing
    ////////////////////////////////////////////////////////////////
    void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
-      assert(gidx< gSites());
      Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
    }
    void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
-      assert(lidx<lSites());
      Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
    }
    void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
@@ -208,16 +207,16 @@ public:
      std::vector<int> lcoor;
      GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
      rank = RankFromProcessorCoor(pcoor);
-      /*
+
      std::vector<int> cblcoor(lcoor);
      for(int d=0;d<cblcoor.size();d++){
 	if( this->CheckerBoarded(d) ) {
 	  cblcoor[d] = lcoor[d]/2;
 	}
      }
-      */
-      i_idx= iIndex(lcoor);
-      o_idx= oIndex(lcoor);
+
+      i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
+      o_idx= oIndex(lcoor);  // this implies divide by 2 on checkerdim
    }

    void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
@@ -49,7 +49,7 @@ public:
    virtual int CheckerBoarded(int dim){
      return 0;
    }
-    virtual int CheckerBoard(const std::vector<int> &site){
+    virtual int CheckerBoard(std::vector<int> &site){
        return 0;
    }
    virtual int CheckerBoardDestination(int cb,int shift,int dim){
@@ -49,7 +49,7 @@ public:
      if( dim==_checker_dim) return 1;
      else return 0;
    }
-    virtual int CheckerBoard(const std::vector<int> &site){
+    virtual int CheckerBoard(std::vector<int> &site){
      int linear=0;
      assert(site.size()==_ndimension);
      for(int d=0;d<_ndimension;d++){ 
@@ -25,8 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
-#include <Grid/GridCore.h>
-
+#include "Grid.h"
 namespace Grid {

 ///////////////////////////////////////////////////////////////
@@ -34,7 +33,6 @@ namespace Grid {
 ///////////////////////////////////////////////////////////////
 void *              CartesianCommunicator::ShmCommBuf;
 uint64_t            CartesianCommunicator::MAX_MPI_SHM_BYTES   = 128*1024*1024; 
-CartesianCommunicator::CommunicatorPolicy_t  CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;

 /////////////////////////////////
 // Alloc, free shmem region
@@ -67,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
 const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
 int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };

+
 ////////////////////////////////////////////////////////////////////////////////
 // very VERY rarely (Log, serial RNG) we need world without a grid
 ////////////////////////////////////////////////////////////////////////////////
@@ -90,17 +89,14 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)

 #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)

-int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();};
-
-double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
-						       void *xmit,
-						       int xmit_to_rank,
-						       void *recv,
-						       int recv_from_rank,
-						       int bytes)
+void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                       void *xmit,
+                                                       int xmit_to_rank,
+                                                       void *recv,
+                                                       int recv_from_rank,
+                                                       int bytes)
 {
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
-  return 2.0*bytes;
 }
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
 {
@@ -68,6 +68,8 @@ class CartesianCommunicator {
  static MPI_Comm communicator_world;
         MPI_Comm communicator;
  typedef MPI_Request CommsRequest_t;
+  static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
+  static int length; 
 #else 
  typedef int CommsRequest_t;
 #endif
@@ -116,12 +118,6 @@ class CartesianCommunicator {
  // Implemented in Communicator_base.C
  /////////////////////////////////
  static void * ShmCommBuf;
-
-  // Isend/Irecv/Wait, or Sendrecv blocking
-  enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
-  static CommunicatorPolicy_t CommunicatorPolicy;
-  static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
-
  size_t heap_top;
  size_t heap_bytes;

@@ -154,8 +150,8 @@ class CartesianCommunicator {
  const std::vector<int> & ThisProcessorCoor(void) ;
  const std::vector<int> & ProcessorGrid(void)     ;
  int                      ProcessorCount(void)    ;
-  int                      NodeCount(void)    ;

+  void        		 PrintRankInfo(void)     ;
  ////////////////////////////////////////////////////////////////////////////////
  // very VERY rarely (Log, serial RNG) we need world without a grid
  ////////////////////////////////////////////////////////////////////////////////
@@ -207,7 +203,7 @@ class CartesianCommunicator {
  
  void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);

-  double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+  void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 				  void *xmit,
 				  int xmit_to_rank,
 				  void *recv,
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Guido Cossu	70068cff51	Just commenting	2016-12-12 03:49:01 +00:00
Guido Cossu	85c055fa30	Tests more reductions and stops if failure is found in an iteration	2016-12-12 03:33:21 +00:00
Guido Cossu	90fedbd2af	Report only on failing nodes	2016-12-09 05:57:30 +00:00
Guido Cossu	ec0c53fa68	Improvement in the CG interface for Repro	2016-12-09 05:20:38 +00:00
Guido Cossu	6ceee102e8	Moving the check outside the inner product function	2016-12-09 05:08:56 +00:00
Guido Cossu	6e57bdb6b3	Removing the fake error introduced for testing	2016-12-09 04:19:36 +00:00
Guido Cossu	4c11e36d3d	Fixing a compilation error on some gcc compiler versions	2016-12-09 02:46:40 +00:00
Guido Cossu	9977c53035	Minor change	2016-12-09 02:34:37 +00:00
Guido Cossu	3a74fec62f	Output reports max number of digits	2016-12-09 02:29:12 +00:00
Guido Cossu	8fb0a13f39	Cleaning up output of CG repro	2016-12-09 02:17:31 +00:00
Guido Cossu	14a1406f54	More cleanup	2016-12-08 06:14:20 +00:00
Guido Cossu	538e64e5b4	Cleaning up the CG reproduciblity test. More info reported	2016-12-08 05:50:40 +00:00
Guido Cossu	b2dc17e160	Merge branch 'develop' into feature/CG_repro	2016-12-05 05:07:01 +00:00
Guido Cossu	afbbcd2194	Compilation fix for the non OMP version	2016-11-23 12:47:19 +00:00
Guido Cossu	d4e0b11bb1	Adding few comments	2016-11-23 11:46:51 +00:00
Guido Cossu	7144ee7ae8	Reproducibility checks for inner product	2016-11-23 11:42:04 +00:00
Guido Cossu	f1908c7bc9	Adding reproducibility tests	2016-11-21 09:52:07 +00:00
Guido Cossu	036ec31c48	Merge branch 'develop' into feature/CG_repro	2016-11-20 17:54:23 +00:00
Guido Cossu	53f240200e	Merge branch 'develop' into feature/CG_repro	2016-11-09 14:44:46 +00:00
Guido Cossu	9720c9ba3f	First implementation of the CG reproducibility test	2016-11-06 11:13:29 +00:00