Debugged the copy constructor of the Lattice class

2025-07-19 06:07:06 +01:00 · 2016-07-06 15:31:00 +01:00
parent e3d5319470
commit e87182cf98
4 changed files with 888 additions and 849 deletions
--- a/lib/lattice/Lattice_base.h
+++ b/lib/lattice/Lattice_base.h
@@ -1,32 +1,33 @@
-    /*************************************************************************************
+/*************************************************************************************
-    Grid physics library, www.github.com/paboyle/Grid 
+Grid physics library, www.github.com/paboyle/Grid
-    Source file: ./lib/lattice/Lattice_base.h
+Source file: ./lib/lattice/Lattice_base.h
-    Copyright (C) 2015
+Copyright (C) 2015
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
-    This program is free software; you can redistribute it and/or modify
+This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
+it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+(at your option) any later version.
-    This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
+but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+GNU General Public License for more details.
-    You should have received a copy of the GNU General Public License along
+You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
+with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-    See the full license in the file "LICENSE" in the top level distribution directory
+See the full license in the file "LICENSE" in the top level distribution
-    *************************************************************************************/
+directory
-    /*  END LEGAL */
+*************************************************************************************/
 /*  END LEGAL */
 #ifndef GRID_LATTICE_BASE_H
 #define GRID_LATTICE_BASE_H
@@ -255,6 +256,18 @@ PARALLEL_FOR_LOOP
        checkerboard=0;
    }
    Lattice(const Lattice& r){ // copy constructor
    	_grid = r._grid;
    	checkerboard = r.checkerboard;
    	_odata.resize(_grid->oSites());// essential
  		PARALLEL_FOR_LOOP
        for(int ss=0;ss<_grid->oSites();ss++){
            _odata[ss]=r._odata[ss];
        }  	
    }
    virtual ~Lattice(void) = default;
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
@@ -267,7 +280,7 @@ PARALLEL_FOR_LOOP
    template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
      this->checkerboard = r.checkerboard;
      conformable(*this,r);
-      std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
+      
 PARALLEL_FOR_LOOP
        for(int ss=0;ss<_grid->oSites();ss++){
            this->_odata[ss]=r._odata[ss];
--- a/lib/qcd/smearing/GaugeConfiguration.h
+++ b/lib/qcd/smearing/GaugeConfiguration.h
@@ -6,11 +6,11 @@
 #ifndef GAUGE_CONFIG_
 #define GAUGE_CONFIG_
-  namespace Grid {
+namespace Grid {
-  	namespace QCD {
+namespace QCD {
-    /*!
+/*!
  @brief Smeared configuration container
  It will behave like a configuration from the point of view of
@@ -20,11 +20,11 @@
  it, like smearing.
  It stores a list of smeared configurations.
-    */
+*/
-    template <class Gimpl>
+template <class Gimpl>
-      class SmearedConfiguration {
+class SmearedConfiguration {
 public:
-      	INHERIT_GIMPL_TYPES(Gimpl) ;
+  INHERIT_GIMPL_TYPES(Gimpl);
 private:
  const unsigned int smearingLevels;
@@ -33,34 +33,35 @@
  // Member functions
  //====================================================================
-      	void fill_smearedSet(GaugeField& U){
+  void fill_smearedSet(GaugeField& U) {
     // Points ThinLinks at U and, when smearingLevels > 0, fills
     // SmearedSet[0 .. smearingLevels-1]: each level is produced by
     // StoutSmearing.smear(SmearedSet[lvl], previous_u), where previous_u
     // starts as a copy of U and is then set to the level just produced.
     // The average plaquette of every level is written to GridLogDebug.
     // NOTE(review): ThinLinks is assigned the address of the reference U
     // just before the NULL test, so that test is not expected to fire.
-	ThinLinks = &U; //attach the smearing routine to the field U
+    ThinLinks = &U;  // attach the smearing routine to the field U
-	//check the pointer is not null
+    // check the pointer is not null
-	if (ThinLinks==NULL) 
+    if (ThinLinks == NULL)
-		std::cout << GridLogError << "[SmearedConfiguration] Error in ThinLinks pointer\n";
+      std::cout << GridLogError
                 << "[SmearedConfiguration] Error in ThinLinks pointer\n";
-	if (smearingLevels > 0){
+    if (smearingLevels > 0) {
-		std::cout<< GridLogDebug << "[SmearedConfiguration] Filling SmearedSet\n";
+      std::cout << GridLogDebug
                 << "[SmearedConfiguration] Filling SmearedSet\n";
       GaugeField previous_u(ThinLinks->_grid);
       previous_u = *ThinLinks;
-		for(int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl){
+      for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) {
-			StoutSmearing.smear(SmearedSet[smearLvl],previous_u);
+        StoutSmearing.smear(SmearedSet[smearLvl], previous_u);
         previous_u = SmearedSet[smearLvl];
         // For debug purposes
         RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(previous_u);
-			std::cout<< GridLogDebug << "[SmearedConfiguration] Plaq: " << impl_plaq<< std::endl;
+        std::cout << GridLogDebug
-
+                  << "[SmearedConfiguration] Plaq: " << impl_plaq << std::endl;
       }
     }
-}
+  }
-//====================================================================
+  //====================================================================
-GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, 
+  GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
-	const GaugeField& GaugeK) const{
+                                  const GaugeField& GaugeK) const {
-	GridBase *grid = GaugeK._grid;
+    GridBase* grid = GaugeK._grid;
    GaugeField C(grid), SigmaK(grid), iLambda(grid);
    GaugeLinkField iLambda_mu(grid);
    GaugeLinkField iQ(grid), e_iQ(grid);
@@ -71,37 +72,33 @@ GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime,
    SigmaK = zero;
    iLambda = zero;
-	for (int mu = 0; mu < Nd; mu++){
+    for (int mu = 0; mu < Nd; mu++) {
-		Cmu            = peekLorentz(     C,mu);
+      Cmu = peekLorentz(C, mu);
-		GaugeKmu       = peekLorentz(GaugeK,mu);
+      GaugeKmu = peekLorentz(GaugeK, mu);
-		SigmaKPrime_mu = peekLorentz(SigmaKPrime,mu);
+      SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu);
-		iQ = Ta(Cmu*adj(GaugeKmu));	
+      iQ = Ta(Cmu * adj(GaugeKmu));
      set_iLambda(iLambda_mu, e_iQ, iQ, SigmaKPrime_mu, GaugeKmu);
-		pokeLorentz(SigmaK, SigmaKPrime_mu*e_iQ + adj(Cmu)*iLambda_mu, mu);
+      pokeLorentz(SigmaK, SigmaKPrime_mu * e_iQ + adj(Cmu) * iLambda_mu, mu);
      pokeLorentz(iLambda, iLambda_mu, mu);
    }
-	StoutSmearing.derivative(SigmaK, iLambda, GaugeK);// derivative of SmearBase
+    StoutSmearing.derivative(SigmaK, iLambda,
                             GaugeK);  // derivative of SmearBase
    return SigmaK;
-}
+  }
-
+  /*! @brief Returns smeared configuration at level 'Level' */
-
+  const GaugeField& get_smeared_conf(int Level) const {
 /*! @brief Returns smeared configuration at level 'Level' */
 const GaugeField& get_smeared_conf(int Level) const{
    return SmearedSet[Level];
-}
+  }
-
+  //====================================================================
-//====================================================================
+  void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ,
-void set_iLambda(GaugeLinkField& iLambda, 
+                   const GaugeLinkField& iQ, const GaugeLinkField& Sigmap,
-	GaugeLinkField& e_iQ,
+                   const GaugeLinkField& GaugeK) const {
-	const GaugeLinkField& iQ, 
+    GridBase* grid = iQ._grid;
 	const GaugeLinkField& Sigmap,
 	const GaugeLinkField& GaugeK)const{
 	GridBase *grid = iQ._grid;
    GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid);
    GaugeLinkField unity(grid);
-	unity=1.0;
+    unity = 1.0;
    LatticeComplex u(grid), w(grid);
    LatticeComplex f0(grid), f1(grid), f2(grid);
@@ -110,7 +107,8 @@ void set_iLambda(GaugeLinkField& iLambda,
    LatticeComplex emiu(grid), e2iu(grid), qt(grid), fden(grid);
    LatticeComplex r01(grid), r11(grid), r21(grid), r02(grid), r12(grid);
    LatticeComplex r22(grid), tr1(grid), tr2(grid);
-	LatticeComplex b10(grid), b11(grid), b12(grid), b20(grid), b21(grid), b22(grid);
+    LatticeComplex b10(grid), b11(grid), b12(grid), b20(grid), b21(grid),
        b22(grid);
    LatticeComplex LatticeUnitComplex(grid);
    LatticeUnitComplex = 1.0;
@@ -118,9 +116,9 @@ void set_iLambda(GaugeLinkField& iLambda,
    // Exponential
    iQ2 = iQ * iQ;
    iQ3 = iQ * iQ2;
-	StoutSmearing.set_uw(u,w,iQ2,iQ3);
+    StoutSmearing.set_uw(u, w, iQ2, iQ3);
-	StoutSmearing.set_fj(f0,f1,f2,u,w);
+    StoutSmearing.set_fj(f0, f1, f2, u, w);
-	e_iQ = f0*unity + timesMinusI(f1) * iQ - f2 * iQ2;
+    e_iQ = f0 * unity + timesMinusI(f1) * iQ - f2 * iQ2;
    // Getting B1, B2, Gamma and Lambda
     // simplify this part, redundant calculations in set_fj
@@ -131,36 +129,36 @@ void set_iLambda(GaugeLinkField& iLambda,
    cosw = cos(w);
    emiu = cos(u) - timesI(sin(u));
-	e2iu = cos(2.0*u) + timesI(sin(2.0*u));
+    e2iu = cos(2.0 * u) + timesI(sin(2.0 * u));
-	r01 = (2.0*u + timesI(2.0*(u2-w2))) * e2iu
+    r01 = (2.0 * u + timesI(2.0 * (u2 - w2))) * e2iu +
-	+ emiu * ((16.0*u*cosw + 2.0*u*(3.0*u2+w2)*xi0) +
+          emiu * ((16.0 * u * cosw + 2.0 * u * (3.0 * u2 + w2) * xi0) +
-		timesI(-8.0*u2*cosw + 2.0*(9.0*u2+w2)*xi0));
+                  timesI(-8.0 * u2 * cosw + 2.0 * (9.0 * u2 + w2) * xi0));
-	r11 = (2.0*LatticeUnitComplex + timesI(4.0*u))* e2iu
+    r11 = (2.0 * LatticeUnitComplex + timesI(4.0 * u)) * e2iu +
-	+ emiu * ((-2.0*cosw + (3.0*u2-w2)*xi0) +
+          emiu * ((-2.0 * cosw + (3.0 * u2 - w2) * xi0) +
-		timesI((2.0*u*cosw + 6.0*u*xi0)));
+                  timesI((2.0 * u * cosw + 6.0 * u * xi0)));
-	r21 = 2.0*timesI(e2iu)
+    r21 =
-	+ emiu * (-3.0*u*xi0 + timesI(cosw - 3.0*xi0));
+        2.0 * timesI(e2iu) + emiu * (-3.0 * u * xi0 + timesI(cosw - 3.0 * xi0));
    r02 = -2.0 * e2iu +
          emiu * (-8.0 * u2 * xi0 +
                  timesI(2.0 * u * (cosw + xi0 + 3.0 * u2 * xi1)));
-	r02 = -2.0 * e2iu + emiu * (-8.0*u2*xi0 +
+    r12 = emiu * (2.0 * u * xi0 + timesI(-cosw - xi0 + 3.0 * u2 * xi1));
 		timesI(2.0*u*(cosw + xi0 + 3.0*u2*xi1)));
-	r12 = emiu * (2.0*u*xi0 + timesI(-cosw - xi0 + 3.0*u2*xi1));
+    r22 = emiu * (xi0 - timesI(3.0 * u * xi1));
-	r22 = emiu * (xi0 - timesI(3.0*u*xi1));
+    fden = LatticeUnitComplex / (2.0 * (9.0 * u2 - w2) * (9.0 * u2 - w2));
-	fden = LatticeUnitComplex/(2.0*(9.0*u2-w2)*(9.0*u2-w2));
+    b10 = 2.0 * u * r01 + (3.0 * u2 - w2) * r02 - (30.0 * u2 + 2.0 * w2) * f0;
    b11 = 2.0 * u * r11 + (3.0 * u2 - w2) * r12 - (30.0 * u2 + 2.0 * w2) * f1;
    b12 = 2.0 * u * r21 + (3.0 * u2 - w2) * r22 - (30.0 * u2 + 2.0 * w2) * f2;
-	b10 = 2.0 * u * r01 + (3.0* u2 - w2)*r02 - (30.0 * u2 + 2.0 * w2)*f0;
+    b20 = r01 - (3.0 * u) * r02 - (24.0 * u) * f0;
-	b11 = 2.0 * u * r11 + (3.0* u2 - w2)*r12 - (30.0 * u2 + 2.0 * w2)*f1;
+    b21 = r11 - (3.0 * u) * r12 - (24.0 * u) * f1;
-	b12 = 2.0 * u * r21 + (3.0* u2 - w2)*r22 - (30.0 * u2 + 2.0 * w2)*f2;
+    b22 = r21 - (3.0 * u) * r22 - (24.0 * u) * f2;
 	b20 = r01 - (3.0*u)*r02 - (24.0*u)*f0;
 	b21 = r11 - (3.0*u)*r12 - (24.0*u)*f1;
 	b22 = r21 - (3.0*u)*r22 - (24.0*u)*f2;
    b10 *= fden;
    b11 *= fden;
@@ -169,13 +167,12 @@ void set_iLambda(GaugeLinkField& iLambda,
    b21 *= fden;
    b22 *= fden;
-
+    B1 = b10 * unity + timesMinusI(b11) * iQ - b12 * iQ2;
-	B1 = b10*unity + timesMinusI(b11) * iQ - b12 * iQ2;
+    B2 = b20 * unity + timesMinusI(b21) * iQ - b22 * iQ2;
 	B2 = b20*unity + timesMinusI(b21) * iQ - b22 * iQ2;
    USigmap = GaugeK * Sigmap;
-	tr1 = trace(USigmap*B1);
+    tr1 = trace(USigmap * B1);
-	tr2 = trace(USigmap*B2);
+    tr2 = trace(USigmap * B2);
    GaugeLinkField QUS = iQ * USigmap;
    GaugeLinkField USQ = USigmap * iQ;
@@ -184,99 +181,82 @@ void set_iLambda(GaugeLinkField& iLambda,
                            timesI(f1) * USigmap + f2 * QUS + f2 * USQ;
    iLambda = Ta(iGamma);
  }
-}
+  //====================================================================
-
+ public:
-//==================================================================== 
+  GaugeField*
-public:
+      ThinLinks; /*!< @brief Pointer to the thin
    GaugeField* ThinLinks;      /*!< @brief Pointer to the thin 
                                                         links configuration */
  /*! @brief Standard constructor */
-    SmearedConfiguration(GridCartesian * UGrid,
+  SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear,
-    	unsigned int Nsmear, 
+                       Smear_Stout<Gimpl>& Stout)
-    	Smear_Stout<Gimpl>& Stout):
+      : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) {
-    smearingLevels(Nsmear),
+    for (unsigned int i = 0; i < smearingLevels; ++i)
    StoutSmearing(Stout),
    ThinLinks(NULL){
    	for (unsigned int i=0; i< smearingLevels; ++i)
      SmearedSet.push_back(*(new GaugeField(UGrid)));
  }
  /*! For just thin links */
-    SmearedConfiguration():
+  SmearedConfiguration()
-    smearingLevels(0),
+      : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
    StoutSmearing(),
    SmearedSet(),
    ThinLinks(NULL){}
  // attach the smeared routines to the thin links U and fill the smeared set
-    void set_GaugeField(GaugeField& U){ fill_smearedSet(U);}
+  void set_GaugeField(GaugeField& U) { fill_smearedSet(U); }
-//====================================================================
+  //====================================================================
-    void smeared_force(GaugeField& SigmaTilde) const{
+  void smeared_force(GaugeField& SigmaTilde) const {
     // Rewrites SigmaTilde in place by passing it back through the smearing
     // levels; does nothing when smearingLevels is 0.
     //  1. Copy SigmaTilde into `force` and, for every direction mu,
     //     left-multiply by adj() of the last smeared link
     //     (SmearedSet[smearingLevels - 1]).
     //  2. Apply AnalyticSmearedForce for ismr = smearingLevels-1 down to 1
     //     using get_smeared_conf(ismr - 1), then once more with *ThinLinks.
     //  3. For every mu, left-multiply the result by the thin link and poke
     //     it back into SigmaTilde.
-
+    if (smearingLevels > 0) {
-    	if (smearingLevels > 0){
+      GaugeField force = SigmaTilde; // actually = U*SigmaTilde
     		GaugeField     force(SigmaTilde._grid); 
       GaugeLinkField tmp_mu(SigmaTilde._grid);
 	  		force = SigmaTilde;//actually = U*SigmaTilde
-	  		for (int mu = 0; mu < Nd; mu++){
+      for (int mu = 0; mu < Nd; mu++) {
         // to get just SigmaTilde
-	  			tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels-1], mu)) * peekLorentz(force,mu);
+        tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) *
                  peekLorentz(force, mu);
         pokeLorentz(force, tmp_mu, mu);
       }
-	  		for(int ismr = smearingLevels - 1; ismr > 0; --ismr)
+      for (int ismr = smearingLevels - 1; ismr > 0; --ismr)
-	  			force = AnalyticSmearedForce(force,get_smeared_conf(ismr-1));
+        force = AnalyticSmearedForce(force, get_smeared_conf(ismr - 1));
-	  		force = AnalyticSmearedForce(force,*ThinLinks);
+      force = AnalyticSmearedForce(force, *ThinLinks);
-	  		for (int mu = 0; mu < Nd; mu++){
+      for (int mu = 0; mu < Nd; mu++) {
         tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu);
         pokeLorentz(SigmaTilde, tmp_mu, mu);
       }
-	}// if smearingLevels = 0 do nothing
+    }  // if smearingLevels = 0 do nothing
-}
+  }
  //====================================================================
   // Returns a mutable reference to the last entry of SmearedSet
   // (index smearingLevels - 1).
   // NOTE(review): smearingLevels is unsigned, so with zero smearing levels
   // the index wraps around instead of going negative; get_U() tests
   // smearingLevels before calling this, other callers should do the same.
   GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; }
-GaugeField& get_SmearedU(){ 
+  GaugeField& get_U(bool smeared = false) {
 	return SmearedSet[smearingLevels-1];
 }
 GaugeField& get_U(bool smeared=false) { 
    // get the config, thin links by default
-	if (smeared){
+    if (smeared) {
-		if (smearingLevels){ 
+      if (smearingLevels) {
-			RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels-1]);
+        RealD impl_plaq =
-			std::cout<< GridLogDebug << "getting Usmr Plaq: " << impl_plaq<< std::endl;
+            WilsonLoops<Gimpl>::avgPlaquette(SmearedSet[smearingLevels - 1]);
        std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq
                  << std::endl;
        return get_SmearedU();
-		}
+      } else {
 		else  {
        RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
-			std::cout<< GridLogDebug << "getting Thin Plaq: " << impl_plaq<< std::endl;
+        std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
                  << std::endl;
        return *ThinLinks;
      }
    } else {
      RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
      std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq
                << std::endl;
      return *ThinLinks;
    }
  }
 	else{
 		RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(*ThinLinks);
 		std::cout<< GridLogDebug << "getting Thin Plaq: " << impl_plaq<< std::endl;
 		return *ThinLinks;}
 	}
 };
 }
 }
 #endif
--- a/scripts/copyright
+++ b/scripts/copyright
@@ -5,13 +5,13 @@ while (( "$#" )); do
 echo $1
 cat > message  <<EOF
-    /*************************************************************************************
+/*************************************************************************************
-    Grid physics library, www.github.com/paboyle/Grid 
+Grid physics library, www.github.com/paboyle/Grid 
-    Source file: $1
+Source file: $1
-    Copyright (C) 2015
+Copyright (C) 2015
 EOF
@@ -19,23 +19,23 @@ git log $1 | grep Author | sort -u >> message
 cat >> message <<EOF
-    This program is free software; you can redistribute it and/or modify
+This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
+it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+(at your option) any later version.
-    This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
+but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+GNU General Public License for more details.
-    You should have received a copy of the GNU General Public License along
+You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
+with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-    See the full license in the file "LICENSE" in the top level distribution directory
+See the full license in the file "LICENSE" in the top level distribution directory
-    *************************************************************************************/
+*************************************************************************************/
-    /*  END LEGAL */
+/*  END LEGAL */
 EOF
 cat message > tmp.fil
--- a/tests/Test_main.cc
+++ b/tests/Test_main.cc
@@ -1,89 +1,91 @@
-    /*************************************************************************************
+/*************************************************************************************
-    Grid physics library, www.github.com/paboyle/Grid 
+Grid physics library, www.github.com/paboyle/Grid
-    Source file: ./tests/Test_main.cc
+Source file: ./tests/Test_main.cc
-    Copyright (C) 2015
+Copyright (C) 2015
 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: neo <cossu@post.kek.jp>
 Author: paboyle <paboyle@ph.ed.ac.uk>
-    This program is free software; you can redistribute it and/or modify
+This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
+it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
+the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+(at your option) any later version.
-    This program is distributed in the hope that it will be useful,
+This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
+but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+GNU General Public License for more details.
-    You should have received a copy of the GNU General Public License along
+You should have received a copy of the GNU General Public License along
-    with this program; if not, write to the Free Software Foundation, Inc.,
+with this program; if not, write to the Free Software Foundation, Inc.,
-    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-    See the full license in the file "LICENSE" in the top level distribution directory
+See the full license in the file "LICENSE" in the top level distribution
-    *************************************************************************************/
+directory
-    /*  END LEGAL */
+*************************************************************************************/
 /*  END LEGAL */
 #include "Grid.h"
 using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;
 /*
- Grid_main.cc(232): error: no suitable user-defined conversion from "Grid::iScalar<Grid::iMatrix<Grid::iScalar<Grid::Complex>, 4>>" to "const Grid::iScalar<Grid::iScalar<Grid::iMatrix<Grid::Complex, 3>>>" exists
+ Grid_main.cc(232): error: no suitable user-defined conversion from
 "Grid::iScalar<Grid::iMatrix<Grid::iScalar<Grid::Complex>, 4>>" to "const
 Grid::iScalar<Grid::iScalar<Grid::iMatrix<Grid::Complex, 3>>>" exists
 c_m = peekIdiot<SpinColourMatrix>(scm,1,2);
 */
-template<class vobj> auto peekIdiot(const vobj &rhs,int i,int j) -> decltype(peekIndex<2>(rhs,0,0))
+template <class vobj>
-{
+auto peekIdiot(const vobj &rhs, int i, int j)
-  return peekIndex<2>(rhs,i,j);
+    -> decltype(peekIndex<2>(rhs, 0, 0)) {
   // Forwards to peekIndex<2>(rhs, i, j); the return type is whatever that
   // call yields (taken via decltype in the trailing return type).
   return peekIndex<2>(rhs, i, j);
 }
-template<class vobj> auto peekDumKopf(const vobj &rhs,int i,int j) -> decltype(peekIndex<3>(rhs,0,0))
+template <class vobj>
-{
+auto peekDumKopf(const vobj &rhs, int i, int j)
-  return peekIndex<3>(rhs,i,j);
+    -> decltype(peekIndex<3>(rhs, 0, 0)) {
   // Two-index form: forwards to peekIndex<3>(rhs, i, j); the return type is
   // taken from that call via decltype.
   return peekIndex<3>(rhs, i, j);
 }
-template<class vobj> auto peekDumKopf(const vobj &rhs,int i) -> decltype(peekIndex<3>(rhs,0))
+template <class vobj>
-{
+auto peekDumKopf(const vobj &rhs, int i) -> decltype(peekIndex<3>(rhs, 0)) {
   // One-index form: forwards to peekIndex<3>(rhs, i); the return type is
   // taken from that call via decltype.
-  return peekIndex<3>(rhs,i);
+  return peekIndex<3>(rhs, i);
 }
-int main (int argc, char ** argv)
+int main(int argc, char **argv) {
-{
+  Grid_init(&argc, &argv);
  Grid_init(&argc,&argv);
  std::vector<int> latt_size = GridDefaultLatt();
-  std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
+  std::vector<int> simd_layout = GridDefaultSimd(4, vComplex::Nsimd());
  std::vector<int> mpi_layout = GridDefaultMpi();
  latt_size.resize(4);
 #ifdef AVX512
- for(int omp=128;omp<236;omp+=16){
+  for (int omp = 128; omp < 236; omp += 16) {
 #else
- for(int omp=1;omp<2;omp*=20){
+  for (int omp = 1; omp < 2; omp *= 20) {
 #endif
 #ifdef OMP
    omp_set_num_threads(omp);
 #endif
-  for(int lat=8;lat<=16;lat+=40){
+    for (int lat = 8; lat <= 16; lat += 40) {
-
+      std::cout << "Lat " << lat << std::endl;
    std::cout << "Lat "<<lat<<std::endl;
      latt_size[0] = lat;
      latt_size[1] = lat;
      latt_size[2] = lat;
      latt_size[3] = lat;
-    double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+      double volume = latt_size[0] * latt_size[1] * latt_size[2] * latt_size[3];
-    GridCartesian           Fine(latt_size,simd_layout,mpi_layout);
+      GridCartesian Fine(latt_size, simd_layout, mpi_layout);
-    GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout);
+      GridRedBlackCartesian rbFine(latt_size, simd_layout, mpi_layout);
      GridParallelRNG FineRNG(&Fine);
      GridSerialRNG SerialRNG;
      GridSerialRNG SerialRNG1;
@@ -91,19 +93,19 @@ int main (int argc, char ** argv)
      FineRNG.SeedRandomDevice();
      SerialRNG.SeedRandomDevice();
-    std::cout <<"SerialRNG" << SerialRNG._generators[0] <<std::endl;
+      std::cout << "SerialRNG" << SerialRNG._generators[0] << std::endl;
      std::vector<typename GridSerialRNG::RngStateType> saved;
-    SerialRNG.GetState(saved,0);
+      SerialRNG.GetState(saved, 0);
-    SerialRNG1.SetState(saved,0);
+      SerialRNG1.SetState(saved, 0);
-    RealD dd1,dd2;
+      RealD dd1, dd2;
-    std::cout << "Testing RNG state save restore"<<std::endl;
+      std::cout << "Testing RNG state save restore" << std::endl;
-    for(int i=0;i<10;i++){
+      for (int i = 0; i < 10; i++) {
-      random(SerialRNG,dd1);
+        random(SerialRNG, dd1);
-      random(SerialRNG1,dd2);
+        random(SerialRNG1, dd2);
-      std::cout << "i "<<i<<" "<<dd1<< " " <<dd2<<std::endl;
+        std::cout << "i " << i << " " << dd1 << " " << dd2 << std::endl;
      }
      LatticeColourMatrix Foo(&Fine);
      LatticeColourMatrix Bar(&Fine);
@@ -132,7 +134,6 @@ int main (int argc, char ** argv)
      LatticeLorentzColourMatrix lcMat(&Fine);
      LatticeComplex scalar(&Fine);
      LatticeReal rscalar(&Fine);
      LatticeReal iscalar(&Fine);
@@ -141,82 +142,96 @@ int main (int argc, char ** argv)
      iSpinMatrix<vComplex> iGammaFive;
      ColourMatrix cmat;
-    random(FineRNG,Foo);
+      random(FineRNG, Foo);
-    gaussian(FineRNG,Bar);
+      gaussian(FineRNG, Bar);
-    random(FineRNG,scFoo);
+      random(FineRNG, scFoo);
-    random(FineRNG,scBar);
+      random(FineRNG, scBar);
    random(FineRNG,cMat);
    random(FineRNG,sMat);
    random(FineRNG,scMat);
    random(FineRNG,lcMat);
    random(FineRNG,cVec);
    random(FineRNG,sVec);
    random(FineRNG,scVec);
      random(FineRNG, cMat);
      random(FineRNG, sMat);
      random(FineRNG, scMat);
      random(FineRNG, lcMat);
      random(FineRNG, cVec);
      random(FineRNG, sVec);
      random(FineRNG, scVec);
      fflush(stdout);
      LatticeColourMatrix newFoo = Foo; 
      // confirm correctness of copy constructor
      Bar = Foo - newFoo;
      std::cout << "Copy constructor diff check: "; 
      double test_cc = norm2(Bar);
      if (test_cc < 1e-5){
        std::cout << "OK\n";
    }
      else{
        std::cout << "fail\n";
        abort();
    }
      TComplex tr = trace(cmat);
      cVec = cMat * cVec;     // LatticeColourVector     = LatticeColourMatrix
                              // * LatticeColourVector
      sVec = sMat * sVec;     // LatticeSpinVector       = LatticeSpinMatrix
                              // * LatticeSpinVector
      scVec = scMat * scVec;  // LatticeSpinColourVector =
                              // LatticeSpinColourMatrix *
                              // LatticeSpinColourVector
      scVec = cMat * scVec;   // LatticeSpinColourVector = LatticeColourMatrix
                              // * LatticeSpinColourVector
      scVec = sMat * scVec;   // LatticeSpinColourVector = LatticeSpinMatrix
                              // * LatticeSpinColourVector
-    cVec = cMat * cVec;  // LatticeColourVector     = LatticeColourMatrix     * LatticeColourVector
+      cMat = outerProduct(cVec, cVec);
-    sVec = sMat * sVec;  // LatticeSpinVector       = LatticeSpinMatrix       * LatticeSpinVector
+      scalar = localInnerProduct(cVec, cVec);
    scVec= scMat * scVec;// LatticeSpinColourVector = LatticeSpinColourMatrix * LatticeSpinColourVector
    scVec= cMat * scVec; // LatticeSpinColourVector = LatticeColourMatrix     * LatticeSpinColourVector
    scVec= sMat * scVec; // LatticeSpinColourVector = LatticeSpinMatrix       * LatticeSpinColourVector
    cMat = outerProduct(cVec,cVec);
    scalar = localInnerProduct(cVec,cVec);
    cMat = Ta(cMat);  //traceless antihermitian
      cMat = Ta(cMat);  // traceless antihermitian
      scalar += scalar;
      scalar -= scalar;
      scalar *= scalar;
-    add(scalar,scalar,scalar);
+      add(scalar, scalar, scalar);
-    sub(scalar,scalar,scalar);
+      sub(scalar, scalar, scalar);
-    mult(scalar,scalar,scalar);
+      mult(scalar, scalar, scalar);
-    mac(scalar,scalar,scalar);
+      mac(scalar, scalar, scalar);
-    scalar = scalar+scalar;
+      scalar = scalar + scalar;
-    scalar = scalar-scalar;
+      scalar = scalar - scalar;
-    scalar = scalar*scalar;
+      scalar = scalar * scalar;
-    scalar=outerProduct(scalar,scalar);
+      scalar = outerProduct(scalar, scalar);
-    scalar=adj(scalar);
+      scalar = adj(scalar);
      //    rscalar=real(scalar);
      //    iscalar=imag(scalar);
      //    scalar =cmplx(rscalar,iscalar);
-    PokeIndex<ColourIndex>(cVec,scalar,1);
+      PokeIndex<ColourIndex>(cVec, scalar, 1);
      scalar = transpose(scalar);
      scalar = TransposeIndex<ColourIndex>(scalar);
      scalar = TraceIndex<SpinIndex>(scalar);
      scalar = PeekIndex<ColourIndex>(cVec, 0);
-    scalar=transpose(scalar);
+      scalar = trace(scalar);
-    scalar=TransposeIndex<ColourIndex>(scalar);
+      scalar = localInnerProduct(cVec, cVec);
-    scalar=TraceIndex<SpinIndex>(scalar);
+      scalar = localNorm2(cVec);
    scalar=PeekIndex<ColourIndex>(cVec,0);
-    scalar=trace(scalar);
+      //     -=,+=,*=,()
-    scalar=localInnerProduct(cVec,cVec);
+      //     add,+,sub,-,mult,mac,*
-    scalar=localNorm2(cVec);
+      //     adj,conjugate
-    
+      //     real,imag
-//     -=,+=,*=,()
+      //     transpose,transposeIndex
-//     add,+,sub,-,mult,mac,*
+      //     trace,traceIndex
-//     adj,conjugate
+      //     peekIndex
-//     real,imag
+      //     innerProduct,outerProduct,
-//     transpose,transposeIndex  
+      //     localNorm2
-//     trace,traceIndex
+      //     localInnerProduct
 //     peekIndex
 //     innerProduct,outerProduct,
 //     localNorm2
 //     localInnerProduct
    scMat = sMat*scMat;  // LatticeSpinColourMatrix = LatticeSpinMatrix       * LatticeSpinColourMatrix
      scMat = sMat * scMat;  // LatticeSpinColourMatrix = LatticeSpinMatrix
                             // * LatticeSpinColourMatrix
      ///////////////////////
      // Non-lattice (const objects) * Lattice
@@ -224,103 +239,103 @@ int main (int argc, char ** argv)
      SpinColourMatrix scm;
      vSpinColourMatrix vscm;
      Complex cplx(1.0);
-    Integer myint=1;
+      Integer myint = 1;
-    double mydouble=1.0;
+      double mydouble = 1.0;
      //    vSpinColourMatrix vscm;
-    scMat = cMat*scMat;
+      scMat = cMat * scMat;
-    scm = cm * scm;         // SpinColourMatrix  = ColourMatrix     * SpinColourMatrix
+      scm =
-    scm = scm *cm;          // SpinColourMatrix  = SpinColourMartix * ColourMatrix
+          cm * scm;  // SpinColourMatrix  = ColourMatrix     * SpinColourMatrix
-    scm = GammaFive * scm ; // SpinColourMatrix  = SpinMatrix       * SpinColourMatrix
+      scm = scm * cm;  // SpinColourMatrix  = SpinColourMatrix * ColourMatrix
-    scm = scm* GammaFive  ; // SpinColourMatrix  = SpinColourMatrix * SpinMatrix
+      scm = GammaFive *
            scm;  // SpinColourMatrix  = SpinMatrix       * SpinColourMatrix
      scm =
          scm * GammaFive;  // SpinColourMatrix  = SpinColourMatrix * SpinMatrix
-    scm = scm*cplx;
+      scm = scm * cplx;
-    vscm = vscm*cplx;
+      vscm = vscm * cplx;
-    scMat = scMat*cplx;
+      scMat = scMat * cplx;
-    scm = cplx*scm;
+      scm = cplx * scm;
-    vscm = cplx*vscm;
+      vscm = cplx * vscm;
-    scMat = cplx*scMat;
+      scMat = cplx * scMat;
-    scm = myint*scm;
+      scm = myint * scm;
-    vscm = myint*vscm;
+      vscm = myint * vscm;
-    scMat = scMat*myint;
+      scMat = scMat * myint;
-    scm = scm*mydouble;
+      scm = scm * mydouble;
-    vscm = vscm*mydouble;
+      vscm = vscm * mydouble;
-    scMat = scMat*mydouble;
+      scMat = scMat * mydouble;
-    scMat = mydouble*scMat;
+      scMat = mydouble * scMat;
-    cMat = mydouble*cMat;
+      cMat = mydouble * cMat;
      sMat = adj(sMat);          // LatticeSpinMatrix adjoint
-    sMat = iGammaFive*sMat; // SpinMatrix * LatticeSpinMatrix
+      sMat = iGammaFive * sMat;  // SpinMatrix * LatticeSpinMatrix
-    sMat = GammaFive*sMat;  // SpinMatrix * LatticeSpinMatrix
+      sMat = GammaFive * sMat;   // SpinMatrix * LatticeSpinMatrix
-    scMat= adj(scMat);
+      scMat = adj(scMat);
-    cMat= adj(cMat);
+      cMat = adj(cMat);
-    cm=adj(cm);
+      cm = adj(cm);
-    scm=adj(scm);
+      scm = adj(scm);
-    scm=transpose(scm);
+      scm = transpose(scm);
-    scm=transposeIndex<1>(scm);
+      scm = transposeIndex<1>(scm);
      random(SerialRNG, cm);
-    std::cout<<GridLogMessage << cm << std::endl;
+      std::cout << GridLogMessage << cm << std::endl;
      cm = Ta(cm);
-    TComplex tracecm= trace(cm);      
+      TComplex tracecm = trace(cm);
-    std::cout<<GridLogMessage << cm << std::endl;
+      std::cout << GridLogMessage << cm << std::endl;
      cm = Exponentiate(cm, 2.0, 12);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
      Complex det = Determinant(cm);
-    std::cout<<GridLogMessage << "determinant: " << det <<  std::endl;
+      std::cout << GridLogMessage << "determinant: " << det << std::endl;
-    std::cout<<GridLogMessage << "norm: " << norm2(cm) <<  std::endl;
+      std::cout << GridLogMessage << "norm: " << norm2(cm) << std::endl;
      cm = ProjectOnGroup(cm);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
-    std::cout<<GridLogMessage << "norm: " << norm2(cm) <<  std::endl;
+      std::cout << GridLogMessage << "norm: " << norm2(cm) << std::endl;
      cm = ProjectOnGroup(cm);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
-    std::cout<<GridLogMessage << "norm: " << norm2(cm) <<  std::endl;
+      std::cout << GridLogMessage << "norm: " << norm2(cm) << std::endl;
      //    det = Determinant(cm);
      //    std::cout<<GridLogMessage << "determinant: " << det <<  std::endl;
-
+      //    Foo = Foo+scalar; // LatticeColourMatrix+Scalar
-//    Foo = Foo+scalar; // LatticeColourMatrix+Scalar
+      //    Foo = Foo*scalar; // LatticeColourMatrix*Scalar
-//    Foo = Foo*scalar; // LatticeColourMatrix*Scalar
+      //    Foo = Foo-scalar; // LatticeColourMatrix-Scalar
-//    Foo = Foo-scalar; // LatticeColourMatrix-Scalar
+      //    Foo = scalar*Foo; // Scalar*LatticeColourMatrix
-//    Foo = scalar*Foo; // Scalar*LatticeColourMatrix
+      //    Foo = scalar+Foo; // Scalar+LatticeColourMatrix
-//    Foo = scalar+Foo; // Scalar+LatticeColourMatrix
+      //    Foo = scalar-Foo; // Scalar-LatticeColourMatrix
 //    Foo = scalar-Foo; // Scalar-LatticeColourMatrix
      LatticeComplex trscMat(&Fine);
      trscMat = trace(scMat);  // Trace
      // Exponentiate test
-    std::vector<int> mysite {0,0,0,0};
+      std::vector<int> mysite{0, 0, 0, 0};
-    random(FineRNG,cMat);
+      random(FineRNG, cMat);
      cMat = Ta(cMat);
      peekSite(cm, cMat, mysite);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
      cm = Exponentiate(cm, 1.0, 12);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
-    std::cout<<GridLogMessage << "norm: " << norm2(cm) <<  std::endl;
+      std::cout << GridLogMessage << "norm: " << norm2(cm) << std::endl;
-
+      std::cout << GridLogMessage << "norm cMmat : " << norm2(cMat)
-    std::cout<<GridLogMessage << "norm cMmat : " << norm2(cMat) <<  std::endl;
+                << std::endl;
      cMat = expMat(cMat, ComplexD(1.0, 0.0));
-    std::cout<<GridLogMessage << "norm expMat: " << norm2(cMat) <<  std::endl;
+      std::cout << GridLogMessage << "norm expMat: " << norm2(cMat)
                << std::endl;
      peekSite(cm, cMat, mysite);
-    std::cout<<GridLogMessage << cm << "  " << std::endl;
+      std::cout << GridLogMessage << cm << "  " << std::endl;
-    std::cout<<GridLogMessage << "determinant: " << Determinant(cm) <<  std::endl;
+      std::cout << GridLogMessage << "determinant: " << Determinant(cm)
-    std::cout<<GridLogMessage << "norm: " << norm2(cm) <<  std::endl;
+                << std::endl;
-
+      std::cout << GridLogMessage << "norm: " << norm2(cm) << std::endl;
      // LatticeComplex trlcMat(&Fine);
-    // trlcMat = trace(lcMat); // Trace involving iVector - now generates error
+      // trlcMat = trace(lcMat); // Trace involving iVector - now generates
-    
+      // error
      {  // Peek-ology and Poke-ology, with a little app-ology
        Complex c;
@@ -328,25 +343,29 @@ int main (int argc, char ** argv)
        SpinMatrix s_m;
        SpinColourMatrix sc_m;
-      s_m = TensorIndexRecursion<ColourIndex>::traceIndex(sc_m); // Map to traceColour
+        s_m = TensorIndexRecursion<ColourIndex>::traceIndex(
-      c_m = TensorIndexRecursion<SpinIndex>::traceIndex(sc_m); // map to traceSpin
+            sc_m);  // Map to traceColour
        c_m = TensorIndexRecursion<SpinIndex>::traceIndex(
            sc_m);  // map to traceSpin
        c = TensorIndexRecursion<SpinIndex>::traceIndex(s_m);
        c = TensorIndexRecursion<ColourIndex>::traceIndex(c_m);
-      s_m = TensorIndexRecursion<ColourIndex>::peekIndex(scm,0,0);
+        s_m = TensorIndexRecursion<ColourIndex>::peekIndex(scm, 0, 0);
-      c_m = TensorIndexRecursion<SpinIndex>::peekIndex(scm,1,2);
+        c_m = TensorIndexRecursion<SpinIndex>::peekIndex(scm, 1, 2);
        //      c_m = peekSpin<SpinColourMatrix>(scm,1,2);
        //      c_m = peekIdiot<SpinColourMatrix>(scm,1,2);
-      printf("c. Level %d\n",c_m.TensorLevel);
+        printf("c. Level %d\n", c_m.TensorLevel);
-      printf("c. Level %d\n",c_m().TensorLevel);
+        printf("c. Level %d\n", c_m().TensorLevel);
-      printf("c. Level %d\n",c_m()().TensorLevel);
+        printf("c. Level %d\n", c_m()().TensorLevel);
-      c_m()()    = scm()(0,0); //ColourComponents of CM <= ColourComponents of SpinColourMatrix
+        c_m()() = scm()(0, 0);  // ColourComponents of CM <= ColourComponents of
-      scm()(1,1) = cm()();  //ColourComponents of CM <= ColourComponents of SpinColourMatrix
+                                // SpinColourMatrix
-      c          = scm()(1,1)(1,2);
+        scm()(1, 1) = cm()();  // ColourComponents of SpinColourMatrix <=
-      scm()(1,1)(2,1) = c;
+                               // ColourComponents of CM
        c = scm()(1, 1)(1, 2);
        scm()(1, 1)(2, 1) = c;
        //      pokeIndex<ColourIndex> (c_m,c,0,0);
      }
@@ -364,47 +383,49 @@ int main (int argc, char ** argv)
      */
      lex_sites(Foo);
      Integer mm[4];
-    mm[0]=1;
+      mm[0] = 1;
-    mm[1]=Fine._rdimensions[0];
+      mm[1] = Fine._rdimensions[0];
-    mm[2]=Fine._ldimensions[0]*Fine._ldimensions[1];
+      mm[2] = Fine._ldimensions[0] * Fine._ldimensions[1];
-    mm[3]=Fine._ldimensions[0]*Fine._ldimensions[1]*Fine._ldimensions[2];
+      mm[3] =
          Fine._ldimensions[0] * Fine._ldimensions[1] * Fine._ldimensions[2];
      LatticeInteger lex(&Fine);
-    lex=zero;
+      lex = zero;
-    for(int d=0;d<4;d++){
+      for (int d = 0; d < 4; d++) {
        LatticeInteger coor(&Fine);
-      LatticeCoordinate(coor,d);
+        LatticeCoordinate(coor, d);
-      lex = lex + coor*mm[d];
+        lex = lex + coor * mm[d];
      }
      //    Bar = zero;
      //    Bar = where(lex<Integer(10),Foo,Bar);
      cout << "peeking sites..\n";
      {
        std::vector<int> coor(4);
-      for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
+        for (coor[3] = 0; coor[3] < latt_size[3] / mpi_layout[3]; coor[3]++) {
-      for(coor[2]=0;coor[2]<latt_size[2]/mpi_layout[2];coor[2]++){
+          for (coor[2] = 0; coor[2] < latt_size[2] / mpi_layout[2]; coor[2]++) {
-      for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
+            for (coor[1] = 0; coor[1] < latt_size[1] / mpi_layout[1];
-      for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
+                 coor[1]++) {
              for (coor[0] = 0; coor[0] < latt_size[0] / mpi_layout[0];
                   coor[0]++) {
                ColourMatrix bar;
-        peekSite(bar,Bar,coor);
+                peekSite(bar, Bar, coor);
-        for(int r=0;r<3;r++){
+                for (int r = 0; r < 3; r++) {
-        for(int c=0;c<3;c++){
+                  for (int c = 0; c < 3; c++) {
-	  //	  cout<<"bar "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "<<bar()()(r,c)<<std::endl;
+                    //      cout<<"bar "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<"
-	}}
+                    //      "<<bar()()(r,c)<<std::endl;
-      }}}}
+                  }
                }
              }
            }
          }
        }
      }
-    //setCheckerboard(ShiftedCheck,rFoo); 
+      // setCheckerboard(ShiftedCheck,rFoo);
-    //setCheckerboard(ShiftedCheck,bFoo); 
+      // setCheckerboard(ShiftedCheck,bFoo);
      // Lattice SU(3) x SU(3)
      Fine.Barrier();
@@ -414,128 +435,137 @@ int main (int argc, char ** argv)
      scFooBar = scFoo * scBar;
      // Benchmark some simple operations LatticeSU3 * Lattice SU3.
-    double t0,t1,flops;
+      double t0, t1, flops;
      double bytes;
-    int ncall=5000;
+      int ncall = 5000;
      int Nc = Grid::QCD::Nc;
      LatticeGaugeField U(&Fine);
      //    LatticeColourMatrix Uy = peekLorentz(U,1);
      //    LatticeColourMatrix Uy = peekDumKopf(U,1);
-    flops = ncall*1.0*volume*(8*Nc*Nc*Nc);
+      flops = ncall * 1.0 * volume * (8 * Nc * Nc * Nc);
-    bytes = ncall*1.0*volume*Nc*Nc    *2*3*sizeof(Grid::Real);
+      bytes = ncall * 1.0 * volume * Nc * Nc * 2 * 3 * sizeof(Grid::Real);
-    if ( Fine.IsBoss() ) {
+      if (Fine.IsBoss()) {
-      printf("%f flop and %f bytes\n",flops,bytes/ncall);
+        printf("%f flop and %f bytes\n", flops, bytes / ncall);
      }
      FooBar = Foo * Bar;
      Fine.Barrier();
-    t0=usecond();
+      t0 = usecond();
-    for(int i=0;i<ncall;i++){
+      for (int i = 0; i < ncall; i++) {
        Fine.Barrier();
-      mult(FooBar,Foo,Bar); // this is better
+        mult(FooBar, Foo, Bar);  // this is better
      }
-    t1=usecond();
+      t1 = usecond();
      Fine.Barrier();
-    if ( Fine.IsBoss() ) {
+      if (Fine.IsBoss()) {
 #ifdef OMP
-      printf("mult NumThread %d , Lattice size %d , %f us per call\n",omp_get_max_threads(),lat,(t1-t0)/ncall);
+        printf("mult NumThread %d , Lattice size %d , %f us per call\n",
               omp_get_max_threads(), lat, (t1 - t0) / ncall);
 #endif
-      printf("mult NumThread %d , Lattice size %d , %f us per call\n",omp,lat,(t1-t0)/ncall);
+        printf("mult NumThread %d , Lattice size %d , %f us per call\n", omp,
-      printf("mult NumThread %d , Lattice size %d , %f Mflop/s\n",omp,lat,flops/(t1-t0));
+               lat, (t1 - t0) / ncall);
-      printf("mult NumThread %d , Lattice size %d , %f MB/s\n",omp,lat,bytes/(t1-t0));
+        printf("mult NumThread %d , Lattice size %d , %f Mflop/s\n", omp, lat,
               flops / (t1 - t0));
        printf("mult NumThread %d , Lattice size %d , %f MB/s\n", omp, lat,
               bytes / (t1 - t0));
      }
-    mult(FooBar,Foo,Bar);
+      mult(FooBar, Foo, Bar);
      FooBar = Foo * Bar;
-    bytes = ncall*1.0*volume*Nc*Nc    *2*5*sizeof(Grid::Real);
+      bytes = ncall * 1.0 * volume * Nc * Nc * 2 * 5 * sizeof(Grid::Real);
      Fine.Barrier();
-    t0=usecond();
+      t0 = usecond();
-    for(int i=0;i<ncall;i++){
+      for (int i = 0; i < ncall; i++) {
        Fine.Barrier();
-      mult(FooBar,Foo,Cshift(Bar,1,-1));
+        mult(FooBar, Foo, Cshift(Bar, 1, -1));
-      //mult(FooBar,Foo,Bar);
+        // mult(FooBar,Foo,Bar);
-      //FooBar = Foo * Bar; // this is bad
+        // FooBar = Foo * Bar; // this is bad
      }
-    t1=usecond();
+      t1 = usecond();
      Fine.Barrier();
      FooBar = Foo * Bar;
-    if ( Fine.IsBoss() ) {
+      if (Fine.IsBoss()) {
-      printf("Cshift Mult: NumThread %d , Lattice size %d , %f us per call\n",omp,lat,(t1-t0)/ncall);
+        printf("Cshift Mult: NumThread %d , Lattice size %d , %f us per call\n",
-      printf("Cshift Mult: NumThread %d , Lattice size %d , %f Mflop/s\n",omp,lat,flops/(t1-t0));
+               omp, lat, (t1 - t0) / ncall);
-      printf("Cshift Mult: NumThread %d , Lattice size %d , %f MB/s\n",omp,lat,bytes/(t1-t0));
+        printf("Cshift Mult: NumThread %d , Lattice size %d , %f Mflop/s\n",
               omp, lat, flops / (t1 - t0));
        printf("Cshift Mult: NumThread %d , Lattice size %d , %f MB/s\n", omp,
               lat, bytes / (t1 - t0));
      }
      //    pickCheckerboard(0,rFoo,FooBar);
      //    pickCheckerboard(1,bFoo,FooBar);
      //    setCheckerboard(FooBar,rFoo);
      //    setCheckerboard(FooBar,bFoo);
-    double nrm=0;
+      double nrm = 0;
      LatticeColourMatrix deriv(&Fine);
-    double half=0.5;
+      double half = 0.5;
-    deriv = 0.5*Cshift(Foo,0,1) - 0.5*Cshift(Foo,0,-1);
+      deriv = 0.5 * Cshift(Foo, 0, 1) - 0.5 * Cshift(Foo, 0, -1);
      for (int dir = 0; dir < 4; dir++) {
        for (int shift = 0; shift < latt_size[dir]; shift++) {
          pickCheckerboard(0, rFoo,
                           Foo);  // Pick out red or black checkerboards
          pickCheckerboard(1, bFoo, Foo);
-    for(int dir=0;dir<4;dir++){
+          if (Fine.IsBoss()) {
-      for(int shift=0;shift<latt_size[dir];shift++){
+            std::cout << GridLogMessage << "Shifting both parities by " << shift
-
+                      << " direction " << dir << std::endl;
 	pickCheckerboard(0,rFoo,Foo);    // Pick out red or black checkerboards
 	pickCheckerboard(1,bFoo,Foo);
 	if ( Fine.IsBoss() ) {
 	  std::cout<<GridLogMessage << "Shifting both parities by "<< shift <<" direction "<< dir <<std::endl;
          }
-	Shifted  = Cshift(Foo,dir,shift);    // Shift everything
+          Shifted = Cshift(Foo, dir, shift);  // Shift everything
-	bShifted = Cshift(rFoo,dir,shift);   // Shift red->black
+          bShifted = Cshift(rFoo, dir, shift);  // Shift red->black
-	rShifted = Cshift(bFoo,dir,shift);   // Shift black->red
+          rShifted = Cshift(bFoo, dir, shift);  // Shift black->red
-	ShiftedCheck=zero;
+          ShiftedCheck = zero;
-	setCheckerboard(ShiftedCheck,bShifted); // Put them all together
+          setCheckerboard(ShiftedCheck, bShifted);  // Put them all together
-	setCheckerboard(ShiftedCheck,rShifted); // and check the results (later)
+          setCheckerboard(ShiftedCheck,
                          rShifted);  // and check the results (later)
          // Check results
          std::vector<int> coor(4);
-	for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
+          for (coor[3] = 0; coor[3] < latt_size[3] / mpi_layout[3]; coor[3]++) {
-	for(coor[2]=0;coor[2]<latt_size[2]/mpi_layout[2];coor[2]++){
+            for (coor[2] = 0; coor[2] < latt_size[2] / mpi_layout[2];
-	for(coor[1]=0;coor[1]<latt_size[1]/mpi_layout[1];coor[1]++){
+                 coor[2]++) {
-	for(coor[0]=0;coor[0]<latt_size[0]/mpi_layout[0];coor[0]++){
+              for (coor[1] = 0; coor[1] < latt_size[1] / mpi_layout[1];
-
+                   coor[1]++) {
-	 
+                for (coor[0] = 0; coor[0] < latt_size[0] / mpi_layout[0];
- 
+                     coor[0]++) {
                  std::complex<Grid::Real> diff;
                  std::vector<int> shiftcoor = coor;
-        shiftcoor[dir]=(shiftcoor[dir]+shift+latt_size[dir])%(latt_size[dir]/mpi_layout[dir]);
+                  shiftcoor[dir] = (shiftcoor[dir] + shift + latt_size[dir]) %
                                   (latt_size[dir] / mpi_layout[dir]);
                  std::vector<int> rl(4);
-	for(int dd=0;dd<4;dd++){
+                  for (int dd = 0; dd < 4; dd++) {
-	  rl[dd] = latt_size[dd]/simd_layout[dd]/mpi_layout[dd];
+                    rl[dd] = latt_size[dd] / simd_layout[dd] / mpi_layout[dd];
                  }
-	int lex =  coor[0]%rl[0]
+                  int lex = coor[0] % rl[0] + (coor[1] % rl[1]) * rl[0] +
-	  + (coor[1]%rl[1])*rl[0]
+                            (coor[2] % rl[2]) * rl[0] * rl[1] +
-	  + (coor[2]%rl[2])*rl[0]*rl[1]
+                            (coor[3] % rl[3]) * rl[0] * rl[1] * rl[2];
-	  + (coor[3]%rl[3])*rl[0]*rl[1]*rl[2];
+                  lex += +1000 * (coor[0] / rl[0]) +
-	lex += 
+                         1000 * (coor[1] / rl[1]) * simd_layout[0] +
-	  +1000*(coor[0]/rl[0])
+                         1000 * (coor[2] / rl[2]) * simd_layout[0] *
-	  +1000*(coor[1]/rl[1])*simd_layout[0]
+                             simd_layout[1] +
-	  +1000*(coor[2]/rl[2])*simd_layout[0]*simd_layout[1]
+                         1000 * (coor[3] / rl[3]) * simd_layout[0] *
-	  +1000*(coor[3]/rl[3])*simd_layout[0]*simd_layout[1]*simd_layout[2];
+                             simd_layout[1] * simd_layout[2];
-	int lex_coor = shiftcoor[0]%rl[0]
+                  int lex_coor = shiftcoor[0] % rl[0] +
-	  + (shiftcoor[1]%rl[1])*rl[0]
+                                 (shiftcoor[1] % rl[1]) * rl[0] +
-	  + (shiftcoor[2]%rl[2])*rl[0]*rl[1]
+                                 (shiftcoor[2] % rl[2]) * rl[0] * rl[1] +
-	  + (shiftcoor[3]%rl[3])*rl[0]*rl[1]*rl[2];
+                                 (shiftcoor[3] % rl[3]) * rl[0] * rl[1] * rl[2];
-	lex_coor += 
+                  lex_coor += +1000 * (shiftcoor[0] / rl[0]) +
-	  +1000*(shiftcoor[0]/rl[0])
+                              1000 * (shiftcoor[1] / rl[1]) * simd_layout[0] +
-	  +1000*(shiftcoor[1]/rl[1])*simd_layout[0]
+                              1000 * (shiftcoor[2] / rl[2]) * simd_layout[0] *
-	  +1000*(shiftcoor[2]/rl[2])*simd_layout[0]*simd_layout[1]
+                                  simd_layout[1] +
-	  +1000*(shiftcoor[3]/rl[3])*simd_layout[0]*simd_layout[1]*simd_layout[2];
+                              1000 * (shiftcoor[3] / rl[3]) * simd_layout[0] *
                                  simd_layout[1] * simd_layout[2];
                  ColourMatrix foo;
                  ColourMatrix bar;
@@ -544,69 +574,85 @@ int main (int argc, char ** argv)
                  ColourMatrix shifted3;
                  ColourMatrix foobar1;
                  ColourMatrix foobar2;
-        ColourMatrix mdiff,amdiff;
+                  ColourMatrix mdiff, amdiff;
-        peekSite(shifted1,Shifted,coor);
+                  peekSite(shifted1, Shifted, coor);
-        peekSite(shifted2,Foo,shiftcoor);
+                  peekSite(shifted2, Foo, shiftcoor);
-        peekSite(shifted3,ShiftedCheck,coor);
+                  peekSite(shifted3, ShiftedCheck, coor);
-        peekSite(foo,Foo,coor);
+                  peekSite(foo, Foo, coor);
-        mdiff = shifted1-shifted2;
+                  mdiff = shifted1 - shifted2;
-        amdiff=adj(mdiff);
+                  amdiff = adj(mdiff);
-        ColourMatrix prod = amdiff*mdiff;
+                  ColourMatrix prod = amdiff * mdiff;
                  Complex trprod = trace(prod);
-        Real Ttr=real(trprod);
+                  Real Ttr = real(trprod);
-        double nn=Ttr;
+                  double nn = Ttr;
-        if ( nn > 0 )
+                  if (nn > 0)
-            cout<<"Shift real trace fail "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<endl;
+                    cout << "Shift real trace fail " << coor[0] << coor[1]
                         << coor[2] << coor[3] << endl;
-
+                  for (int r = 0; r < 3; r++) {
-        for(int r=0;r<3;r++){
+                    for (int c = 0; c < 3; c++) {
-        for(int c=0;c<3;c++){
+                      diff = shifted1()()(r, c) - shifted2()()(r, c);
-            diff =shifted1()()(r,c)-shifted2()()(r,c);
+                      nn = real(conjugate(diff) * diff);
-            nn=real(conjugate(diff)*diff);
+                      if (nn > 0)
-            if ( nn > 0 )
+                        cout << "Shift fail (shifted1/shifted2-ref) " << coor[0]
-                cout<<"Shift fail (shifted1/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
+                             << coor[1] << coor[2] << coor[3] << " "
-                    <<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
+                             << shifted1()()(r, c) << " " << shifted2()()(r, c)
-                    << " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
+                             << " " << foo()()(r, c) << " lex expect "
-            else if(0)
+                             << lex_coor << " lex " << lex << endl;
-                cout<<"Shift pass 1vs2 "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
+                      else if (0)
-                    <<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
+                        cout << "Shift pass 1vs2 " << coor[0] << coor[1]
-                    << " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
+                             << coor[2] << coor[3] << " " << shifted1()()(r, c)
-        }}
+                             << " " << shifted2()()(r, c) << " "
-        
+                             << foo()()(r, c) << " lex expect " << lex_coor
-        for(int r=0;r<3;r++){
+                             << " lex " << lex << endl;
-        for(int c=0;c<3;c++){
+                    }
-            diff =shifted3()()(r,c)-shifted2()()(r,c);
+                  }
-            nn=real(conjugate(diff)*diff);
+
-            if ( nn > 0 )
+                  for (int r = 0; r < 3; r++) {
-                cout<<"Shift rb fail (shifted3/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
+                    for (int c = 0; c < 3; c++) {
-                <<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
+                      diff = shifted3()()(r, c) - shifted2()()(r, c);
-                << " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
+                      nn = real(conjugate(diff) * diff);
-            else if(0)
+                      if (nn > 0)
-                cout<<"Shift rb pass 3vs2 "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
+                        cout << "Shift rb fail (shifted3/shifted2-ref) "
-                <<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
+                             << coor[0] << coor[1] << coor[2] << coor[3] << " "
-                << " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
+                             << shifted3()()(r, c) << " " << shifted2()()(r, c)
-        }}
+                             << " " << foo()()(r, c) << " lex expect "
-        peekSite(bar,Bar,coor);
+                             << lex_coor << " lex " << lex << endl;
-                    
+                      else if (0)
-        peekSite(foobar1,FooBar,coor);
+                        cout << "Shift rb pass 3vs2 " << coor[0] << coor[1]
-        foobar2 = foo*bar;
+                             << coor[2] << coor[3] << " " << shifted3()()(r, c)
-        for(int r=0;r<Nc;r++){
+                             << " " << shifted2()()(r, c) << " "
-        for(int c=0;c<Nc;c++){
+                             << foo()()(r, c) << " lex expect " << lex_coor
-            diff =foobar2()()(r,c)-foobar1()()(r,c);
+                             << " lex " << lex << endl;
-            nrm = nrm + real(conjugate(diff)*diff);
+                    }
-        }}
+                  }
-    }}}}
+                  peekSite(bar, Bar, coor);
-	if( Fine.IsBoss() ){
+
-	  std::cout<<GridLogMessage << "LatticeColorMatrix * LatticeColorMatrix nrm diff = "<<nrm<<std::endl;
+                  peekSite(foobar1, FooBar, coor);
                  foobar2 = foo * bar;
                  for (int r = 0; r < Nc; r++) {
                    for (int c = 0; c < Nc; c++) {
                      diff = foobar2()()(r, c) - foobar1()()(r, c);
                      nrm = nrm + real(conjugate(diff) * diff);
                    }
                  }
                }
              }
            }
          }
          if (Fine.IsBoss()) {
            std::cout << GridLogMessage
                      << "LatticeColorMatrix * LatticeColorMatrix nrm diff = "
                      << nrm << std::endl;
          }
        }
      }
      }}
    }  // loop for lat
  }    // loop for omp
  /*
  // Testing Smearing routine compilation, separate in a different file
  GridCartesian           Fine(latt_size,simd_layout,mpi_layout);