mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

peekIndex update

Peter Boyle 2015-04-18 14:36:01 +01:00
parent d6c02e72d6
commit 57586c8e05
9 changed files with 562 additions and 109 deletions

View File

@@ -6,7 +6,23 @@
namespace Grid {
extern int GridCshiftPermuteMap[4][16];
// TODO: Indexing ()
// mac,real,imag
//
// Functionality required:
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conj
// real,imag
// transpose,transposeIndex
// trace,traceIndex
// peekIndex
// innerProduct,outerProduct,
// localNorm2
// localInnerProduct
//
extern int GridCshiftPermuteMap[4][16];
template<class vobj>
class Lattice
@@ -41,6 +57,9 @@ public:
template<class obj1,class obj2,class obj3>
friend void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
template<class obj1,class obj2,class obj3>
friend void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
template<class obj1,class obj2,class obj3>
friend void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
@@ -212,26 +231,6 @@ public:
return *this;
}
// FIXME trace type structure is weird
inline friend Lattice<iScalar<vComplex> > _trace(const Lattice<vobj> &lhs){
Lattice<iScalar<vComplex> > ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = trace(lhs._odata[ss]);
}
return ret;
};
inline friend Lattice<iScalar<iScalar< vComplex > > > trace(const Lattice<vobj> &lhs){
Lattice<iScalar< iScalar<vComplex> > > ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = trace(lhs._odata[ss]);
}
return ret;
};
inline friend Lattice<vobj> adj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
@@ -241,6 +240,16 @@ public:
return ret;
};
inline friend Lattice<vobj> transpose(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transpose(lhs._odata[ss]);
}
return ret;
};
inline friend Lattice<vobj> conj(const Lattice<vobj> &lhs){
Lattice<vobj> ret(lhs._grid);
#pragma omp parallel for
@@ -304,6 +313,17 @@ public:
mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
uint32_t vec_len = lhs._grid->oSites();
#pragma omp parallel for
for(int ss=0;ss<vec_len;ss++){
mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
}
}
template<class obj1,class obj2,class obj3>
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
conformable(lhs,rhs);
@@ -409,6 +429,82 @@ public:
return ret;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline auto trace(const Lattice<vobj> &lhs)
-> Lattice<decltype(trace(lhs._odata[0]))>
{
Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = trace(lhs._odata[ss]);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Index level dependent operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto traceIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
}
return ret;
};
template<int Index,class vobj>
inline auto transposeIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
}
return ret;
};
// FIXME: this is problematic since the number of args is variable and
// may mismatch...
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
}
return ret;
};
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs,int i)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
}
return ret;
};
template<int Index,class vobj>
inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
-> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
{
Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
#pragma omp parallel for
for(int ss=0;ss<lhs._grid->oSites();ss++){
ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
}
return ret;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -416,14 +512,16 @@ public:
inline RealD norm2(const Lattice<vobj> &arg){
typedef typename vobj::scalar_type scalar;
typedef typename vobj::vector_type vector;
decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
scalar nrm;
//FIXME make this loop parallelisable
vnrm=zero;
for(int ss=0;ss<arg._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
}
nrm = Reduce(TensorRemove(vnrm));
vector vvnrm =TensorRemove(vnrm) ;
nrm = Reduce(vvnrm);
arg._grid->GlobalSum(nrm);
return real(nrm);
}
@@ -439,7 +537,7 @@ public:
for(int ss=0;ss<left._grid->oSites(); ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
nrm = Reduce(TensorRemove(vnrm));
nrm = Reduce(vnrm);
right._grid->GlobalSum(nrm);
return nrm;
}
@@ -455,10 +553,32 @@ public:
Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
#pragma omp parallel for
for(int ss=0;ss<rhs._grid->oSites(); ss++){
ret._odata[ss]=innerProduct(rhs,rhs);
ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
}
return ret;
}
template<class vobj>
inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
{
Lattice<decltype(real(z._odata[0]))> ret(z._grid);
#pragma omp parallel for
for(int ss=0;ss<z._grid->oSites();ss++){
ret._odata[ss] = real(z._odata[ss]);
}
return ret;
}
template<class vobj>
inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
{
Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
#pragma omp parallel for
for(int ss=0;ss<z._grid->oSites();ss++){
ret._odata[ss] = imag(z._odata[ss]);
}
return ret;
}
// localInnerProduct
template<class vobj>

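The Lattice-level trace, traceIndex, transposeIndex and peekIndex wrappers added above deduce their return types with decltype, which is what allows the two hard-coded vComplex trace overloads to be deleted. A minimal usage sketch, assuming the QCD typedefs of the next file and a constructed grid named Fine (the names mirror the tests later in this commit):

    LatticeSpinColourMatrix scm(&Fine);
    LatticeSpinMatrix   sm = traceIndex<1>(scm);    // trace over the colour index
    LatticeColourMatrix cm = peekIndex<2>(scm,1,2); // peek spin indices (1,2)
    LatticeComplex      tr = trace(scm);            // full trace; site type deduced

Each wrapper loops over oSites() under OpenMP and applies the matching tensor-level routine site by site.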
View File

@@ -5,57 +5,80 @@ namespace QCD {
static const int Nc=3;
static const int Ns=4;
static const int Nd=4;
static const int CbRed =0;
static const int CbBlack=1;
//////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types
template<typename vtype> using iSinglet = iScalar<iScalar<vtype> > ;
template<typename vtype> using iSpinMatrix = iMatrix<iScalar<vtype>, Ns>;
template<typename vtype> using iSpinColourMatrix = iMatrix<iMatrix<vtype, Nc>, Ns>;
template<typename vtype> using iColourMatrix = iScalar<iMatrix<vtype, Nc>> ;
// Index conventions: Lorentz x Spin x Colour
//
// ChrisK very keen to add extra space for Gparity doubling.
//
// Also add domain wall index, in a way where Wilson operator
// naturally distributes across the 5th dimension.
//////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
template<typename vtype> using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template<typename vtype> using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template<typename vtype> using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > > ;
template<typename vtype> using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd > ;
template<typename vtype> using iSpinVector = iVector<iScalar<vtype>, Ns>;
template<typename vtype> using iColourVector = iScalar<iVector<vtype, Nc> >;
template<typename vtype> using iSpinColourVector = iVector<iVector<vtype, Nc>, Ns>;
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these.
typedef iSinglet<vInteger > vTInteger;
template<typename vtype> using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template<typename vtype> using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
template<typename vtype> using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
typedef iSpinColourMatrix<Complex > SpinColourMatrix;
typedef iLorentzColourMatrix<Complex > LorentzColourMatrix;
typedef iSpinVector<Complex > SpinVector;
typedef iColourVector<Complex > ColourVector;
typedef iSpinColourVector<Complex > SpinColourVector;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iSpinMatrix<vComplex > vSpinMatrix;
typedef iColourMatrix<vComplex > vColourMatrix;
typedef iSpinColourMatrix<vComplex > vSpinColourMatrix;
typedef iLorentzColourMatrix<vComplex > vLorentzColourMatrix;
typedef iSpinVector<vComplex > vSpinVector;
typedef iColourVector<vComplex > vColourVector;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef Lattice<vTComplex> LatticeComplex;
typedef iSinglet<Complex > TComplex; // This is painful. Tensor singlet complex type.
typedef iSinglet<vComplex > vTComplex; // what if we don't know the tensor structure
typedef iSinglet<Real > TReal; // Shouldn't need these; can I make it work without?
typedef iSinglet<vReal > vTReal;
typedef iSinglet<vInteger > vTInteger;
typedef iSinglet<Integer > TInteger;
typedef Lattice<vTReal> LatticeReal;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;
typedef Lattice<vSpinColourMatrix> LatticePropagator;
typedef LatticePropagator LatticeSpinColourMatrix;
typedef Lattice<vSpinColourMatrix> LatticeSpinColourMatrix;
typedef Lattice<vSpinColourVector> LatticeFermion;
typedef Lattice<vSpinColourVector> LatticeSpinColourVector;
typedef Lattice<vSpinVector> LatticeSpinVector;
typedef Lattice<vColourVector> LatticeColourVector;
///////////////////////////////////////////
// Physical names for things
///////////////////////////////////////////
typedef Lattice<vSpinColourVector> LatticeFermion;
typedef Lattice<vSpinColourMatrix> LatticePropagator;
typedef Lattice<vLorentzColourMatrix> LatticeGaugeField;
// FIXME for debug; deprecate this
inline void LatticeCoordinate(LatticeInteger &l,int mu){
inline void LatticeCoordinate(LatticeInteger &l,int mu){
GridBase *grid = l._grid;
int Nsimd = grid->iSites();
std::vector<int> gcoor;
@@ -63,8 +86,8 @@ namespace QCD {
std::vector<Integer *> mergeptr(Nsimd);
for(int o=0;o<grid->oSites();o++){
for(int i=0;i<grid->iSites();i++){
// RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
grid->RankIndexToGlobalCoor(0,o,i,gcoor);
grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
// grid->RankIndexToGlobalCoor(0,o,i,gcoor);
mergebuf[i]=gcoor[mu];
mergeptr[i]=&mergebuf[i];
}

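Under the new conventions every QCD type carries the full Lorentz x Spin x Colour index structure, with unused levels padded by iScalar; that uniform depth is what lets traceIndex<N> and peekIndex<N> address any level by number. A compile-time sketch of what two of the aliases expand to (assuming <type_traits> is included):

    static_assert(std::is_same<vColourMatrix,
                  iScalar<iScalar<iMatrix<vComplex,Nc> > > >::value,
                  "colour sits at the innermost index level");
    static_assert(std::is_same<vSpinColourVector,
                  iScalar<iVector<iVector<vComplex,Nc>,Ns> > >::value,
                  "spin wraps colour");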
View File

@@ -84,6 +84,8 @@ int main (int argc, char ** argv)
LatticeSpinColourMatrix scMat(&Fine);
LatticeComplex scalar(&Fine);
LatticeReal rscalar(&Fine);
LatticeReal iscalar(&Fine);
SpinMatrix GammaFive;
iSpinMatrix<vComplex> iGammaFive;
@@ -110,6 +112,44 @@ int main (int argc, char ** argv)
cMat = outerProduct(cVec,cVec);
scalar = localInnerProduct(cVec,cVec);
scalar += scalar;
scalar -= scalar;
scalar *= scalar;
add(scalar,scalar,scalar);
sub(scalar,scalar,scalar);
mult(scalar,scalar,scalar);
mac(scalar,scalar,scalar);
scalar = scalar+scalar;
scalar = scalar-scalar;
scalar = scalar*scalar;
scalar=outerProduct(scalar,scalar);
scalar=adj(scalar);
// rscalar=real(scalar);
// iscalar=imag(scalar);
// scalar =cmplx(rscalar,iscalar);
scalar=transpose(scalar);
scalar=transposeIndex<1>(scalar);
scalar=traceIndex<1>(scalar);
scalar=peekIndex<1>(cVec,0);
scalar=trace(scalar);
scalar=localInnerProduct(cVec,cVec);
scalar=localNorm2(cVec);
// -=,+=,*=,()
// add,+,sub,-,mult,mac,*
// adj,conj
// real,imag
// transpose,transposeIndex
// trace,traceIndex
// peekIndex
// innerProduct,outerProduct,
// localNorm2
// localInnerProduct
scMat = sMat*scMat; // LatticeSpinColourMatrix = LatticeSpinMatrix * LatticeSpinColourMatrix
@@ -171,15 +211,21 @@ int main (int argc, char ** argv)
SpinMatrix s_m;
SpinColourMatrix sc_m;
s_m = traceIndex<1>(sc_m);
c_m = traceIndex<2>(sc_m);
s_m = traceIndex<1>(sc_m); // Map to traceColour
c_m = traceIndex<2>(sc_m); // map to traceSpin
c = traceIndex<2>(s_m);
c = traceIndex<2>(s_m);
c = traceIndex<1>(c_m);
s_m = peekIndex<1>(scm,0,0);
c_m = peekIndex<2>(scm,1,2);
printf("c. Level %d\n",c_m.TensorLevel);
printf("c. Level %d\n",c_m._internal.TensorLevel);
printf("c. Level %d\n",c_m().TensorLevel);
printf("c. Level %d\n",c_m()().TensorLevel);
c_m()() = scm()(0,0); //ColourComponents of CM <= ColourComponents of SpinColourMatrix
scm()(1,1) = cm()(); //ColourComponents of SpinColourMatrix <= ColourComponents of CM
}
FooBar = Bar;
@@ -244,6 +290,9 @@ int main (int argc, char ** argv)
int ncall=100;
int Nc = Grid::QCD::Nc;
LatticeGaugeField U(&Fine);
// LatticeColourMatrix Uy = U(yDir);
flops = ncall*1.0*volume*(8*Nc*Nc*Nc);
bytes = ncall*1.0*volume*Nc*Nc *2*3*sizeof(Grid::Real);
if ( Fine.IsBoss() ) {
@@ -373,38 +422,38 @@ int main (int argc, char ** argv)
mdiff = shifted1-shifted2;
amdiff=adj(mdiff);
ColourMatrix prod = amdiff*mdiff;
TReal Ttr=real(trace(prod));
double nn=Ttr._internal._internal;
Real Ttr=real(trace(prod));
double nn=Ttr;
if ( nn > 0 )
cout<<"Shift real trace fail "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<endl;
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =shifted1._internal._internal[r][c]-shifted2._internal._internal[r][c];
diff =shifted1()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff);
if ( nn > 0 )
cout<<"Shift fail (shifted1/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted1._internal._internal[r][c]<<" "<<shifted2._internal._internal[r][c]
<< " "<< foo._internal._internal[r][c]<< " lex expect " << lex_coor << " lex "<<lex<<endl;
<<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
<< " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
else if(0)
cout<<"Shift pass 1vs2 "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted1._internal._internal[r][c]<<" "<<shifted2._internal._internal[r][c]
<< " "<< foo._internal._internal[r][c]<< " lex expect " << lex_coor << " lex "<<lex<<endl;
<<shifted1()()(r,c)<<" "<<shifted2()()(r,c)
<< " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
}}
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =shifted3._internal._internal[r][c]-shifted2._internal._internal[r][c];
diff =shifted3()()(r,c)-shifted2()()(r,c);
nn=real(conj(diff)*diff);
if ( nn > 0 )
cout<<"Shift rb fail (shifted3/shifted2-ref) "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted3._internal._internal[r][c]<<" "<<shifted2._internal._internal[r][c]
<< " "<< foo._internal._internal[r][c]<< " lex expect " << lex_coor << " lex "<<lex<<endl;
<<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
<< " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
else if(0)
cout<<"Shift rb pass 3vs2 "<<coor[0]<<coor[1]<<coor[2]<<coor[3] <<" "
<<shifted3._internal._internal[r][c]<<" "<<shifted2._internal._internal[r][c]
<< " "<< foo._internal._internal[r][c]<< " lex expect " << lex_coor << " lex "<<lex<<endl;
<<shifted3()()(r,c)<<" "<<shifted2()()(r,c)
<< " "<< foo()()(r,c)<< " lex expect " << lex_coor << " lex "<<lex<<endl;
}}
peekSite(bar,Bar,coor);
@@ -412,7 +461,7 @@ int main (int argc, char ** argv)
foobar2 = foo*bar;
for(int r=0;r<Nc;r++){
for(int c=0;c<Nc;c++){
diff =foobar2._internal._internal[r][c]-foobar1._internal._internal[r][c];
diff =foobar2()()(r,c)-foobar1()()(r,c);
nrm = nrm + real(conj(diff)*diff);
}}
}}}}

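The test rewrites above replace raw ._internal._internal[r][c] chains with the new operator() accessors: each () peels one tensor level, taking no argument on iScalar, one index on iVector and two on iMatrix, and returns an lvalue reference. A sketch of the equivalence for a hypothetical ColourMatrix cm:

    Complex z = cm()()(0,1); // read: iScalar -> iScalar -> iMatrix(0,1),
                             // the component the old code reached as cm._internal._internal[0][1]
    cm()()(0,1) = z;         // write: the chain returns an lvalue reference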
View File

@@ -4,6 +4,10 @@
#include <Grid_math_type_mapper.h>
#include <type_traits>
//
// Indexing; want to be able to dereference and
// obtain either an lvalue or an rvalue.
//
namespace Grid {
// First some of my own traits
@@ -58,6 +62,16 @@ namespace Grid {
static const bool value = (Level==T::TensorLevel);
static const bool notvalue = (Level!=T::TensorLevel);
};
// What is the vtype
template<typename T> struct isComplex {
static const bool value = false;
};
template<> struct isComplex<ComplexF> {
static const bool value = true;
};
template<> struct isComplex<ComplexD> {
static const bool value = true;
};
///////////////////////////////////////////////////
@@ -65,12 +79,7 @@ namespace Grid {
// These can be composed to form tensor products of internal indices.
///////////////////////////////////////////////////
// Terminates the recursion for removal of all Grid tensors
inline vRealD TensorRemove(vRealD arg){ return arg;}
inline vRealF TensorRemove(vRealF arg){ return arg;}
inline vComplexF TensorRemove(vComplexF arg){ return arg;}
inline vComplexD TensorRemove(vComplexD arg){ return arg;}
inline vInteger TensorRemove(vInteger arg){ return arg;}
template<class vtype> class iScalar
{
@@ -109,10 +118,6 @@ public:
friend void merge(iScalar<vtype> &in,std::vector<scalar_type *> &out){
merge(in._internal,out); // extract advances the pointers in out
}
friend inline iScalar<vtype>::vector_type TensorRemove(iScalar<vtype> arg)
{
return TensorRemove(arg._internal);
}
// Unary negation
friend inline iScalar<vtype> operator -(const iScalar<vtype> &r) {
@@ -133,7 +138,23 @@ public:
*this = (*this)+r;
return *this;
}
inline vtype & operator ()(void) {
return _internal;
}
operator ComplexD () const { return(TensorRemove(_internal)); };
operator RealD () const { return(real(TensorRemove(_internal))); }
};
///////////////////////////////////////////////////////////
// Allows one to turn scalar<scalar<scalar<double> > > back to double.
///////////////////////////////////////////////////////////
template<class T> inline typename std::enable_if<isGridTensor<T>::notvalue, T>::type TensorRemove(T arg) { return arg;}
template<class vtype> inline auto TensorRemove(iScalar<vtype> arg) -> decltype(TensorRemove(arg._internal))
{
return TensorRemove(arg._internal);
}
template<class vtype,int N> class iVector
{
@@ -148,7 +169,8 @@ public:
typedef iScalar<tensor_reduced_v> tensor_reduced;
iVector(Zero &z){ *this = zero; };
iVector() {};
iVector() {}; // Empty constructor
iVector<vtype,N> & operator= (Zero &hero){
zeroit(*this);
return *this;
@@ -192,6 +214,9 @@ public:
*this = (*this)+r;
return *this;
}
inline vtype & operator ()(int i) {
return _internal[i];
}
};
template<class vtype,int N> class iMatrix
@@ -262,6 +287,9 @@ public:
*this = (*this)+r;
return *this;
}
inline vtype & operator ()(int i,int j) {
return _internal[i][j];
}
};
@@ -801,6 +829,33 @@ template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,
}
template<class l,int N> inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
////////////////////////////////////////////////////////////////////
// Complex support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
template<class l> inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
return lhs*srhs;
}
template<class l> inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
return lhs*srhs;
}
template<class l,int N> inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{
typename iScalar<l>::scalar_type t(rhs);
typename iScalar<l>::tensor_reduced srhs(t);
return lhs*srhs;
}
template<class l,int N> inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
////////////////////////////////////////////////////////////////////
// Integer support; cast to "scalar_type" through constructor
////////////////////////////////////////////////////////////////////
@@ -1101,6 +1156,9 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
}}
return ret;
}
/////////////////////////////////////////////////////////////////
// Transpose all indices
/////////////////////////////////////////////////////////////////
@@ -1133,7 +1191,7 @@ template<class vtype,int N>
return ret;
}
template<class vtype,int N>
template<class vtype>
inline typename std::enable_if<isGridTensor<vtype>::value, iScalar<vtype> >::type
transpose(iScalar<vtype> arg)
{
@@ -1142,7 +1200,7 @@ template<class vtype,int N>
return ret;
}
template<class vtype,int N>
template<class vtype>
inline typename std::enable_if<isGridTensor<vtype>::notvalue, iScalar<vtype> >::type
transpose(iScalar<vtype> arg)
{
@@ -1151,6 +1209,7 @@ template<class vtype,int N>
return ret;
}
////////////////////////////////////////////////////////////////////////////////////////////
// Transpose a specific index; instructive to compare this style of recursion termination
// to that of adj; which is easier?
@@ -1182,7 +1241,15 @@ template<int Level,class vtype> inline
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, iScalar<vtype> >::type
transposeIndex (const iScalar<vtype> &arg)
{
return transposeIndex<Level>(arg._internal);
iScalar<vtype> ret;
ret._internal=transposeIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype> inline
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::value, iScalar<vtype> >::type
transposeIndex (const iScalar<vtype> &arg)
{
return arg;
}
//////////////////////////////////////////////////////////////////
@@ -1216,8 +1283,10 @@ inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._inte
ret._internal=trace(arg._internal);
return ret;
}
// Specific indices.
////////////////////////////////////////////////////////////////////////////////////////////////////////
// Trace Specific indices.
////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
template<int Level,class vtype> inline
auto traceIndex(const iScalar<vtype> &arg) -> iScalar<decltype(traceIndex<Level>(arg._internal)) >
{
@@ -1225,6 +1294,25 @@ auto traceIndex(const iScalar<vtype> &arg) -> iScalar<decltype(traceIndex<Level>
ret._internal = traceIndex<Level>(arg._internal);
return ret;
}
*/
template<int Level,class vtype> inline auto
traceIndex (const iScalar<vtype> &arg) ->
typename
std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue,
iScalar<decltype(traceIndex<Level>(arg._internal))> >::type
{
iScalar<decltype(traceIndex<Level>(arg._internal))> ret;
ret._internal=traceIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype> inline auto
traceIndex (const iScalar<vtype> &arg) ->
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::value,
iScalar<vtype> >::type
{
return arg;
}
// If we hit the right index, return scalar and trace it with no further recursion
template<int Level,class vtype,int N> inline
@@ -1254,6 +1342,149 @@ auto traceIndex(const iMatrix<vtype,N> &arg) ->
return ret;
}
//////////////////////////////////////////////////////////////////////////////
// Peek on a specific index; returns a scalar in that index, tensor inherits rest
//////////////////////////////////////////////////////////////////////////////
// If we hit the right index, return scalar with no further recursion
//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
// Scalar peek, no indices
template<int Level,class vtype> inline
auto peekIndex(const iScalar<vtype> &arg) ->
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::value, // Index matches
iScalar<vtype> >::type // return scalar
{
return arg;
}
// Vector peek, one index
template<int Level,class vtype,int N> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) ->
typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,Level>::value, // Index matches
iScalar<vtype> >::type // return scalar
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i];
return ret;
}
// Matrix peek, two indices
template<int Level,class vtype,int N> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) ->
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::value, // Index matches
iScalar<vtype> >::type // return scalar
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i][j];
return ret;
}
/////////////
// No-match peek for scalar, vector and matrix must forward on either 0, 1 or 2 args; must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype> inline
auto peekIndex(const iScalar<vtype> &arg) -> // Scalar 0 index
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, // Index does NOT match
iScalar<decltype(peekIndex<Level>(arg._internal))> >::type
{
iScalar<decltype(peekIndex<Level>(arg._internal))> ret;
ret._internal= peekIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype> inline
auto peekIndex(const iScalar<vtype> &arg,int i) -> // Scalar 1 index
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, // Index does NOT match
iScalar<decltype(peekIndex<Level>(arg._internal,i))> >::type
{
iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret;
ret._internal=peekIndex<Level>(arg._internal,i);
return ret;
}
template<int Level,class vtype> inline
auto peekIndex(const iScalar<vtype> &arg,int i,int j) -> // Scalar, 2 index
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, // Index does NOT match
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> >::type
{
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret;
ret._internal=peekIndex<Level>(arg._internal,i,j);
return ret;
}
// vector
template<int Level,class vtype,int N> inline
auto peekIndex(const iVector<vtype,N> &arg) ->
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, // Index does not match
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> >::type
{
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii]);
}
return ret;
}
template<int Level,class vtype,int N> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) ->
typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,Level>::notvalue, // Index does not match
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> >::type
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i);
}
return ret;
}
template<int Level,class vtype,int N> inline
auto peekIndex(const iVector<vtype,N> &arg,int i,int j) ->
typename std::enable_if<matchGridTensorIndex<iVector<vtype,N>,Level>::notvalue, // Index does not match
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> >::type
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j);
}
return ret;
}
// matrix
template<int Level,class vtype,int N> inline
auto peekIndex(const iMatrix<vtype,N> &arg) ->
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, // Index does not match
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> >::type
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb
}}
return ret;
}
template<int Level,class vtype,int N> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i) ->
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::notvalue, // Index does not match
iMatrix<decltype(peekIndex<Level>(arg._internal[0],i)),N> >::type
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i);
}}
return ret;
}
template<int Level,class vtype,int N> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) ->
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::notvalue, // Index does not match
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> >::type
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j);
}}
return ret;
}
/////////////////////////////////////////////////////////////////
// Can only take the real/imag part of scalar objects, since
// lattice objects of different complex nature are non-conformable.

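Two recursion patterns in this file do the heavy lifting. TensorRemove peels iScalar wrappers until the enable_if<notvalue> overload terminates on a non-tensor type, and peekIndex dispatches on matchGridTensorIndex: a matched level returns an iScalar in that slot, an unmatched one forwards its integer arguments inward. A sketch using the QCD aliases, consistent with the peekIndex tests earlier in the commit:

    iScalar<iScalar<iScalar<vComplex> > > s; // a vTComplex-style singlet
    vComplex v = TensorRemove(s);            // all iScalar layers peeled off

    SpinColourMatrix scm;
    ColourMatrix cm = peekIndex<2>(scm,1,2); // outer iScalar forwards (no match);
                                             // the spin-level iMatrix matches and peeks (1,2)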
View File

@@ -235,6 +235,7 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
#include <Grid_vComplexF.h>
#include <Grid_vComplexD.h>
namespace Grid {
// NB: Template the following on "type Complex" and then implement *,+,- for

View File

@@ -240,7 +240,7 @@ namespace Grid {
static int Nsimd(void) { return sizeof(dvec)/sizeof(double);}
};
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conj(l)*r; }
inline vRealD innerProduct(const vRealD & l, const vRealD & r) { return conj(l)*r; }
inline void zeroit(vRealD &z){ vzero(z);}
inline vRealD outerProduct(const vRealD &l, const vRealD& r)
@@ -250,6 +250,9 @@ namespace Grid {
inline vRealD trace(const vRealD &arg){
return arg;
}
inline vRealD real(const vRealD &arg){
return arg;
}
}

View File

@@ -267,10 +267,12 @@ friend inline void vstore(const vRealF &ret, float *a){
{
return l*r;
}
inline vRealF trace(const vRealF &arg){
return arg;
}
inline vRealF real(const vRealF &arg){
return arg;
}
}

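The trace and real identity overloads added for vRealF here (and for vRealD above) terminate the tensor-template recursion at the SIMD layer: once the descent reaches a bare vector register there is nothing left to trace or to project. A minimal sketch:

    vRealF x;            // assume initialised elsewhere
    vRealF t = trace(x); // identity: the recursion bottoms out here
    vRealF r = real(x);  // identity: a real SIMD vector is its own real part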
TODO
View File

@@ -1,36 +1,60 @@
AUDITS:
* FIXME audit
* const audit
* Replace vset with a call to merge;
* care in Gmerge, Gextract over vset.
* extract / merge extra implementation removal
BUILD:
* Test infrastructure
* subdirs lib, tests ??
FUNCTIONALITY:
* Conditional execution, where etc... -----DONE, simple test
* Integer relational support -----DONE
* Coordinate information, integers etc... -----DONE
* Integer type padding/union to vector. -----DONE
* LatticeCoordinate[mu] -----DONE
AUDITS:
// Lattice support audit Tested in Grid_main.cc
//
// -=,+=,*= Y
// add,+,sub,-,mult,mac,* Y
// innerProduct,norm2 Y
// localInnerProduct,outerProduct, Y
// adj,conj Y
// transpose, Y
// trace Y
//
// transposeIndex Y
// traceIndex Y
// peekIndex N; #args
//
// real,imag missing; semantic thought needed on real/imag support.
// perhaps I just keep everything complex?
//
* FIXME audit
* const audit
* Replace vset with a call to merge;
* care in Gmerge, Gextract over vset.
* extract / merge extra implementation removal
BUILD:
* Test infrastructure
* subdirs lib, tests ??
* How to do U[mu] ... Lorentz part of type structure or not. More like Chroma if not.
* Passing a Grid into construct iVector<LatticeColourMatrix,4>??
*
* Stencil operator support -----Initial thoughts, trial implementation DONE.
-----some simple tests that Stencil matches Cshift.
-----do all permute in comms phase, so that copy permute
-----cases move into a buffer.
-----allow transform in/out buffers spproj
* CovariantShift support -----Use a class to store gauge field? (parallel transport?)
POST-SFW call April 16:
POST-SFW call:
* TraceColor, TraceSpin. ----- DONE (traceIndex<1>,traceIndex<2>, transposeIndex<1>,transposeIndex<2>)
----- Implement mapping between traceColour and traceSpin and traceIndex<1/2>.
* expose traceIndex, peekIndex, transposeIndex etc at the Lattice Level -- DONE
* TraceColor, TraceSpin.
* How to define simple matrix operations, such as flavour matrices?
* Subset support, slice sums etc... -----Only need slice sum?
-----Generic cartesian subslicing?
-----Array ranges / boost extents?
@@ -38,10 +62,10 @@ POST-SFW call:
-----Suggests generalised cartesian subblocking
sums, returning modified grid?
-----What should interface be?
i) Two classes of subset; red black parity subsetting (pick checkerboard).
cartesian sub-block subsetting
ii) Need to be able to project one Grid to another Grid.
Interface: (?)

View File

@@ -107,7 +107,7 @@ int main (int argc, char ** argv)
Real nrmC = norm2(Check);
Real nrmB = norm2(Bar);
Real nrm = norm2(Check-Bar);
Real nrm = norm2(Check-Bar);
printf("N2diff = %le (%le, %le) \n",nrm,nrmC,nrmB);fflush(stdout);
Real snrmC =0;
@@ -127,18 +127,18 @@ int main (int argc, char ** argv)
for(int r=0;r<3;r++){
for(int c=0;c<3;c++){
diff =check._internal._internal[r][c]-bar._internal._internal[r][c];
diff =check()()(r,c)-bar()()(r,c);
double nn=real(conj(diff)*diff);
if ( nn > 0){
printf("Coor (%d %d %d %d) \t rc %d%d \t %le %le %le\n",
coor[0],coor[1],coor[2],coor[3],r,c,
nn,
real(check._internal._internal[r][c]),
real(bar._internal._internal[r][c])
real(check()()(r,c)),
real(bar()()(r,c))
);
}
snrmC=snrmC+real(conj(check._internal._internal[r][c])*check._internal._internal[r][c]);
snrmB=snrmB+real(conj(bar._internal._internal[r][c])*bar._internal._internal[r][c]);
snrmC=snrmC+real(conj(check()()(r,c))*check()()(r,c));
snrmB=snrmB+real(conj(bar()()(r,c))*bar()()(r,c));
snrm=snrm+nn;
}}