mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Shrinking and organising the files
This commit is contained in:
		@@ -1,5 +1,5 @@
 | 
			
		||||
//
 | 
			
		||||
//  Grid.cpp
 | 
			
		||||
//  Grid.h
 | 
			
		||||
//  simd
 | 
			
		||||
//
 | 
			
		||||
//  Created by Peter Boyle on 09/05/2014.
 | 
			
		||||
 
 | 
			
		||||
@@ -139,30 +139,6 @@ namespace QCD {
 | 
			
		||||
      return peekIndex<LorentzIndex>(rhs,i,j);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // FIXME this is rather generic and should find a way to place it earlier.
 | 
			
		||||
     inline void LatticeCoordinate(LatticeInteger &l,int mu){
 | 
			
		||||
      GridBase *grid = l._grid;
 | 
			
		||||
      int Nsimd = grid->iSites();
 | 
			
		||||
      std::vector<int> gcoor;
 | 
			
		||||
      std::vector<Integer> mergebuf(Nsimd);
 | 
			
		||||
      std::vector<Integer *> mergeptr(Nsimd);
 | 
			
		||||
      for(int o=0;o<grid->oSites();o++){
 | 
			
		||||
	for(int i=0;i<grid->iSites();i++){
 | 
			
		||||
	  grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
 | 
			
		||||
	  //	  grid->RankIndexToGlobalCoor(0,o,i,gcoor);
 | 
			
		||||
	  mergebuf[i]=gcoor[mu];
 | 
			
		||||
	  mergeptr[i]=&mergebuf[i];
 | 
			
		||||
	}
 | 
			
		||||
	merge(l._odata[o],mergeptr);
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
#include <Grid_predicated.h>
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
}   //namespace QCD
 | 
			
		||||
} // Grid
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,11 @@
 | 
			
		||||
#ifndef _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
#define _GRID_CSHIFT_COMMON_H_
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Gather for when there is no need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
			
		||||
template<class vobj> void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -48,7 +49,7 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Gather for when there *is* need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
 | 
			
		||||
 template<class vobj,class scalar_type> void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -91,7 +92,7 @@ friend void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> p
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Scatter for when there is no need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
			
		||||
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer,             int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -134,7 +135,7 @@ friend void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAll
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Scatter for when there *is* need to SIMD split
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
 | 
			
		||||
 template<class vobj,class scalar_type> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> pointers,int dimension,int plane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -177,7 +178,7 @@ friend void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> po
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// local to node block strided copies
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
 | 
			
		||||
template<class vobj> void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -219,7 +220,7 @@ friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
friend void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
 | 
			
		||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
 | 
			
		||||
{
 | 
			
		||||
  int rd = rhs._grid->_rdimensions[dimension];
 | 
			
		||||
 | 
			
		||||
@@ -265,7 +266,7 @@ friend void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimens
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
// Local to node Cshift
 | 
			
		||||
//////////////////////////////////////////////////////
 | 
			
		||||
friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
template<class vobj> void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
{
 | 
			
		||||
  int sshift[2];
 | 
			
		||||
 | 
			
		||||
@@ -280,7 +281,7 @@ friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
friend Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid = rhs._grid;
 | 
			
		||||
  int fd = grid->_fdimensions[dimension];
 | 
			
		||||
@@ -322,5 +323,5 @@ friend Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dime
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,9 @@
 | 
			
		||||
#define MIN(x,y) ((x)>(y)?(y):(x))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
namespace Grid { 
 | 
			
		||||
 | 
			
		||||
template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::vector_type vector_type;
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
@@ -37,7 +39,7 @@ friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
friend void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
template<class vobj> void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
{
 | 
			
		||||
  int sshift[2];
 | 
			
		||||
 | 
			
		||||
@@ -52,7 +54,7 @@ friend void Cshift_comms(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
friend void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
template<class vobj> void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
{
 | 
			
		||||
  int sshift[2];
 | 
			
		||||
 | 
			
		||||
@@ -67,8 +69,7 @@ friend void Cshift_comms_simd(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimensio
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::vector_type vector_type;
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
@@ -127,8 +128,7 @@ friend void Cshift_comms(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
friend void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
template<class vobj> void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 | 
			
		||||
{
 | 
			
		||||
  GridBase *grid=rhs._grid;
 | 
			
		||||
  const int Nsimd = grid->Nsimd();
 | 
			
		||||
@@ -260,4 +260,5 @@ friend void  Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -1,12 +1,12 @@
 | 
			
		||||
#ifndef _GRID_CSHIFT_NONE_H_
 | 
			
		||||
#define _GRID_CSHIFT_NONE_H_
 | 
			
		||||
 | 
			
		||||
friend Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
namespace Grid {
 | 
			
		||||
template<class vobj> Lattice<vobj> Cshift(Lattice<vobj> &rhs,int dimension,int shift)
 | 
			
		||||
{
 | 
			
		||||
  Lattice<vobj> ret(rhs._grid);
 | 
			
		||||
  ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
 | 
			
		||||
  Cshift_local(ret,rhs,dimension,shift);
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
        
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
@@ -3,10 +3,10 @@
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
// TODO: Indexing ()
 | 
			
		||||
// TODO: 
 | 
			
		||||
//       mac,real,imag
 | 
			
		||||
//
 | 
			
		||||
// Functionality required:
 | 
			
		||||
 | 
			
		||||
// Functionality:
 | 
			
		||||
//     -=,+=,*=,()
 | 
			
		||||
//     add,+,sub,-,mult,mac,*
 | 
			
		||||
//     adj,conj
 | 
			
		||||
@@ -17,7 +17,6 @@ namespace Grid {
 | 
			
		||||
//     innerProduct,outerProduct,
 | 
			
		||||
//     localNorm2
 | 
			
		||||
//     localInnerProduct
 | 
			
		||||
//     
 | 
			
		||||
 | 
			
		||||
extern int GridCshiftPermuteMap[4][16];
 | 
			
		||||
 | 
			
		||||
@@ -33,51 +32,39 @@ public:
 | 
			
		||||
    typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
    typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
    // what about a default grid?
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    Lattice(GridBase *grid) : _grid(grid) {
 | 
			
		||||
        _odata.reserve(_grid->oSites());
 | 
			
		||||
        assert((((uint64_t)&_odata[0])&0xF) ==0);
 | 
			
		||||
        checkerboard=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
#include <Grid_cshift.h>
 | 
			
		||||
   
 | 
			
		||||
    template<class obj1,class obj2>
 | 
			
		||||
    friend void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs);
 | 
			
		||||
    template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
            this->_odata[ss]=r;
 | 
			
		||||
        }
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // FIXME Performance difference between operator * and mult is troubling.
 | 
			
		||||
    // Auto move constructor seems to lose surprisingly much.
 | 
			
		||||
 | 
			
		||||
    // Site wise binary operations
 | 
			
		||||
    // We eliminate a temporary object assignment if use the mult,add,sub routines.
 | 
			
		||||
    // For the operator versions we rely on move constructor to eliminate the
 | 
			
		||||
    // vector copy back.
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    friend void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    friend void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    friend void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    friend void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs);
 | 
			
		||||
    // *=,+=,-= operators inherit behvour from correspond */+/- operation
 | 
			
		||||
    template<class T> inline Lattice<vobj> &operator *=(const T &r) {
 | 
			
		||||
        *this = (*this)*r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class T> inline Lattice<vobj> &operator -=(const T &r) {
 | 
			
		||||
        *this = (*this)-r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    template<class T> inline Lattice<vobj> &operator +=(const T &r) {
 | 
			
		||||
        *this = (*this)+r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    friend void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    friend void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
@@ -87,550 +74,23 @@ public:
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class sobj>
 | 
			
		||||
    inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
            this->_odata[ss]=r;
 | 
			
		||||
        }
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // Poke a scalar object into the SIMD array
 | 
			
		||||
    template<class sobj>
 | 
			
		||||
    friend void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
 | 
			
		||||
      GridBase *grid=l._grid;
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
      int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
      assert( l.checkerboard== l._grid->CheckerBoard(site));
 | 
			
		||||
      assert( sizeof(sobj)*Nsimd == sizeof(vobj));
 | 
			
		||||
 | 
			
		||||
      int rank,odx,idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
			
		||||
 | 
			
		||||
      // Optional to broadcast from node 0.
 | 
			
		||||
      grid->Broadcast(0,s);
 | 
			
		||||
 | 
			
		||||
      std::vector<sobj> buf(Nsimd);
 | 
			
		||||
      std::vector<scalar_type *> pointers(Nsimd);  
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
 | 
			
		||||
      // extract-modify-merge cycle is easiest way and this is not perf critical
 | 
			
		||||
      extract(l._odata[odx],pointers);
 | 
			
		||||
      
 | 
			
		||||
      buf[idx] = s;
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
      merge(l._odata[odx],pointers);
 | 
			
		||||
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    // Peek a scalar object from the SIMD array
 | 
			
		||||
    template<class sobj>
 | 
			
		||||
    friend void peekSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
        
 | 
			
		||||
      GridBase *grid=l._grid;
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
      int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
      assert( l.checkerboard== l._grid->CheckerBoard(site));
 | 
			
		||||
      assert( sizeof(sobj)*Nsimd == sizeof(vobj));
 | 
			
		||||
 | 
			
		||||
      int rank,odx,idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
			
		||||
      std::vector<sobj> buf(Nsimd);
 | 
			
		||||
      std::vector<scalar_type *> pointers(Nsimd);  
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
 | 
			
		||||
      extract(l._odata[odx],pointers);
 | 
			
		||||
      
 | 
			
		||||
      s = buf[idx];
 | 
			
		||||
      grid->Broadcast(rank,s);
 | 
			
		||||
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    // FIXME Randomise; deprecate this
 | 
			
		||||
    friend void random(Lattice<vobj> &l){
 | 
			
		||||
        Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
        size_t v_len = l._grid->oSites()*sizeof(vobj);
 | 
			
		||||
        size_t d_len = v_len/sizeof(Real);
 | 
			
		||||
	
 | 
			
		||||
        for(int i=0;i<d_len;i++){
 | 
			
		||||
 | 
			
		||||
            v_ptr[i]=drand48();
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // FIXME for debug; deprecate this; made obscelete by 
 | 
			
		||||
    // LatticeCoordinate();
 | 
			
		||||
    friend void lex_sites(Lattice<vobj> &l){
 | 
			
		||||
      Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
      size_t o_len = l._grid->oSites();
 | 
			
		||||
      size_t v_len = sizeof(vobj)/sizeof(vRealF);
 | 
			
		||||
      size_t vec_len = vRealF::Nsimd();
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<o_len;i++){
 | 
			
		||||
	for(int j=0;j<v_len;j++){
 | 
			
		||||
          for(int vv=0;vv<vec_len;vv+=2){
 | 
			
		||||
	    v_ptr[i*v_len*vec_len+j*vec_len+vv  ]= i+vv*500;
 | 
			
		||||
	    v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
 | 
			
		||||
	  }
 | 
			
		||||
	}}
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // FIXME Implement a consistent seed management strategy
 | 
			
		||||
    friend void gaussian(Lattice<vobj> &l){
 | 
			
		||||
        // Zero mean, unit variance.
 | 
			
		||||
        std::normal_distribution<double> distribution(0.0,1.0);
 | 
			
		||||
        Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
        size_t v_len = l._grid->oSites()*sizeof(vobj);
 | 
			
		||||
        size_t d_len = v_len/sizeof(Real);
 | 
			
		||||
 | 
			
		||||
        for(int i=0;i<d_len;i++){
 | 
			
		||||
	  v_ptr[i]= drand48();
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Unary functions and Unops
 | 
			
		||||
    friend inline Lattice<vobj> operator -(const Lattice<vobj> &r) {
 | 
			
		||||
        Lattice<vobj> ret(r._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<r._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss]= -r._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    // *=,+=,-= operators inherit behvour from correspond */+/- operation
 | 
			
		||||
    template<class T>
 | 
			
		||||
    inline Lattice<vobj> &operator *=(const T &r) {
 | 
			
		||||
        *this = (*this)*r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    template<class T>
 | 
			
		||||
    inline Lattice<vobj> &operator -=(const T &r) {
 | 
			
		||||
        *this = (*this)-r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    template<class T>
 | 
			
		||||
    inline Lattice<vobj> &operator +=(const T &r) {
 | 
			
		||||
        *this = (*this)+r;
 | 
			
		||||
        return *this;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    inline friend Lattice<vobj> adj(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = adj(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    inline friend Lattice<vobj> transpose(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = transpose(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    inline friend Lattice<vobj> conj(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = conj(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // remove and insert a half checkerboard
 | 
			
		||||
    friend void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
 | 
			
		||||
      half.checkerboard = cb;
 | 
			
		||||
      int ssh=0;
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<full._grid->oSites();ss++){
 | 
			
		||||
	std::vector<int> coor;
 | 
			
		||||
	int cbos;
 | 
			
		||||
	
 | 
			
		||||
	full._grid->oCoorFromOindex(coor,ss);
 | 
			
		||||
	cbos=half._grid->CheckerBoard(coor);
 | 
			
		||||
	
 | 
			
		||||
	if (cbos==cb) {
 | 
			
		||||
 | 
			
		||||
	  half._odata[ssh] = full._odata[ss];
 | 
			
		||||
	  ssh++;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    friend void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
 | 
			
		||||
      int cb = half.checkerboard;
 | 
			
		||||
      int ssh=0;
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<full._grid->oSites();ss++){
 | 
			
		||||
	std::vector<int> coor;
 | 
			
		||||
	int cbos;
 | 
			
		||||
	
 | 
			
		||||
	full._grid->oCoorFromOindex(coor,ss);
 | 
			
		||||
	cbos=half._grid->CheckerBoard(coor);
 | 
			
		||||
 | 
			
		||||
	if (cbos==cb) {
 | 
			
		||||
	  full._odata[ss]=half._odata[ssh];
 | 
			
		||||
	  ssh++;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
}; // class Lattice
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2>
 | 
			
		||||
    void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
 | 
			
		||||
    {
 | 
			
		||||
        assert(lhs._grid == rhs._grid);
 | 
			
		||||
        assert(lhs.checkerboard == rhs.checkerboard);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
	uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
	  mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
	uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
	  mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
        conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Lattice BinOp Lattice,
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
      Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
      mult(ret,lhs,rhs);
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
        add(ret,lhs,rhs);
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
        sub(ret,lhs,rhs);
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Scalar BinOp Lattice ;generate return type
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs*rhs._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs+rhs._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs-rhs._odata[ss];
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs._odata[ss]*rhs;
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs._odata[ss]+rhs;
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
    inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs._odata[ss]-rhs;
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Trace
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto trace(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(trace(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = trace(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Index level dependent operations
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto traceIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    };
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto transposeIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // Fixme; this is problematic since the number of args is variable and 
 | 
			
		||||
    // may mismatch...
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto peekIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
      inline auto peekIndex(const Lattice<vobj> &lhs,int i)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
      inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Poke internal indices of a Lattice object
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Reduction operations
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline RealD norm2(const Lattice<vobj> &arg){
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar;
 | 
			
		||||
      typedef typename vobj::vector_type vector;
 | 
			
		||||
      decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
 | 
			
		||||
      scalar nrm;
 | 
			
		||||
      //FIXME make this loop parallelisable
 | 
			
		||||
      vnrm=zero;
 | 
			
		||||
      for(int ss=0;ss<arg._grid->oSites(); ss++){
 | 
			
		||||
	vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      vector vvnrm =TensorRemove(vnrm) ;
 | 
			
		||||
      nrm = Reduce(vvnrm);
 | 
			
		||||
      arg._grid->GlobalSum(nrm);
 | 
			
		||||
      return real(nrm);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
 | 
			
		||||
    {
 | 
			
		||||
      typedef typename vobj::scalar_type scalar;
 | 
			
		||||
      decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
 | 
			
		||||
 | 
			
		||||
      scalar nrm;
 | 
			
		||||
      //FIXME make this loop parallelisable
 | 
			
		||||
      for(int ss=0;ss<left._grid->oSites(); ss++){
 | 
			
		||||
	vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      nrm = Reduce(vnrm);
 | 
			
		||||
      right._grid->GlobalSum(nrm);
 | 
			
		||||
      return nrm;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Non site reduced routines
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    // localNorm2,
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	  ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(real(z._odata[0]))> ret(z._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<z._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = real(z._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<z._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = imag(z._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // localInnerProduct
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
 | 
			
		||||
      -> Lattice<typename vobj::tensor_reduced>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // outerProduct Scalar x Scalar -> Scalar
 | 
			
		||||
    //              Vector x Vector -> Matrix
 | 
			
		||||
    template<class ll,class rr>
 | 
			
		||||
    inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 }; // class Lattice
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#include <Grid_lattice_conformable.h>
 | 
			
		||||
#include <Grid_lattice_arith.h>
 | 
			
		||||
#include <Grid_lattice_trace.h>
 | 
			
		||||
#include <Grid_lattice_transpose.h>
 | 
			
		||||
#include <Grid_lattice_local.h>
 | 
			
		||||
#include <Grid_lattice_reduction.h>
 | 
			
		||||
#include <Grid_lattice_peekpoke.h>
 | 
			
		||||
#include <Grid_lattice_reality.h>
 | 
			
		||||
#include <Grid_cshift.h>
 | 
			
		||||
#include <Grid_where.h>
 | 
			
		||||
#include <Grid_lattice_coordinate.h>
 | 
			
		||||
#include <Grid_lattice_rng.h>
 | 
			
		||||
#include <Grid_lattice_transfer.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										160
									
								
								lib/Grid_lattice_arith.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										160
									
								
								lib/Grid_lattice_arith.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,160 @@
 | 
			
		||||
#ifndef GRID_LATTICE_ARITH_H
 | 
			
		||||
#define GRID_LATTICE_ARITH_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline Lattice<vobj> operator -(const Lattice<vobj> &r)
 | 
			
		||||
  {
 | 
			
		||||
    Lattice<vobj> ret(r._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<r._grid->oSites();ss++){
 | 
			
		||||
      ret._odata[ss]= -r._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline void axpy(Lattice<vobj> &ret,double a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
  inline void axpy(Lattice<vobj> &ret,std::complex<double> a,const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mult(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
    uint32_t vec_len = lhs._grid->oSites();
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<vec_len;ss++){
 | 
			
		||||
      mac(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class obj1,class obj2,class obj3>
 | 
			
		||||
    void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
 | 
			
		||||
    conformable(lhs,rhs);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
      add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Lattice BinOp Lattice,
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator * (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
    //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
    Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
    mult(ret,lhs,rhs);
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator + (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
    //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
    Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
    add(ret,lhs,rhs);
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator - (const Lattice<left> &lhs,const Lattice<right> &rhs)-> Lattice<decltype(lhs._odata[0]*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
    //NB mult performs conformable check. Do not reapply here for performance.
 | 
			
		||||
    Lattice<decltype(lhs._odata[0]*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
    sub(ret,lhs,rhs);
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  // Scalar BinOp Lattice ;generate return type
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator * (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
    Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
      ret._odata[ss]=lhs*rhs._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
        return ret;
 | 
			
		||||
  }
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator + (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	ret._odata[ss]=lhs+rhs._odata[ss];
 | 
			
		||||
      }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
  template<class left,class right>
 | 
			
		||||
    inline auto operator - (const left &lhs,const Lattice<right> &rhs) -> Lattice<decltype(lhs*rhs._odata[0])>
 | 
			
		||||
  {
 | 
			
		||||
    Lattice<decltype(lhs*rhs._odata[0])> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
      ret._odata[ss]=lhs-rhs._odata[ss];
 | 
			
		||||
    }
 | 
			
		||||
    return ret;
 | 
			
		||||
  }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
      inline auto operator * (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<lhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=lhs._odata[ss]*rhs;
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
      inline auto operator + (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	  ret._odata[ss]=lhs._odata[ss]+rhs;
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    template<class left,class right>
 | 
			
		||||
      inline auto operator - (const Lattice<left> &lhs,const right &rhs) -> Lattice<decltype(lhs._odata[0]*rhs)>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(lhs._odata[0]*rhs)> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	ret._odata[ss]=lhs._odata[ss]-rhs;
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										14
									
								
								lib/Grid_lattice_conformable.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								lib/Grid_lattice_conformable.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
			
		||||
#ifndef GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
#define GRID_LATTICE_CONFORMABLE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    template<class obj1,class obj2>
 | 
			
		||||
    void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs)
 | 
			
		||||
    {
 | 
			
		||||
        assert(lhs._grid == rhs._grid);
 | 
			
		||||
        assert(lhs.checkerboard == rhs.checkerboard);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										45
									
								
								lib/Grid_lattice_coordinate.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								lib/Grid_lattice_coordinate.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
			
		||||
#ifndef GRID_LATTICE_COORDINATE_H
 | 
			
		||||
#define GRID_LATTICE_COORDINATE_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
 | 
			
		||||
    {
 | 
			
		||||
      GridBase *grid = l._grid;
 | 
			
		||||
      int Nsimd = grid->iSites();
 | 
			
		||||
      std::vector<int> gcoor;
 | 
			
		||||
      std::vector<Integer> mergebuf(Nsimd);
 | 
			
		||||
      std::vector<Integer *> mergeptr(Nsimd);
 | 
			
		||||
      vInteger vI;
 | 
			
		||||
      for(int o=0;o<grid->oSites();o++){
 | 
			
		||||
	for(int i=0;i<grid->iSites();i++){
 | 
			
		||||
	  grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
 | 
			
		||||
	  //	  grid->RankIndexToGlobalCoor(0,o,i,gcoor);
 | 
			
		||||
	  mergebuf[i]=gcoor[mu];
 | 
			
		||||
	  mergeptr[i]=&mergebuf[i];
 | 
			
		||||
	}
 | 
			
		||||
	merge(vI,mergeptr);
 | 
			
		||||
	l._odata[o]=vI;
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    // LatticeCoordinate();
 | 
			
		||||
    // FIXME for debug; deprecate this; made obscelete by 
 | 
			
		||||
    template<class vobj> void lex_sites(Lattice<vobj> &l){
 | 
			
		||||
      Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
      size_t o_len = l._grid->oSites();
 | 
			
		||||
      size_t v_len = sizeof(vobj)/sizeof(vRealF);
 | 
			
		||||
      size_t vec_len = vRealF::Nsimd();
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<o_len;i++){
 | 
			
		||||
	for(int j=0;j<v_len;j++){
 | 
			
		||||
          for(int vv=0;vv<vec_len;vv+=2){
 | 
			
		||||
	    v_ptr[i*v_len*vec_len+j*vec_len+vv  ]= i+vv*500;
 | 
			
		||||
	    v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
 | 
			
		||||
	  }
 | 
			
		||||
	}}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										54
									
								
								lib/Grid_lattice_local.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								lib/Grid_lattice_local.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,54 @@
 | 
			
		||||
#ifndef GRID_LATTICE_LOCALREDUCTION_H
 | 
			
		||||
#define GRID_LATTICE_LOCALREDUCTION_H
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
// localInner, localNorm, outerProduct
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
    // Non site, reduced locally reduced routines
 | 
			
		||||
    /////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
    // localNorm2,
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto localNorm2 (const Lattice<vobj> &rhs)-> Lattice<typename vobj::tensor_reduced>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	  ret._odata[ss]=innerProduct(rhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // localInnerProduct
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto localInnerProduct (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
 | 
			
		||||
      -> Lattice<typename vobj::tensor_reduced>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<typename vobj::tensor_reduced> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
	ret._odata[ss]=innerProduct(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // outerProduct Scalar x Scalar -> Scalar
 | 
			
		||||
    //              Vector x Vector -> Matrix
 | 
			
		||||
    template<class ll,class rr>
 | 
			
		||||
    inline auto outerProduct (const Lattice<ll> &lhs,const Lattice<rr> &rhs) -> Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
        Lattice<decltype(outerProduct(lhs._odata[0],rhs._odata[0]))> ret(rhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<rhs._grid->oSites(); ss++){
 | 
			
		||||
            ret._odata[ss]=outerProduct(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
     }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										143
									
								
								lib/Grid_lattice_peekpoke.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								lib/Grid_lattice_peekpoke.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,143 @@
 | 
			
		||||
#ifndef GRID_LATTICE_PEEK_H
 | 
			
		||||
#define GRID_LATTICE_PEEK_H
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
// Peeking and poking around
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Peek internal indices of a Lattice object
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto peekIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = peekIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
      inline auto peekIndex(const Lattice<vobj> &lhs,int i)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0],i))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
      inline auto peekIndex(const Lattice<vobj> &lhs,int i,int j)
 | 
			
		||||
      -> Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(peekIndex<Index>(lhs._odata[0],i,j))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  ret._odata[ss] = peekIndex<Index>(lhs._odata[ss],i,j);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Poke internal indices of a Lattice object
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0]))> & rhs)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss]);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0))> & rhs,int i)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
    template<int Index,class vobj> inline
 | 
			
		||||
    void pokeIndex(Lattice<vobj> &lhs,const Lattice<decltype(peekIndex<Index>(lhs._odata[0],0,0))> & rhs,int i,int j)
 | 
			
		||||
    {
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	  pokeIndex<Index>(lhs._odata[ss],rhs._odata[ss],i,j);
 | 
			
		||||
	}      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    // Poke a scalar object into the SIMD array
 | 
			
		||||
    //////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj,class sobj>
 | 
			
		||||
    void pokeSite(const sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
 | 
			
		||||
      GridBase *grid=l._grid;
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
      int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
      assert( l.checkerboard== l._grid->CheckerBoard(site));
 | 
			
		||||
      assert( sizeof(sobj)*Nsimd == sizeof(vobj));
 | 
			
		||||
 | 
			
		||||
      int rank,odx,idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
			
		||||
 | 
			
		||||
      // Optional to broadcast from node 0.
 | 
			
		||||
      grid->Broadcast(0,s);
 | 
			
		||||
 | 
			
		||||
      std::vector<sobj> buf(Nsimd);
 | 
			
		||||
      std::vector<scalar_type *> pointers(Nsimd);  
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
 | 
			
		||||
      // extract-modify-merge cycle is easiest way and this is not perf critical
 | 
			
		||||
      extract(l._odata[odx],pointers);
 | 
			
		||||
      
 | 
			
		||||
      buf[idx] = s;
 | 
			
		||||
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
      merge(l._odata[odx],pointers);
 | 
			
		||||
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    // Peek a scalar object from the SIMD array
 | 
			
		||||
    //////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj,class sobj>
 | 
			
		||||
      void peekSite(sobj &s,Lattice<vobj> &l,std::vector<int> &site){
 | 
			
		||||
        
 | 
			
		||||
      GridBase *grid=l._grid;
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
      typedef typename vobj::vector_type vector_type;
 | 
			
		||||
 | 
			
		||||
      int Nsimd = grid->Nsimd();
 | 
			
		||||
 | 
			
		||||
      assert( l.checkerboard== l._grid->CheckerBoard(site));
 | 
			
		||||
      assert( sizeof(sobj)*Nsimd == sizeof(vobj));
 | 
			
		||||
 | 
			
		||||
      int rank,odx,idx;
 | 
			
		||||
      grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
			
		||||
      std::vector<sobj> buf(Nsimd);
 | 
			
		||||
      std::vector<scalar_type *> pointers(Nsimd);  
 | 
			
		||||
      for(int i=0;i<Nsimd;i++) pointers[i] = (scalar_type *)&buf[i];
 | 
			
		||||
 | 
			
		||||
      extract(l._odata[odx],pointers);
 | 
			
		||||
      
 | 
			
		||||
      s = buf[idx];
 | 
			
		||||
      grid->Broadcast(rank,s);
 | 
			
		||||
 | 
			
		||||
      return;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										52
									
								
								lib/Grid_lattice_reality.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								lib/Grid_lattice_reality.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,52 @@
 | 
			
		||||
#ifndef GRID_LATTICE_REALITY_H
 | 
			
		||||
#define GRID_LATTICE_REALITY_H
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// FIXME .. this is the sector of the code 
 | 
			
		||||
// I am most worried about the directions
 | 
			
		||||
// The choice of burying complex in the SIMD
 | 
			
		||||
// is making the use of "real" and "imag" very cumbersome
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    template<class vobj> inline Lattice<vobj> adj(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = adj(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class vobj> inline Lattice<vobj> conj(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = conj(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    template<class vobj> inline auto real(const Lattice<vobj> &z) -> Lattice<decltype(real(z._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(real(z._odata[0]))> ret(z._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<z._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = real(z._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class vobj> inline auto imag(const Lattice<vobj> &z) -> Lattice<decltype(imag(z._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(imag(z._odata[0]))> ret(z._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<z._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = imag(z._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										45
									
								
								lib/Grid_lattice_reduction.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								lib/Grid_lattice_reduction.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,45 @@
 | 
			
		||||
#ifndef GRID_LATTICE_REDUCTION_H
 | 
			
		||||
#define GRID_LATTICE_REDUCTION_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Reduction operations
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline RealD norm2(const Lattice<vobj> &arg){
 | 
			
		||||
 | 
			
		||||
      typedef typename vobj::scalar_type scalar;
 | 
			
		||||
      typedef typename vobj::vector_type vector;
 | 
			
		||||
      decltype(innerProduct(arg._odata[0],arg._odata[0])) vnrm=zero;
 | 
			
		||||
      scalar nrm;
 | 
			
		||||
      //FIXME make this loop parallelisable
 | 
			
		||||
      vnrm=zero;
 | 
			
		||||
      for(int ss=0;ss<arg._grid->oSites(); ss++){
 | 
			
		||||
	vnrm = vnrm + innerProduct(arg._odata[ss],arg._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      vector vvnrm =TensorRemove(vnrm) ;
 | 
			
		||||
      nrm = Reduce(vvnrm);
 | 
			
		||||
      arg._grid->GlobalSum(nrm);
 | 
			
		||||
      return real(nrm);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
 | 
			
		||||
    {
 | 
			
		||||
      typedef typename vobj::scalar_type scalar;
 | 
			
		||||
      decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
 | 
			
		||||
 | 
			
		||||
      scalar nrm;
 | 
			
		||||
      //FIXME make this loop parallelisable
 | 
			
		||||
      for(int ss=0;ss<left._grid->oSites(); ss++){
 | 
			
		||||
	vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      nrm = Reduce(vnrm);
 | 
			
		||||
      right._grid->GlobalSum(nrm);
 | 
			
		||||
      return nrm;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										32
									
								
								lib/Grid_lattice_rng.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								lib/Grid_lattice_rng.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
			
		||||
#ifndef GRID_LATTICE_RNG_H
 | 
			
		||||
#define GRID_LATTICE_RNG_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    // FIXME Randomise; deprecate this
 | 
			
		||||
    template <class vobj> inline void random(Lattice<vobj> &l){
 | 
			
		||||
        Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
        size_t v_len = l._grid->oSites()*sizeof(vobj);
 | 
			
		||||
        size_t d_len = v_len/sizeof(Real);
 | 
			
		||||
	
 | 
			
		||||
        for(int i=0;i<d_len;i++){
 | 
			
		||||
 | 
			
		||||
            v_ptr[i]=drand48();
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    // FIXME Implement a consistent seed management strategy
 | 
			
		||||
    template <class vobj> inline void gaussian(Lattice<vobj> &l){
 | 
			
		||||
        // Zero mean, unit variance.
 | 
			
		||||
        std::normal_distribution<double> distribution(0.0,1.0);
 | 
			
		||||
        Real *v_ptr = (Real *)&l._odata[0];
 | 
			
		||||
        size_t v_len = l._grid->oSites()*sizeof(vobj);
 | 
			
		||||
        size_t d_len = v_len/sizeof(Real);
 | 
			
		||||
 | 
			
		||||
        for(int i=0;i<d_len;i++){
 | 
			
		||||
	  v_ptr[i]= drand48();
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										43
									
								
								lib/Grid_lattice_trace.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								lib/Grid_lattice_trace.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,43 @@
 | 
			
		||||
#ifndef GRID_LATTICE_TRACE_H
 | 
			
		||||
#define GRID_LATTICE_TRACE_H
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
// Tracing, transposing, peeking, poking
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Trace
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<class vobj>
 | 
			
		||||
    inline auto trace(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(trace(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(trace(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = trace(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Trace Index level dependent operation
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto traceIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
      for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
	ret._odata[ss] = traceIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
      }
 | 
			
		||||
      return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										46
									
								
								lib/Grid_lattice_transfer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								lib/Grid_lattice_transfer.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,46 @@
 | 
			
		||||
#ifndef GRID_LATTICE_TRANSFER_H
 | 
			
		||||
#define GRID_LATTICE_TRANSFER_H
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  // remove and insert a half checkerboard
 | 
			
		||||
  ////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
 | 
			
		||||
    half.checkerboard = cb;
 | 
			
		||||
    int ssh=0;
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<full._grid->oSites();ss++){
 | 
			
		||||
      std::vector<int> coor;
 | 
			
		||||
      int cbos;
 | 
			
		||||
      
 | 
			
		||||
      full._grid->oCoorFromOindex(coor,ss);
 | 
			
		||||
      cbos=half._grid->CheckerBoard(coor);
 | 
			
		||||
      
 | 
			
		||||
      if (cbos==cb) {
 | 
			
		||||
	
 | 
			
		||||
	half._odata[ssh] = full._odata[ss];
 | 
			
		||||
	ssh++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
 | 
			
		||||
    int cb = half.checkerboard;
 | 
			
		||||
    int ssh=0;
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
    for(int ss=0;ss<full._grid->oSites();ss++){
 | 
			
		||||
      std::vector<int> coor;
 | 
			
		||||
      int cbos;
 | 
			
		||||
      
 | 
			
		||||
      full._grid->oCoorFromOindex(coor,ss);
 | 
			
		||||
      cbos=half._grid->CheckerBoard(coor);
 | 
			
		||||
      
 | 
			
		||||
      if (cbos==cb) {
 | 
			
		||||
	full._odata[ss]=half._odata[ssh];
 | 
			
		||||
	ssh++;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										39
									
								
								lib/Grid_lattice_transpose.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								lib/Grid_lattice_transpose.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
			
		||||
#ifndef GRID_LATTICE_TRANSPOSE_H
 | 
			
		||||
#define GRID_LATTICE_TRANSPOSE_H
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
// Transpose
 | 
			
		||||
///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Transpose
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
  template<class vobj>
 | 
			
		||||
    inline Lattice<vobj> transpose(const Lattice<vobj> &lhs){
 | 
			
		||||
        Lattice<vobj> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = transpose(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // Index level dependent transpose
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    template<int Index,class vobj>
 | 
			
		||||
    inline auto transposeIndex(const Lattice<vobj> &lhs)
 | 
			
		||||
      -> Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))>
 | 
			
		||||
    {
 | 
			
		||||
      Lattice<decltype(transposeIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
 | 
			
		||||
#pragma omp parallel for
 | 
			
		||||
        for(int ss=0;ss<lhs._grid->oSites();ss++){
 | 
			
		||||
            ret._odata[ss] = transposeIndex<Index>(lhs._odata[ss]);
 | 
			
		||||
        }
 | 
			
		||||
        return ret;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1,14 +1,14 @@
 | 
			
		||||
#ifndef GRID_PREDICATED_H
 | 
			
		||||
#define GRID_PREDICATED_H
 | 
			
		||||
 | 
			
		||||
#ifndef GRID_WHERE_H
 | 
			
		||||
#define GRID_WHERE_H
 | 
			
		||||
namespace Grid {
 | 
			
		||||
// Must implement the predicate gating the 
 | 
			
		||||
// Must be able to reduce the predicate down to a single vInteger per site.
 | 
			
		||||
// Must be able to require the type be iScalar x iScalar x ....
 | 
			
		||||
//                              give a GetVtype method in iScalar
 | 
			
		||||
//                              and blow away the tensor structures.
 | 
			
		||||
//
 | 
			
		||||
template<class vobj>
 | 
			
		||||
inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
 | 
			
		||||
template<class vobj,class iobj>
 | 
			
		||||
inline void where(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
 | 
			
		||||
{
 | 
			
		||||
  conformable(iftrue,iffalse);
 | 
			
		||||
  conformable(iftrue,predicate);
 | 
			
		||||
@@ -17,6 +17,7 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
 | 
			
		||||
  GridBase *grid=iftrue._grid;
 | 
			
		||||
  typedef typename vobj::scalar_type scalar_type;
 | 
			
		||||
  typedef typename vobj::vector_type vector_type;
 | 
			
		||||
  typedef typename iobj::vector_type mask_type;
 | 
			
		||||
 | 
			
		||||
  const int Nsimd = grid->Nsimd();
 | 
			
		||||
  const int words = sizeof(vobj)/sizeof(vector_type);
 | 
			
		||||
@@ -35,7 +36,7 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
 | 
			
		||||
    for(int s=0;s<Nsimd;s++) pointers[s] = & falsevals[s][0];
 | 
			
		||||
    extract(iffalse._odata[ss]  ,pointers);
 | 
			
		||||
 | 
			
		||||
    extract(predicate._odata[ss],mask);
 | 
			
		||||
    extract(TensorRemove(predicate._odata[ss]),mask);
 | 
			
		||||
 | 
			
		||||
    for(int s=0;s<Nsimd;s++){
 | 
			
		||||
      if (mask[s]) pointers[s]=&truevals[s][0];
 | 
			
		||||
@@ -46,8 +47,8 @@ inline void where(Lattice<vobj> &ret,const LatticeInteger &predicate,Lattice<vob
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
inline Lattice<vobj> where(const LatticeInteger &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
 | 
			
		||||
template<class vobj,class iobj>
 | 
			
		||||
inline Lattice<vobj> where(const Lattice<iobj> &predicate,Lattice<vobj> &iftrue,Lattice<vobj> &iffalse)
 | 
			
		||||
{
 | 
			
		||||
  conformable(iftrue,iffalse);
 | 
			
		||||
  conformable(iftrue,predicate);
 | 
			
		||||
@@ -58,5 +59,5 @@ inline Lattice<vobj> where(const LatticeInteger &predicate,Lattice<vobj> &iftrue
 | 
			
		||||
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
		Reference in New Issue
	
	Block a user