mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge pull request #34 from aportelli/master
Polymorphic lattices & various small updates
This commit is contained in:
		@@ -144,6 +144,10 @@ void GridParseLayout(char **argv,int argc,
 | 
			
		||||
  }
 | 
			
		||||
  if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
 | 
			
		||||
    std::vector<int> ompthreads(0);
 | 
			
		||||
#ifndef GRID_OMP
 | 
			
		||||
    std::cout << GridLogWarning << "'--threads' option used but Grid was"
 | 
			
		||||
              << " not compiled with thread support" << std::endl;
 | 
			
		||||
#endif
 | 
			
		||||
    arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
 | 
			
		||||
    GridCmdOptionIntVector(arg,ompthreads);
 | 
			
		||||
    assert(ompthreads.size()==1);
 | 
			
		||||
@@ -187,7 +191,7 @@ void Grid_init(int *argc,char ***argv)
 | 
			
		||||
    std::cout<<GridLogMessage<<"--debug-stdout  : print stdout from EVERY node"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--mpi n.n.n.n   : default MPI decomposition"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--omp n         : default number of OMP threads"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--threads n     : default number of OMP threads"<<std::endl;
 | 
			
		||||
    std::cout<<GridLogMessage<<"--grid n.n.n.n  : default Grid size"<<std::endl;    
 | 
			
		||||
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
 | 
			
		||||
    exit(EXIT_SUCCESS);
 | 
			
		||||
 
 | 
			
		||||
@@ -69,10 +69,10 @@ public:
 | 
			
		||||
            StopWatch.Stop();
 | 
			
		||||
            GridTime now = StopWatch.Elapsed();
 | 
			
		||||
            StopWatch.Start();
 | 
			
		||||
            stream << BLACK<< log.topName << BLACK<< " : ";
 | 
			
		||||
            stream << log.COLOUR <<std::setw(10) << std::left << log.name << BLACK << " : ";
 | 
			
		||||
	    stream << YELLOW<< now <<BLACK << " : " ;
 | 
			
		||||
	    stream << log.COLOUR;
 | 
			
		||||
            stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
 | 
			
		||||
            stream << log.COLOUR <<std::setw(11)  << log.name << BLACK << " : ";
 | 
			
		||||
            stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
 | 
			
		||||
            stream << log.COLOUR;
 | 
			
		||||
            return stream;
 | 
			
		||||
        } else { 
 | 
			
		||||
            return devnull;
 | 
			
		||||
 
 | 
			
		||||
@@ -55,7 +55,13 @@ extern int GridCshiftPermuteMap[4][16];
 | 
			
		||||
// Basic expressions used in Expression Template
 | 
			
		||||
////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
class LatticeBase {};
 | 
			
		||||
class LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
    virtual ~LatticeBase(void) = default;
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
};
 | 
			
		||||
    
 | 
			
		||||
class LatticeExpressionBase {};
 | 
			
		||||
 | 
			
		||||
template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
 | 
			
		||||
@@ -88,8 +94,6 @@ template<class vobj>
 | 
			
		||||
class Lattice : public LatticeBase
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
    GridBase *_grid;
 | 
			
		||||
    int checkerboard;
 | 
			
		||||
    Vector<vobj> _odata;
 | 
			
		||||
    
 | 
			
		||||
@@ -177,8 +181,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
  }
 | 
			
		||||
  //GridFromExpression is tricky to do
 | 
			
		||||
  template<class Op,class T1>
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
 | 
			
		||||
 | 
			
		||||
    Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -199,7 +203,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2>
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -220,7 +225,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
  template<class Op,class T1, class T2, class T3>
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
 | 
			
		||||
  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
 | 
			
		||||
    _grid = nullptr;
 | 
			
		||||
    GridFromExpression(_grid,expr);
 | 
			
		||||
    assert(_grid!=nullptr);
 | 
			
		||||
 | 
			
		||||
@@ -240,7 +246,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    // Constructor requires "grid" passed.
 | 
			
		||||
    // what about a default grid?
 | 
			
		||||
    //////////////////////////////////////////////////////////////////
 | 
			
		||||
    Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
 | 
			
		||||
    Lattice(GridBase *grid) : _odata(grid->oSites()) {
 | 
			
		||||
        _grid = grid;
 | 
			
		||||
    //        _odata.reserve(_grid->oSites());
 | 
			
		||||
    //        _odata.resize(_grid->oSites());
 | 
			
		||||
    //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
 | 
			
		||||
@@ -248,6 +255,8 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
        checkerboard=0;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    virtual ~Lattice(void) = default;
 | 
			
		||||
    
 | 
			
		||||
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
        for(int ss=0;ss<_grid->oSites();ss++){
 | 
			
		||||
 
 | 
			
		||||
@@ -361,7 +361,7 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
@@ -374,7 +374,7 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice
 | 
			
		||||
  assert(nl+1 == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
  assert(hg->_processors[orthog]==0);
 | 
			
		||||
  assert(hg->_processors[orthog]==1);
 | 
			
		||||
 | 
			
		||||
  int dl; dl = 0;
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
@@ -385,48 +385,6 @@ void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    dl=0;
 | 
			
		||||
    hcoor[orthog] = slice;
 | 
			
		||||
    for(int d=0;d<nh;d++){
 | 
			
		||||
      if ( d!=orthog ) { 
 | 
			
		||||
	hcoor[d]=lcoor[dl++];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    peekLocalSite(s,higherDim,hcoor);
 | 
			
		||||
    pokeLocalSite(s,lowDim,lcoor);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl+1 == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
  assert(hg->_processors[orthog]==0);
 | 
			
		||||
 | 
			
		||||
  int dl; dl = 0;
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    if ( d != orthog) {
 | 
			
		||||
      assert(lg->_processors[dl]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
 | 
			
		||||
      dl++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
@@ -443,6 +401,48 @@ PARALLEL_FOR_LOOP
 | 
			
		||||
    peekLocalSite(s,lowDim,lcoor);
 | 
			
		||||
    pokeLocalSite(s,higherDim,hcoor);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<class vobj>
 | 
			
		||||
void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename vobj::scalar_object sobj;
 | 
			
		||||
  sobj s;
 | 
			
		||||
 | 
			
		||||
  GridBase *lg = lowDim._grid;
 | 
			
		||||
  GridBase *hg = higherDim._grid;
 | 
			
		||||
  int nl = lg->_ndimension;
 | 
			
		||||
  int nh = hg->_ndimension;
 | 
			
		||||
 | 
			
		||||
  assert(nl+1 == nh);
 | 
			
		||||
  assert(orthog<nh);
 | 
			
		||||
  assert(orthog>=0);
 | 
			
		||||
  assert(hg->_processors[orthog]==1);
 | 
			
		||||
 | 
			
		||||
  int dl; dl = 0;
 | 
			
		||||
  for(int d=0;d<nh;d++){
 | 
			
		||||
    if ( d != orthog) {
 | 
			
		||||
      assert(lg->_processors[dl]  == hg->_processors[d]);
 | 
			
		||||
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
 | 
			
		||||
      dl++;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  // the above should guarantee that the operations are local
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int idx=0;idx<lg->lSites();idx++){
 | 
			
		||||
    std::vector<int> lcoor(nl);
 | 
			
		||||
    std::vector<int> hcoor(nh);
 | 
			
		||||
    lg->LocalIndexToLocalCoor(idx,lcoor);
 | 
			
		||||
    dl=0;
 | 
			
		||||
    hcoor[orthog] = slice;
 | 
			
		||||
    for(int d=0;d<nh;d++){
 | 
			
		||||
      if ( d!=orthog ) { 
 | 
			
		||||
	hcoor[d]=lcoor[dl++];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    peekLocalSite(s,higherDim,hcoor);
 | 
			
		||||
    pokeLocalSite(s,lowDim,lcoor);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -213,37 +213,38 @@ class NerscIO : public BinaryIO {
 | 
			
		||||
  static inline void truncate(std::string file){
 | 
			
		||||
    std::ofstream fout(file,std::ios::out);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  #define dump_nersc_header(field, s)\
 | 
			
		||||
  s << "BEGIN_HEADER"      << std::endl;\
 | 
			
		||||
  s << "HDR_VERSION = "    << field.hdr_version    << std::endl;\
 | 
			
		||||
  s << "DATATYPE = "       << field.data_type      << std::endl;\
 | 
			
		||||
  s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\
 | 
			
		||||
  for(int i=0;i<4;i++){\
 | 
			
		||||
    s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\
 | 
			
		||||
  }\
 | 
			
		||||
  s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\
 | 
			
		||||
  s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;\
 | 
			
		||||
  for(int i=0;i<4;i++){\
 | 
			
		||||
    s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\
 | 
			
		||||
  }\
 | 
			
		||||
  \
 | 
			
		||||
  s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\
 | 
			
		||||
  s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;\
 | 
			
		||||
  s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;\
 | 
			
		||||
  s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;\
 | 
			
		||||
  s << "CREATOR = "         << field.creator          << std::endl;\
 | 
			
		||||
  s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\
 | 
			
		||||
  s << "CREATION_DATE = "   << field.creation_date    << std::endl;\
 | 
			
		||||
  s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;\
 | 
			
		||||
  s << "FLOATING_POINT = "  << field.floating_point   << std::endl;\
 | 
			
		||||
  s << "END_HEADER"         << std::endl;
 | 
			
		||||
  
 | 
			
		||||
  static inline unsigned int writeHeader(NerscField &field,std::string file)
 | 
			
		||||
  {
 | 
			
		||||
    std::ofstream fout(file,std::ios::out|std::ios::in);
 | 
			
		||||
  
 | 
			
		||||
    fout.seekp(0,std::ios::beg);
 | 
			
		||||
    fout << "BEGIN_HEADER"      << std::endl;
 | 
			
		||||
    fout << "HDR_VERSION = "    << field.hdr_version    << std::endl;
 | 
			
		||||
    fout << "DATATYPE = "       << field.data_type      << std::endl;
 | 
			
		||||
    fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
 | 
			
		||||
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
 | 
			
		||||
    }
 | 
			
		||||
    // just to keep the space and write it later
 | 
			
		||||
    fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
 | 
			
		||||
    fout << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;
 | 
			
		||||
    for(int i=0;i<4;i++){
 | 
			
		||||
      fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;
 | 
			
		||||
 | 
			
		||||
    fout << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;
 | 
			
		||||
    fout << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;
 | 
			
		||||
    fout << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;
 | 
			
		||||
    fout << "CREATOR = "         << field.creator          << std::endl;
 | 
			
		||||
    fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
 | 
			
		||||
    fout << "CREATION_DATE = "   << field.creation_date    << std::endl;
 | 
			
		||||
    fout << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;
 | 
			
		||||
    fout << "FLOATING_POINT = "  << field.floating_point   << std::endl;
 | 
			
		||||
    fout << "END_HEADER"         << std::endl;
 | 
			
		||||
    dump_nersc_header(field, fout);
 | 
			
		||||
    field.data_start = fout.tellp();
 | 
			
		||||
    return field.data_start;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -377,7 +377,55 @@ namespace Optimization {
 | 
			
		||||
  
 | 
			
		||||
  template < typename vtype > 
 | 
			
		||||
    void permute(vtype &a, vtype b, int perm) {
 | 
			
		||||
   }; 
 | 
			
		||||
   };
 | 
			
		||||
    
 | 
			
		||||
  struct Rotate{
 | 
			
		||||
 | 
			
		||||
    static inline u128f rotate(u128f in,int n){
 | 
			
		||||
      u128f out;
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0:
 | 
			
		||||
        out.f[0] = in.f[0];
 | 
			
		||||
        out.f[1] = in.f[1];
 | 
			
		||||
        out.f[2] = in.f[2];
 | 
			
		||||
        out.f[3] = in.f[3];
 | 
			
		||||
        break;
 | 
			
		||||
      case 1:
 | 
			
		||||
        out.f[0] = in.f[1];
 | 
			
		||||
        out.f[1] = in.f[2];
 | 
			
		||||
        out.f[2] = in.f[3];
 | 
			
		||||
        out.f[3] = in.f[0];
 | 
			
		||||
        break;
 | 
			
		||||
      case 2:
 | 
			
		||||
        out.f[0] = in.f[2];
 | 
			
		||||
        out.f[1] = in.f[3];
 | 
			
		||||
        out.f[2] = in.f[0];
 | 
			
		||||
        out.f[3] = in.f[1];
 | 
			
		||||
        break;
 | 
			
		||||
      case 3:
 | 
			
		||||
        out.f[0] = in.f[3];
 | 
			
		||||
        out.f[1] = in.f[0];
 | 
			
		||||
        out.f[2] = in.f[1];
 | 
			
		||||
        out.f[3] = in.f[2];
 | 
			
		||||
        break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    static inline u128d rotate(u128d in,int n){
 | 
			
		||||
      u128d out;
 | 
			
		||||
      switch(n){
 | 
			
		||||
      case 0:
 | 
			
		||||
        out.f[0] = in.f[0];
 | 
			
		||||
        out.f[1] = in.f[1];
 | 
			
		||||
        break;
 | 
			
		||||
      case 1:
 | 
			
		||||
        out.f[0] = in.f[1];
 | 
			
		||||
        out.f[1] = in.f[0];
 | 
			
		||||
        break;
 | 
			
		||||
      default: assert(0);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  //Complex float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user