Merge branch 'master' into chulwoo-dec12-2015

2025-10-25 02:04:48 +01:00 · 2016-06-06 10:57:27 -04:00
parent 2893a9b116 5c90c3b457
commit c5ab9f247f
15 changed files with 294 additions and 80 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -33,19 +33,6 @@ matrix:
            - libmpc-dev
            - binutils-dev
      env: VERSION=-5
    - compiler: clang
      addons:
        apt:
          sources:
            - ubuntu-toolchain-r-test
            - llvm-toolchain-precise-3.6
          packages:
            - clang-3.6
            - libmpfr-dev
            - libgmp-dev
            - libmpc-dev
            - binutils-dev
      env: VERSION=-3.6
    - compiler: clang
      addons:
        apt:
@@ -59,6 +46,19 @@ matrix:
            - libmpc-dev
            - binutils-dev
      env: VERSION=-3.7
    - compiler: clang
      addons:
        apt:
          sources:
            - ubuntu-toolchain-r-test
            - llvm-toolchain-precise-3.8
          packages:
            - clang-3.8
            - libmpfr-dev
            - libgmp-dev
            - libmpc-dev
            - binutils-dev
      env: VERSION=-3.8
 before_install:
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
--- a/lib/Init.cc
+++ b/lib/Init.cc
@@ -144,6 +144,10 @@ void GridParseLayout(char **argv,int argc,
  }
  if( GridCmdOptionExists(argv,argv+argc,"--threads") ){
    std::vector<int> ompthreads(0);
 #ifndef GRID_OMP
    std::cout << GridLogWarning << "'--threads' option used but Grid was"
              << " not compiled with thread support" << std::endl;
 #endif
    arg= GridCmdOptionPayload(argv,argv+argc,"--threads");
    GridCmdOptionIntVector(arg,ompthreads);
    assert(ompthreads.size()==1);
@@ -187,9 +191,10 @@ void Grid_init(int *argc,char ***argv)
    std::cout<<GridLogMessage<<"--debug-stdout  : print stdout from EVERY node"<<std::endl;    
    std::cout<<GridLogMessage<<"--decomposition : report on default omp,mpi and simd decomposition"<<std::endl;    
    std::cout<<GridLogMessage<<"--mpi n.n.n.n   : default MPI decomposition"<<std::endl;    
-    std::cout<<GridLogMessage<<"--omp n         : default number of OMP threads"<<std::endl;    
+    std::cout<<GridLogMessage<<"--threads n     : default number of OMP threads"<<std::endl;
    std::cout<<GridLogMessage<<"--grid n.n.n.n  : default Grid size"<<std::endl;    
    std::cout<<GridLogMessage<<"--log list      : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
    exit(EXIT_SUCCESS);
  }
  if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
--- a/lib/Log.h
+++ b/lib/Log.h
@@ -69,9 +69,9 @@ public:
            StopWatch.Stop();
            GridTime now = StopWatch.Elapsed();
            StopWatch.Start();
-            stream << BLACK<< log.topName << BLACK<< " : ";
+            stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
-            stream << log.COLOUR <<std::setw(10) << std::left << log.name << BLACK << " : ";
+            stream << log.COLOUR <<std::setw(11)  << log.name << BLACK << " : ";
-	    stream << YELLOW<< now <<BLACK << " : " ;
+            stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
            stream << log.COLOUR;
            return stream;
        } else { 
--- a/lib/Old/Endeavour.tgz
+++ b/lib/Old/Endeavour.tgz
--- a/lib/algorithms/iterative/ConjugateGradient.h
+++ b/lib/algorithms/iterative/ConjugateGradient.h
@@ -84,7 +84,7 @@ public:
 	return;
      }
-      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" rsq"<<rsq<<std::endl;
+      std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
      GridStopWatch LinalgTimer;
      GridStopWatch MatrixTimer;
--- a/lib/algorithms/iterative/SchurRedBlack.h
+++ b/lib/algorithms/iterative/SchurRedBlack.h
@@ -102,6 +102,8 @@ namespace Grid {
      pickCheckerboard(Even,src_e,in);
      pickCheckerboard(Odd ,src_o,in);
      pickCheckerboard(Even,sol_e,out);
      pickCheckerboard(Odd ,sol_o,out);
      /////////////////////////////////////////////////////
      // src_o = Mdag * (source_o - Moe MeeInv source_e)
--- a/lib/lattice/Lattice_base.h
+++ b/lib/lattice/Lattice_base.h
@@ -55,7 +55,13 @@ extern int GridCshiftPermuteMap[4][16];
 // Basic expressions used in Expression Template
 ////////////////////////////////////////////////
-class LatticeBase {};
+class LatticeBase
 {
 public:
    virtual ~LatticeBase(void) = default;
    GridBase *_grid;
 };
 class LatticeExpressionBase {};
 template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
@@ -88,8 +94,6 @@ template<class vobj>
 class Lattice : public LatticeBase
 {
 public:
    GridBase *_grid;
    int checkerboard;
    Vector<vobj> _odata;
@@ -177,8 +181,8 @@ PARALLEL_FOR_LOOP
  }
  //GridFromExpression is tricky to do
  template<class Op,class T1>
-    Lattice(const LatticeUnaryExpression<Op,T1> & expr):    _grid(nullptr){
+    Lattice(const LatticeUnaryExpression<Op,T1> & expr) {
-
+    _grid = nullptr;
    GridFromExpression(_grid,expr);
    assert(_grid!=nullptr);
@@ -199,7 +203,8 @@ PARALLEL_FOR_LOOP
    }
  };
  template<class Op,class T1, class T2>
-  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr):    _grid(nullptr){
+  Lattice(const LatticeBinaryExpression<Op,T1,T2> & expr) {
    _grid = nullptr;
    GridFromExpression(_grid,expr);
    assert(_grid!=nullptr);
@@ -220,7 +225,8 @@ PARALLEL_FOR_LOOP
    }
  };
  template<class Op,class T1, class T2, class T3>
-  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr):    _grid(nullptr){
+  Lattice(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) {
    _grid = nullptr;
    GridFromExpression(_grid,expr);
    assert(_grid!=nullptr);
@@ -240,7 +246,8 @@ PARALLEL_FOR_LOOP
    // Constructor requires "grid" passed.
    // what about a default grid?
    //////////////////////////////////////////////////////////////////
-    Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) {
+    Lattice(GridBase *grid) : _odata(grid->oSites()) {
        _grid = grid;
    //        _odata.reserve(_grid->oSites());
    //        _odata.resize(_grid->oSites());
    //      std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
@@ -248,6 +255,8 @@ PARALLEL_FOR_LOOP
        checkerboard=0;
    }
    virtual ~Lattice(void) = default;
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
 PARALLEL_FOR_LOOP
        for(int ss=0;ss<_grid->oSites();ss++){
--- a/lib/lattice/Lattice_reduction.h
+++ b/lib/lattice/Lattice_reduction.h
@@ -152,7 +152,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
  assert(grid!=NULL);
  // FIXME
-  std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
+  // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
  const int    Nd = grid->_ndimension;
  const int Nsimd = grid->Nsimd();
--- a/lib/lattice/Lattice_transfer.h
+++ b/lib/lattice/Lattice_transfer.h
@@ -325,6 +325,126 @@ inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
 }
 // Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
 // Simd layouts need not match since we use peek/poke Local
 template<class vobj,class vvobj>
 void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
 {
  typedef typename vobj::scalar_object sobj;
  typedef typename vvobj::scalar_object ssobj;
  sobj s;
  ssobj ss;
  GridBase *ig = in._grid;
  GridBase *og = out._grid;
  int ni = ig->_ndimension;
  int no = og->_ndimension;
  assert(ni == no);
  for(int d=0;d<no;d++){
    assert(ig->_processors[d]  == og->_processors[d]);
    assert(ig->_ldimensions[d] == og->_ldimensions[d]);
  }
 PARALLEL_FOR_LOOP
  for(int idx=0;idx<ig->lSites();idx++){
    std::vector<int> lcoor(ni);
    ig->LocalIndexToLocalCoor(idx,lcoor);
    peekLocalSite(s,in,lcoor);
    ss=s;
    pokeLocalSite(ss,out,lcoor);
  }
 }
 // Write the whole of `lowDim` into `higherDim` at coordinate `slice` along
 // dimension `orthog`.
 //
 // lowDim    : source lattice with one dimension fewer than higherDim.
 // higherDim : destination lattice.
 // slice     : local coordinate along `orthog` that receives the data.
 // orthog    : index (0 <= orthog < nh) of the dimension absent from lowDim.
 //
 // assert() enforces nl+1 == nh, that `orthog` is not split across processors,
 // and that the remaining dimensions agree in _processors and _ldimensions.
 template<class vobj>
 void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
 {
  typedef typename vobj::scalar_object sobj;

  GridBase *lg = lowDim._grid;
  GridBase *hg = higherDim._grid;
  int nl = lg->_ndimension;
  int nh = hg->_ndimension;

  assert(nl+1 == nh);
  assert(orthog<nh);
  assert(orthog>=0);
  assert(hg->_processors[orthog]==1);

  // Walk the high-dimensional axes, skipping `orthog`, and compare each with
  // the matching low-dimensional axis.
  int dl = 0;
  for(int d=0;d<nh;d++){
    if ( d != orthog) {
      assert(lg->_processors[dl]  == hg->_processors[d]);
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
      dl++;
    }
  }

  // the above should guarantee that the operations are local
  //
  // NOTE(review): PARALLEL_FOR_LOOP is defined outside this file section and is
  // presumably an OpenMP parallel-for pragma. Under that assumption anything
  // declared outside the loop is shared between threads, so the scratch site
  // object and the axis counter are per-iteration locals.
 PARALLEL_FOR_LOOP
  for(int idx=0;idx<lg->lSites();idx++){
    sobj s;
    std::vector<int> lcoor(nl);
    std::vector<int> hcoor(nh);
    lg->LocalIndexToLocalCoor(idx,lcoor);
    int ldim = 0;
    hcoor[orthog] = slice;
    for(int d=0;d<nh;d++){
      if ( d!=orthog ) {
        hcoor[d]=lcoor[ldim++];
      }
    }
    peekLocalSite(s,lowDim,lcoor);
    pokeLocalSite(s,higherDim,hcoor);
  }
 }
 // Read the slice of `higherDim` at coordinate `slice` along dimension
 // `orthog` and store it in `lowDim`.
 //
 // lowDim    : destination lattice with one dimension fewer than higherDim.
 // higherDim : source lattice.
 // slice     : local coordinate along `orthog` that is read.
 // orthog    : index (0 <= orthog < nh) of the dimension absent from lowDim.
 //
 // assert() enforces nl+1 == nh, that `orthog` is not split across processors,
 // and that the remaining dimensions agree in _processors and _ldimensions.
 template<class vobj>
 void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog)
 {
  typedef typename vobj::scalar_object sobj;

  GridBase *lg = lowDim._grid;
  GridBase *hg = higherDim._grid;
  int nl = lg->_ndimension;
  int nh = hg->_ndimension;

  assert(nl+1 == nh);
  assert(orthog<nh);
  assert(orthog>=0);
  assert(hg->_processors[orthog]==1);

  // Walk the high-dimensional axes, skipping `orthog`, and compare each with
  // the matching low-dimensional axis.
  int dl = 0;
  for(int d=0;d<nh;d++){
    if ( d != orthog) {
      assert(lg->_processors[dl]  == hg->_processors[d]);
      assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
      dl++;
    }
  }

  // the above should guarantee that the operations are local
  //
  // NOTE(review): PARALLEL_FOR_LOOP is defined outside this file section and is
  // presumably an OpenMP parallel-for pragma. Under that assumption anything
  // declared outside the loop is shared between threads, so the scratch site
  // object and the axis counter are per-iteration locals.
 PARALLEL_FOR_LOOP
  for(int idx=0;idx<lg->lSites();idx++){
    sobj s;
    std::vector<int> lcoor(nl);
    std::vector<int> hcoor(nh);
    lg->LocalIndexToLocalCoor(idx,lcoor);
    int ldim = 0;
    hcoor[orthog] = slice;
    for(int d=0;d<nh;d++){
      if ( d!=orthog ) {
        hcoor[d]=lcoor[ldim++];
      }
    }
    peekLocalSite(s,higherDim,hcoor);
    pokeLocalSite(s,lowDim,lcoor);
  }
 }
 template<class vobj>
 void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
--- a/lib/parallelIO/NerscIO.h
+++ b/lib/parallelIO/NerscIO.h
@@ -213,37 +213,38 @@ class NerscIO : public BinaryIO {
  static inline void truncate(std::string file){
    std::ofstream fout(file,std::ios::out);
  }
  // Write every header entry of `field` to the stream `s` as "KEY = value"
  // lines framed by BEGIN_HEADER / END_HEADER.
  //
  // field : object exposing the members read below (hdr_version, data_type,
  //         storage_format, dimension[4], link_trace, plaquette, boundary[4],
  //         checksum, ensemble_id, ensemble_label, sequence_number, creator,
  //         creator_hardware, creation_date, archive_date, floating_point).
  // s     : output stream; it is named many times in the expansion, so pass a
  //         plain stream object rather than an expression with side effects.
  //
  // The body is wrapped in do { ... } while(0) so the macro expands to one
  // statement and can sit under an unbraced if/else. Invoke it with a trailing
  // semicolon: dump_nersc_header(header, stream);
  #define dump_nersc_header(field, s)\
  do {\
    (s) << "BEGIN_HEADER"      << std::endl;\
    (s) << "HDR_VERSION = "    << (field).hdr_version    << std::endl;\
    (s) << "DATATYPE = "       << (field).data_type      << std::endl;\
    (s) << "STORAGE_FORMAT = " << (field).storage_format << std::endl;\
    for(int i=0;i<4;i++){\
      (s) << "DIMENSION_" << i+1 << " = " << (field).dimension[i] << std::endl ;\
    }\
    (s) << "LINK_TRACE = " << std::setprecision(10) << (field).link_trace << std::endl;\
    (s) << "PLAQUETTE  = " << std::setprecision(10) << (field).plaquette  << std::endl;\
    for(int i=0;i<4;i++){\
      (s) << "BOUNDARY_"<<i+1<<" = " << (field).boundary[i] << std::endl;\
    }\
    (s) << "CHECKSUM = "<< std::hex << std::setw(10) << (field).checksum << std::dec<<std::endl;\
    (s) << "ENSEMBLE_ID = "     << (field).ensemble_id      << std::endl;\
    (s) << "ENSEMBLE_LABEL = "  << (field).ensemble_label   << std::endl;\
    (s) << "SEQUENCE_NUMBER = " << (field).sequence_number  << std::endl;\
    (s) << "CREATOR = "         << (field).creator          << std::endl;\
    (s) << "CREATOR_HARDWARE = "<< (field).creator_hardware << std::endl;\
    (s) << "CREATION_DATE = "   << (field).creation_date    << std::endl;\
    (s) << "ARCHIVE_DATE = "    << (field).archive_date     << std::endl;\
    (s) << "FLOATING_POINT = "  << (field).floating_point   << std::endl;\
    (s) << "END_HEADER"         << std::endl;\
  } while(0)
  static inline unsigned int writeHeader(NerscField &field,std::string file)
  {
    std::ofstream fout(file,std::ios::out|std::ios::in);
    fout.seekp(0,std::ios::beg);
-    fout << "BEGIN_HEADER"      << std::endl;
+    dump_nersc_header(field, fout);
    fout << "HDR_VERSION = "    << field.hdr_version    << std::endl;
    fout << "DATATYPE = "       << field.data_type      << std::endl;
    fout << "STORAGE_FORMAT = " << field.storage_format << std::endl;
    for(int i=0;i<4;i++){
      fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;
    }
    // just to keep the space and write it later
    fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;
    fout << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl;
    for(int i=0;i<4;i++){
      fout << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;
    }
    fout << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;
    fout << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;
    fout << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;
    fout << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;
    fout << "CREATOR = "         << field.creator          << std::endl;
    fout << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;
    fout << "CREATION_DATE = "   << field.creation_date    << std::endl;
    fout << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;
    fout << "FLOATING_POINT = "  << field.floating_point   << std::endl;
    fout << "END_HEADER"         << std::endl;
    field.data_start = fout.tellp();
    return field.data_start;
 }
--- a/lib/qcd/spin/Dirac.cc
+++ b/lib/qcd/spin/Dirac.cc
@@ -61,6 +61,31 @@ namespace Grid {
      "         "
    };
    // Build a product of gamma matrices selected by the low four bits of `i`.
    // Starting from g = 1, the result is right-multiplied by GammaX, GammaY,
    // GammaZ and GammaT (in that order) for each of bits 0, 1, 2 and 3 that
    // is set in `i`. Higher bits are ignored.
    SpinMatrix makeGammaProd(const unsigned int i)
    {
      // Bit k of `i` selects factor[k].
      using GammaId = decltype(Gamma::GammaMatrix::GammaX);
      const GammaId factor[4] = {
        Gamma::GammaMatrix::GammaX,
        Gamma::GammaMatrix::GammaY,
        Gamma::GammaMatrix::GammaZ,
        Gamma::GammaMatrix::GammaT
      };

      SpinMatrix prod;
      prod = 1.;

      for (unsigned int bit = 0; bit < 4; ++bit)
      {
        if ((i >> bit) & 0x1)
        {
          prod = prod*Gamma(factor[bit]);
        }
      }

      return prod;
    }
    //    void sprojMul( vHalfSpinColourVector &out,vColourMatrix &u, vSpinColourVector &in){
    //      vHalfSpinColourVector hspin;
    //      spProjXp(hspin,in);
--- a/lib/qcd/spin/Dirac.h
+++ b/lib/qcd/spin/Dirac.h
@@ -83,6 +83,9 @@ namespace QCD {
  };
    // Make gamma products (Chroma convention)
    SpinMatrix makeGammaProd(const unsigned int i);
    /* Gx
     *  0 0  0  i    
     *  0 0  i  0    
--- a/lib/serialisation/BaseIO.h
+++ b/lib/serialisation/BaseIO.h
@@ -49,6 +49,24 @@ namespace Grid {
    return v;
  }
  // output to streams for vectors
  // Stream a std::vector as "[x0 x1 ... xn]" (an empty vector prints "[]").
  //
  // os : destination stream.
  // v  : vector whose elements are themselves streamable with operator<<.
  // Returns `os` so that insertions can be chained.
  //
  // The single-space separator is written between elements instead of
  // printing a trailing space followed by a "\b" backspace: the backspace
  // only hides the space on a terminal and otherwise ends up as a stray
  // control character in files and string streams.
  template < class T >
  inline std::ostream & operator<<(std::ostream &os, const std::vector<T> &v)
  {
    os << "[";
    const char *sep = "";
    for (auto &x: v)
    {
      os << sep << x;
      sep = " ";
    }
    os << "]";

    return os;
  }
  class Serializable {};
  // static polymorphism implemented using CRTP idiom
@@ -154,23 +172,6 @@ namespace Grid {
    r.read(s, output);
  }
  // Stream a std::vector as "[x0 x1 ... xn]" (an empty vector prints "[]").
  //
  // os : destination stream.
  // v  : vector whose elements are themselves streamable with operator<<.
  // Returns `os` so that insertions can be chained.
  //
  // The single-space separator is written between elements instead of
  // printing a trailing space followed by a "\b" backspace: the backspace
  // only hides the space on a terminal and otherwise ends up as a stray
  // control character in files and string streams.
  template < class T >
  inline std::ostream& operator << (std::ostream& os, const std::vector<T>& v)
  {
    os << "[";
    const char *sep = "";
    for (auto &x: v)
    {
      os << sep << x;
      sep = " ";
    }
    os << "]";

    return os;
  }
  // Writer template implementation ////////////////////////////////////////////
  template <typename T>
  Writer<T>::Writer(void)
--- a/lib/simd/Grid_empty.h
+++ b/lib/simd/Grid_empty.h
@@ -379,6 +379,54 @@ namespace Optimization {
    void permute(vtype &a, vtype b, int perm) {
   };
  // Cyclic rotation of the elements held in the `f` array of a u128f / u128d.
  // For a rotation count n the result satisfies out.f[k] = in.f[(k+n) mod N],
  // with N = 4 for u128f and N = 2 for u128d. Any other n trips assert(0).
  struct Rotate{

    // Rotate the four elements of `in` by n (0 <= n <= 3) and return the result.
    static inline u128f rotate(u128f in,int n){
      u128f out;
      switch(n){
      case 0:
        out.f[0] = in.f[0];
        out.f[1] = in.f[1];
        out.f[2] = in.f[2];
        out.f[3] = in.f[3];
        break;
      case 1:
        out.f[0] = in.f[1];
        out.f[1] = in.f[2];
        out.f[2] = in.f[3];
        out.f[3] = in.f[0];
        break;
      case 2:
        out.f[0] = in.f[2];
        out.f[1] = in.f[3];
        out.f[2] = in.f[0];
        out.f[3] = in.f[1];
        break;
      case 3:
        out.f[0] = in.f[3];
        out.f[1] = in.f[0];
        out.f[2] = in.f[1];
        out.f[3] = in.f[2];
        break;
      default: assert(0);
      }
      // Previously the function fell off its end without returning a value,
      // which is undefined behaviour for a non-void function.
      return out;
    }

    // Rotate the two elements of `in` by n (0 or 1) and return the result.
    static inline u128d rotate(u128d in,int n){
      u128d out;
      switch(n){
      case 0:
        out.f[0] = in.f[0];
        out.f[1] = in.f[1];
        break;
      case 1:
        out.f[0] = in.f[1];
        out.f[1] = in.f[0];
        break;
      default: assert(0);
      }
      // Same fix as above: hand the rotated value back to the caller.
      return out;
    }
  };
  //Complex float Reduce
  template<>
  inline Grid::ComplexF Reduce<Grid::ComplexF, u128f>::operator()(u128f in){ //2 complex