diff --git a/lib/Init.cc b/lib/Init.cc index fd50c4d5..72d79905 100644 --- a/lib/Init.cc +++ b/lib/Init.cc @@ -144,6 +144,10 @@ void GridParseLayout(char **argv,int argc, } if( GridCmdOptionExists(argv,argv+argc,"--threads") ){ std::vector ompthreads(0); +#ifndef GRID_OMP + std::cout << GridLogWarning << "'--threads' option used but Grid was" + << " not compiled with thread support" << std::endl; +#endif arg= GridCmdOptionPayload(argv,argv+argc,"--threads"); GridCmdOptionIntVector(arg,ompthreads); assert(ompthreads.size()==1); @@ -187,7 +191,7 @@ void Grid_init(int *argc,char ***argv) std::cout< using Vector = std::vector >; // Aligned allocator?? @@ -88,8 +94,6 @@ template class Lattice : public LatticeBase { public: - - GridBase *_grid; int checkerboard; Vector _odata; @@ -177,8 +181,8 @@ PARALLEL_FOR_LOOP } //GridFromExpression is tricky to do template - Lattice(const LatticeUnaryExpression & expr): _grid(nullptr){ - + Lattice(const LatticeUnaryExpression & expr) { + _grid = nullptr; GridFromExpression(_grid,expr); assert(_grid!=nullptr); @@ -199,7 +203,8 @@ PARALLEL_FOR_LOOP } }; template - Lattice(const LatticeBinaryExpression & expr): _grid(nullptr){ + Lattice(const LatticeBinaryExpression & expr) { + _grid = nullptr; GridFromExpression(_grid,expr); assert(_grid!=nullptr); @@ -220,7 +225,8 @@ PARALLEL_FOR_LOOP } }; template - Lattice(const LatticeTrinaryExpression & expr): _grid(nullptr){ + Lattice(const LatticeTrinaryExpression & expr) { + _grid = nullptr; GridFromExpression(_grid,expr); assert(_grid!=nullptr); @@ -240,7 +246,8 @@ PARALLEL_FOR_LOOP // Constructor requires "grid" passed. // what about a default grid? 
////////////////////////////////////////////////////////////////// - Lattice(GridBase *grid) : _grid(grid), _odata(_grid->oSites()) { + Lattice(GridBase *grid) : _odata(grid->oSites()) { + _grid = grid; // _odata.reserve(_grid->oSites()); // _odata.resize(_grid->oSites()); // std::cout << "Constructing lattice object with Grid pointer "<<_grid< strong_inline Lattice & operator = (const sobj & r){ PARALLEL_FOR_LOOP for(int ss=0;ss<_grid->oSites();ss++){ diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 178b5937..638563a9 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -361,7 +361,7 @@ PARALLEL_FOR_LOOP template -void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice, int orthog) +void InsertSlice(Lattice &lowDim,Lattice & higherDim,int slice, int orthog) { typedef typename vobj::scalar_object sobj; sobj s; @@ -374,7 +374,7 @@ void InsertSlice(const Lattice &lowDim,Lattice & higherDim,int slice assert(nl+1 == nh); assert(orthog=0); - assert(hg->_processors[orthog]==0); + assert(hg->_processors[orthog]==1); int dl; dl = 0; for(int d=0;d &lowDim,Lattice & higherDim,int slice } } - // the above should guarantee that the operations are local -PARALLEL_FOR_LOOP - for(int idx=0;idxlSites();idx++){ - std::vector lcoor(nl); - std::vector hcoor(nh); - lg->LocalIndexToLocalCoor(idx,lcoor); - dl=0; - hcoor[orthog] = slice; - for(int d=0;d -void ExtractSlice(Lattice &lowDim,const Lattice & higherDim,int slice, int orthog) -{ - typedef typename vobj::scalar_object sobj; - sobj s; - - GridBase *lg = lowDim._grid; - GridBase *hg = higherDim._grid; - int nl = lg->_ndimension; - int nh = hg->_ndimension; - - assert(nl+1 == nh); - assert(orthog=0); - assert(hg->_processors[orthog]==0); - - int dl; dl = 0; - for(int d=0;d_processors[dl] == hg->_processors[d]); - assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); - dl++; - } - } // the above should guarantee that the operations are local 
PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ @@ -443,6 +401,48 @@ PARALLEL_FOR_LOOP peekLocalSite(s,lowDim,lcoor); pokeLocalSite(s,higherDim,hcoor); } +} + +template +void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, int orthog) +{ + typedef typename vobj::scalar_object sobj; + sobj s; + + GridBase *lg = lowDim._grid; + GridBase *hg = higherDim._grid; + int nl = lg->_ndimension; + int nh = hg->_ndimension; + + assert(nl+1 == nh); + assert(orthog>=0); + assert(hg->_processors[orthog]==1); + + int dl; dl = 0; + for(int d=0;d_processors[dl] == hg->_processors[d]); + assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); + dl++; + } + } + // the above should guarantee that the operations are local +PARALLEL_FOR_LOOP + for(int idx=0;idxlSites();idx++){ + std::vector lcoor(nl); + std::vector hcoor(nh); + lg->LocalIndexToLocalCoor(idx,lcoor); + dl=0; + hcoor[orthog] = slice; + for(int d=0;d void permute(vtype &a, vtype b, int perm) { - }; + }; + + struct Rotate{ + + static inline u128f rotate(u128f in,int n){ + u128f out; + switch(n){ + case 0: + out.f[0] = in.f[0]; + out.f[1] = in.f[1]; + out.f[2] = in.f[2]; + out.f[3] = in.f[3]; + break; + case 1: + out.f[0] = in.f[1]; + out.f[1] = in.f[2]; + out.f[2] = in.f[3]; + out.f[3] = in.f[0]; + break; + case 2: + out.f[0] = in.f[2]; + out.f[1] = in.f[3]; + out.f[2] = in.f[0]; + out.f[3] = in.f[1]; + break; + case 3: + out.f[0] = in.f[3]; + out.f[1] = in.f[0]; + out.f[2] = in.f[1]; + out.f[3] = in.f[2]; + break; + default: assert(0); + } + return out; + } + static inline u128d rotate(u128d in,int n){ + u128d out; + switch(n){ + case 0: + out.f[0] = in.f[0]; + out.f[1] = in.f[1]; + break; + case 1: + out.f[0] = in.f[1]; + out.f[1] = in.f[0]; + break; + default: assert(0); + } + return out; + } + }; //Complex float Reduce template<>