From b32c14b433719ef0acb12a8a0e96478705f6dee7 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 22 Apr 2015 22:46:48 +0100 Subject: [PATCH] Got the NERSC IO working and fixed a bug in cshift. --- configure | 22 + configure.ac | 2 + lib/Grid.h | 1 + lib/Grid_communicator.h | 7 +- lib/Grid_comparison.h | 24 +- lib/Grid_config.h | 6 + lib/Grid_config.h.in | 6 + lib/Grid_lattice.h | 12 +- lib/cartesian/Grid_cartesian_base.h | 41 +- lib/communicator/Grid_communicator_fake.cc | 1 + lib/communicator/Grid_communicator_mpi.cc | 3 + lib/cshift/Grid_cshift_mpi.h | 8 +- lib/lattice/Grid_lattice_coordinate.h | 9 +- lib/lattice/Grid_lattice_peekpoke.h | 84 +++- lib/lattice/Grid_lattice_reduction.h | 30 ++ lib/math/Grid_math_tensors.h | 41 +- lib/math/Grid_math_traits.h | 14 + lib/parallelIO/GridNerscIO.h | 524 +++++++++++++++++++++ lib/qcd/Grid_QCD.h | 4 +- tests/Grid_cshift.cc | 84 ++++ tests/Grid_nersc_io.cc | 62 +++ tests/Makefile.am | 8 +- 22 files changed, 925 insertions(+), 68 deletions(-) create mode 100644 lib/parallelIO/GridNerscIO.h create mode 100644 tests/Grid_cshift.cc create mode 100644 tests/Grid_nersc_io.cc diff --git a/configure b/configure index fc0b6453..48fafcf1 100755 --- a/configure +++ b/configure @@ -4999,6 +4999,28 @@ _ACEOF fi done +for ac_func in ntohll +do : + ac_fn_c_check_func "$LINENO" "ntohll" "ac_cv_func_ntohll" +if test "x$ac_cv_func_ntohll" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_NTOHLL 1 +_ACEOF + +fi +done + +for ac_func in be64toh +do : + ac_fn_c_check_func "$LINENO" "be64toh" "ac_cv_func_be64toh" +if test "x$ac_cv_func_be64toh" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_BE64TOH 1 +_ACEOF + +fi +done + # Check whether --enable-simd was given. if test "${enable_simd+set}" = set; then : diff --git a/configure.ac b/configure.ac index 97fdbf85..21f40f6c 100644 --- a/configure.ac +++ b/configure.ac @@ -25,6 +25,8 @@ AC_TYPE_UINT64_T # Checks for library functions. AC_CHECK_FUNCS([gettimeofday]) +AC_CHECK_FUNCS([ntohll]) +AC_CHECK_FUNCS([be64toh]) AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE|AVX|AVX2|AVX512],[Select instructions])],[ac_SIMD=${enable_simd}],[ac_SIMD=AVX2]) diff --git a/lib/Grid.h b/lib/Grid.h index c456bf8a..7e12490a 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -51,6 +51,7 @@ #include #include #include +#include namespace Grid { diff --git a/lib/Grid_communicator.h b/lib/Grid_communicator.h index 845c0123..f5baa413 100644 --- a/lib/Grid_communicator.h +++ b/lib/Grid_communicator.h @@ -35,6 +35,7 @@ class CartesianCommunicator { // Grid information queries ///////////////////////////////// int IsBoss(void) { return _processor==0; }; + int BossRank(void) { return 0; }; int ThisRank(void) { return _processor; }; const std::vector & ThisProcessorCoor(void) { return _processor_coor; }; const std::vector & ProcessorGrid(void) { return _processors; }; @@ -49,6 +50,8 @@ class CartesianCommunicator { void GlobalSum(RealD &); void GlobalSumVector(RealD *,int N); + void GlobalSum(uint32_t &); + void GlobalSum(ComplexF &c) { GlobalSumVector((float *)&c,2); @@ -68,12 +71,10 @@ class CartesianCommunicator { } template void GlobalSum(obj &o){ - typedef typename obj::scalar_type scalar_type; int words = sizeof(obj)/sizeof(scalar_type); - scalar_type * ptr = (scalar_type *)& o; - GlobalSum(ptr,words); + GlobalSumVector(ptr,words); } //////////////////////////////////////////////////////////// // Face exchange diff --git a/lib/Grid_comparison.h b/lib/Grid_comparison.h index b91f6f91..530817c6 100644 --- a/lib/Grid_comparison.h +++ b/lib/Grid_comparison.h @@ -8,42 +8,42 @@ namespace Grid { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs == rhs; + return TensorRemove(lhs) == TensorRemove(rhs); } }; template class vne { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs != rhs; + return TensorRemove(lhs) != TensorRemove(rhs); } }; template class vlt { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs < rhs; + return TensorRemove(lhs) < TensorRemove(rhs); } }; template class vle { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs <= rhs; + return TensorRemove(lhs) <= TensorRemove(rhs); } }; template class vgt { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs > rhs; + return TensorRemove(lhs) > TensorRemove(rhs); } }; template class vge { public: vInteger operator()(const lobj &lhs, const robj &rhs) { - return lhs >= rhs; + return TensorRemove(lhs) >= TensorRemove(rhs); } }; @@ -52,42 +52,42 @@ namespace Grid { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs == rhs; + return TensorRemove(lhs) == TensorRemove(rhs); } }; template class sne { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs != rhs; + return TensorRemove(lhs) != TensorRemove(rhs); } }; template class slt { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs < rhs; + return TensorRemove(lhs) < TensorRemove(rhs); } }; template class sle { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs <= rhs; + return TensorRemove(lhs) <= TensorRemove(rhs); } }; template class sgt { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs > rhs; + return TensorRemove(lhs) > TensorRemove(rhs); } }; template class sge { public: Integer operator()(const lobj &lhs, const robj &rhs) { - return lhs >= rhs; + return TensorRemove(lhs) >= TensorRemove(rhs); } }; diff --git a/lib/Grid_config.h b/lib/Grid_config.h index cb676b54..2171ea6b 100644 --- a/lib/Grid_config.h +++ b/lib/Grid_config.h @@ -16,6 +16,9 @@ /* GRID_COMMS_NONE */ /* #undef GRID_COMMS_NONE */ +/* Define to 1 if you have the `be64toh' function. */ +/* #undef HAVE_BE64TOH */ + /* Define to 1 if you have the `gettimeofday' function. */ #define HAVE_GETTIMEOFDAY 1 @@ -31,6 +34,9 @@ /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 +/* Define to 1 if you have the `ntohll' function. */ +/* #undef HAVE_NTOHLL */ + /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 diff --git a/lib/Grid_config.h.in b/lib/Grid_config.h.in index 467ee6e4..2381525f 100644 --- a/lib/Grid_config.h.in +++ b/lib/Grid_config.h.in @@ -15,6 +15,9 @@ /* GRID_COMMS_NONE */ #undef GRID_COMMS_NONE +/* Define to 1 if you have the `be64toh' function. */ +#undef HAVE_BE64TOH + /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY @@ -30,6 +33,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `ntohll' function. */ +#undef HAVE_NTOHLL + /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H diff --git a/lib/Grid_lattice.h b/lib/Grid_lattice.h index 03bacf2d..7256459c 100644 --- a/lib/Grid_lattice.h +++ b/lib/Grid_lattice.h @@ -37,7 +37,8 @@ public: // what about a default grid? ////////////////////////////////////////////////////////////////// Lattice(GridBase *grid) : _grid(grid) { - _odata.reserve(_grid->oSites()); + // _odata.reserve(_grid->oSites()); + _odata.resize(_grid->oSites()); assert((((uint64_t)&_odata[0])&0xF) ==0); checkerboard=0; } @@ -49,6 +50,15 @@ public: } return *this; } + template inline Lattice & operator = (const Lattice & r){ + conformable(*this,r); + std::cout<<"Lattice operator ="<oSites();ss++){ + this->_odata[ss]=r._odata[ss]; + } + return *this; + } // *=,+=,-= operators inherit behvour from correspond */+/- operation template inline Lattice &operator *=(const T &r) { diff --git a/lib/cartesian/Grid_cartesian_base.h b/lib/cartesian/Grid_cartesian_base.h index 9ee10293..aff9375a 100644 --- a/lib/cartesian/Grid_cartesian_base.h +++ b/lib/cartesian/Grid_cartesian_base.h @@ -6,34 +6,23 @@ namespace Grid{ - class GridRNG ; // Forward declaration; - + ////////////////////////////////////////////////////////////////////// + // Commicator provides information on the processor grid + ////////////////////////////////////////////////////////////////////// + // unsigned long _ndimension; + // std::vector _processors; // processor grid + // int _processor; // linear processor rank + // std::vector _processor_coor; // linear processor rank + ////////////////////////////////////////////////////////////////////// class GridBase : public CartesianCommunicator { + public: // Give Lattice access template friend class Lattice; - GridRNG *_rng; - GridBase(std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; - //FIXME - // protected: - // Lattice wide random support. not yet fully implemented. Need seed strategy - // and one generator per site. - // std::default_random_engine generator; - // static std::mt19937 generator( 9 ); - - ////////////////////////////////////////////////////////////////////// - // Commicator provides information on the processor grid - ////////////////////////////////////////////////////////////////////// - // unsigned long _ndimension; - // std::vector _processors; // processor grid - // int _processor; // linear processor rank - // std::vector _processor_coor; // linear processor rank - ////////////////////////////////////////////////////////////////////// - // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// Global dimensions of array prior to cb removal @@ -101,6 +90,13 @@ public: for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d]; return idx; } + inline void CoorFromIndex (std::vector& coor,int index,std::vector &dims){ + coor.resize(_ndimension); + for(int d=0;d<_ndimension;d++){ + coor[d] = index % dims[d]; + index = index / dims[d]; + } + } inline void oCoorFromOindex (std::vector& coor,int Oindex){ coor.resize(_ndimension); for(int d=0;d<_ndimension;d++){ @@ -146,6 +142,7 @@ public: inline int lSites(void) { return _isites*_osites; }; inline int gSites(void) { return _isites*_osites*_Nprocessors; }; inline int Nd (void) { return _ndimension;}; + inline const std::vector &FullDimensions(void) { return _fdimensions;}; inline const std::vector &GlobalDimensions(void) { return _gdimensions;}; inline const std::vector &LocalDimensions(void) { return _ldimensions;}; @@ -175,10 +172,10 @@ public: std::vector coor(_ndimension); ProcessorCoorFromRank(rank,coor); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]&coor[mu]; + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu]; iCoorFromIindex(coor,i_idx); - for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]&coor[mu]; + for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu]; oCoorFromOindex (coor,o_idx); for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu]; diff --git a/lib/communicator/Grid_communicator_fake.cc b/lib/communicator/Grid_communicator_fake.cc index 2b94ea4b..5f4a4024 100644 --- a/lib/communicator/Grid_communicator_fake.cc +++ b/lib/communicator/Grid_communicator_fake.cc @@ -17,6 +17,7 @@ CartesianCommunicator::CartesianCommunicator(std::vector &processors) void CartesianCommunicator::GlobalSum(float &){} void CartesianCommunicator::GlobalSumVector(float *,int N){} void CartesianCommunicator::GlobalSum(double &){} +void CartesianCommunicator::GlobalSum(uint32_t &){} void CartesianCommunicator::GlobalSumVector(double *,int N){} // Basic Halo comms primitive diff --git a/lib/communicator/Grid_communicator_mpi.cc b/lib/communicator/Grid_communicator_mpi.cc index dba5f039..bd53bd9c 100644 --- a/lib/communicator/Grid_communicator_mpi.cc +++ b/lib/communicator/Grid_communicator_mpi.cc @@ -28,6 +28,9 @@ CartesianCommunicator::CartesianCommunicator(std::vector &processors) assert(Size==_Nprocessors); } +void CartesianCommunicator::GlobalSum(uint32_t &u){ + MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); +} void CartesianCommunicator::GlobalSum(float &f){ MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); } diff --git a/lib/cshift/Grid_cshift_mpi.h b/lib/cshift/Grid_cshift_mpi.h index c078c7d8..60894ef3 100644 --- a/lib/cshift/Grid_cshift_mpi.h +++ b/lib/cshift/Grid_cshift_mpi.h @@ -188,8 +188,8 @@ template void Cshift_comms_simd(Lattice &ret,Lattice &r if ( comm_any ) { - for(int i=0;i void Cshift_comms_simd(Lattice &ret,Lattice &r for(int i=0;i inline void LatticeCoordinate(Lattice &l,int mu) { + typedef typename iobj::scalar_type scalar_type; + typedef typename iobj::vector_type vector_type; GridBase *grid = l._grid; int Nsimd = grid->iSites(); std::vector gcoor; - std::vector mergebuf(Nsimd); - std::vector mergeptr(Nsimd); - vInteger vI; + std::vector mergebuf(Nsimd); + std::vector mergeptr(Nsimd); + vector_type vI; for(int o=0;ooSites();o++){ for(int i=0;iiSites();i++){ grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor); - // grid->RankIndexToGlobalCoor(0,o,i,gcoor); mergebuf[i]=gcoor[mu]; mergeptr[i]=&mergebuf[i]; } diff --git a/lib/lattice/Grid_lattice_peekpoke.h b/lib/lattice/Grid_lattice_peekpoke.h index 41212037..93245016 100644 --- a/lib/lattice/Grid_lattice_peekpoke.h +++ b/lib/lattice/Grid_lattice_peekpoke.h @@ -43,6 +43,7 @@ namespace Grid { } return ret; }; + //////////////////////////////////////////////////////////////////////////////////////////////////// // Poke internal indices of a Lattice object //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -88,26 +89,26 @@ namespace Grid { assert( sizeof(sobj)*Nsimd == sizeof(vobj)); int rank,odx,idx; - grid->GlobalCoorToRankIndex(rank,odx,idx,site); - // Optional to broadcast from node 0. - grid->Broadcast(0,s); + grid->GlobalCoorToRankIndex(rank,odx,idx,site); + grid->Broadcast(grid->BossRank(),s); std::vector buf(Nsimd); std::vector pointers(Nsimd); - for(int i=0;iThisRank() ) { + for(int i=0;i + void peekLocalSite(sobj &s,Lattice &l,std::vector &site){ + + GridBase *grid=l._grid; + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.checkerboard== l._grid->CheckerBoard(site)); + assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + int odx,idx; + idx= grid->iIndex(site); + odx= grid->oIndex(site); + + std::vector buf(Nsimd); + std::vector pointers(Nsimd); + for(int i=0;i + void pokeLocalSite(const sobj &s,Lattice &l,std::vector &site){ + + GridBase *grid=l._grid; + + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nsimd = grid->Nsimd(); + + assert( l.checkerboard== l._grid->CheckerBoard(site)); + assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + + int odx,idx; + idx= grid->iIndex(site); + odx= grid->oIndex(site); + + std::vector buf(Nsimd); + std::vector pointers(Nsimd); + for(int i=0;i + inline typename vobj::scalar_object sum(const Lattice &arg){ + + GridBase *grid=arg._grid; + int Nsimd = grid->Nsimd(); + + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + + vobj vsum; + sobj ssum; + + vsum=zero; + ssum=zero; + for(int ss=0;ssoSites(); ss++){ + vsum = vsum + arg._odata[ss]; + } + + std::vector buf(Nsimd); + std::vector pointers(Nsimd); + for(int i=0;iGlobalSum(ssum); + + return ssum; + } + } #endif diff --git a/lib/math/Grid_math_tensors.h b/lib/math/Grid_math_tensors.h index 98b9d22e..0ee30970 100644 --- a/lib/math/Grid_math_tensors.h +++ b/lib/math/Grid_math_tensors.h @@ -13,10 +13,12 @@ template class iScalar public: vtype _internal; - typedef typename GridTypeMapper::scalar_type scalar_type; + typedef typename GridTypeMapper::scalar_type scalar_type; typedef typename GridTypeMapper::vector_type vector_type; typedef typename GridTypeMapper::tensor_reduced tensor_reduced_v; typedef iScalar tensor_reduced; + typedef typename GridTypeMapper::scalar_object recurse_scalar_object; + typedef iScalar scalar_object; enum { TensorLevel = GridTypeMapper::TensorLevel + 1}; @@ -27,7 +29,7 @@ public: iScalar(scalar_type s) : _internal(s) {};// recurse down and hit the constructor for vector_type - iScalar(Zero &z){ *this = zero; }; + iScalar(const Zero &z){ *this = zero; }; iScalar & operator= (const Zero &hero){ zeroit(*this); @@ -73,11 +75,18 @@ public: operator ComplexD () const { return(TensorRemove(_internal)); }; operator RealD () const { return(real(TensorRemove(_internal))); } + + template::notvalue, T>::type* = nullptr > inline auto operator = (T arg) -> iScalar + { + _internal = arg; + return *this; + } + }; /////////////////////////////////////////////////////////// // Allows to turn scalar>>> back to double. /////////////////////////////////////////////////////////// -template inline typename std::enable_if::notvalue, T>::type TensorRemove(T arg) { return arg;} +template inline typename std::enable_if::notvalue, T>::type TensorRemove(T arg) { return arg;} template inline auto TensorRemove(iScalar arg) -> decltype(TensorRemove(arg._internal)) { return TensorRemove(arg._internal); @@ -91,14 +100,17 @@ public: typedef typename GridTypeMapper::scalar_type scalar_type; typedef typename GridTypeMapper::vector_type vector_type; typedef typename GridTypeMapper::tensor_reduced tensor_reduced_v; + typedef typename GridTypeMapper::scalar_object recurse_scalar_object; + typedef iScalar tensor_reduced; + typedef iVector scalar_object; + enum { TensorLevel = GridTypeMapper::TensorLevel + 1}; - typedef iScalar tensor_reduced; - iVector(Zero &z){ *this = zero; }; + iVector(const Zero &z){ *this = zero; }; iVector() {};// Empty constructure - iVector & operator= (Zero &hero){ + iVector & operator= (const Zero &hero){ zeroit(*this); return *this; } @@ -153,18 +165,27 @@ public: typedef typename GridTypeMapper::scalar_type scalar_type; typedef typename GridTypeMapper::vector_type vector_type; - typedef typename GridTypeMapper::tensor_reduced tensor_reduced_v; + typedef typename GridTypeMapper::scalar_object recurse_scalar_object; + typedef iScalar tensor_reduced; + typedef iMatrix scalar_object; enum { TensorLevel = GridTypeMapper::TensorLevel + 1}; - typedef iScalar tensor_reduced; - iMatrix(Zero &z){ *this = zero; }; + iMatrix(const Zero &z){ *this = zero; }; iMatrix() {}; - iMatrix & operator= (Zero &hero){ + iMatrix & operator= (const Zero &hero){ zeroit(*this); return *this; } + template::notvalue, T>::type* = nullptr > inline auto operator = (T arg) -> iMatrix + { + zeroit(*this); + for(int i=0;i &that){ for(int i=0;i class GridTypeMapper { @@ -43,6 +45,7 @@ namespace Grid { typedef RealD scalar_type; typedef RealD vector_type; typedef RealD tensor_reduced; + typedef RealD scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -50,6 +53,7 @@ namespace Grid { typedef ComplexF scalar_type; typedef ComplexF vector_type; typedef ComplexF tensor_reduced; + typedef ComplexF scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -57,6 +61,7 @@ namespace Grid { typedef ComplexD scalar_type; typedef ComplexD vector_type; typedef ComplexD tensor_reduced; + typedef ComplexD scalar_object; enum { TensorLevel = 0 }; }; @@ -65,6 +70,7 @@ namespace Grid { typedef RealF scalar_type; typedef vRealF vector_type; typedef vRealF tensor_reduced; + typedef RealF scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -72,6 +78,7 @@ namespace Grid { typedef RealD scalar_type; typedef vRealD vector_type; typedef vRealD tensor_reduced; + typedef RealD scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -79,6 +86,7 @@ namespace Grid { typedef ComplexF scalar_type; typedef vComplexF vector_type; typedef vComplexF tensor_reduced; + typedef ComplexF scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -86,6 +94,7 @@ namespace Grid { typedef ComplexD scalar_type; typedef vComplexD vector_type; typedef vComplexD tensor_reduced; + typedef ComplexD scalar_object; enum { TensorLevel = 0 }; }; template<> class GridTypeMapper { @@ -93,6 +102,7 @@ namespace Grid { typedef Integer scalar_type; typedef vInteger vector_type; typedef vInteger tensor_reduced; + typedef Integer scalar_object; enum { TensorLevel = 0 }; }; @@ -102,6 +112,10 @@ namespace Grid { static const bool notvalue = false; }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; template<> struct isGridTensor { static const bool value = false; static const bool notvalue = true; diff --git a/lib/parallelIO/GridNerscIO.h b/lib/parallelIO/GridNerscIO.h new file mode 100644 index 00000000..51820357 --- /dev/null +++ b/lib/parallelIO/GridNerscIO.h @@ -0,0 +1,524 @@ +#ifndef GRID_NERSC_IO_H +#define GRID_NERSC_IO_H + +#include +#include +#include +#include + +#include + +namespace Grid { + + using namespace QCD; + +//////////////////////////////////////////////////////////////////////////////// +// Some data types for intermediate storage +//////////////////////////////////////////////////////////////////////////////// + template using iLorentzColour2x3 = iVector, 2>, 4 >; +typedef iLorentzColour2x3 LorentzColour2x3; +typedef iLorentzColour2x3 LorentzColour2x3F; +typedef iLorentzColour2x3 LorentzColour2x3D; + +//////////////////////////////////////////////////////////////////////////////// +// header specification/interpretation +//////////////////////////////////////////////////////////////////////////////// +class NerscField { + public: + // header strings (not in order) + int dimension[4]; + std::string boundary[4]; + int data_start; + std::string hdr_version; + std::string storage_format; + // Checks on data + double link_trace; + double plaquette; + uint32_t checksum; + unsigned int sequence_number; + std::string data_type; + std::string ensemble_id ; + std::string ensemble_label ; + std::string creator ; + std::string creator_hardware ; + std::string creation_date ; + std::string archive_date ; + std::string floating_point; +}; + + +//////////////////////////////////////////////////////////////////////////////// +// Write and read from fstream; comput header offset for payload +//////////////////////////////////////////////////////////////////////////////// +inline unsigned int writeNerscHeader(NerscField &field,std::string file) +{ + std::ofstream fout(file,std::ios::out); + + fout.seekp(0,std::ios::beg); + fout << "BEGIN_HEADER" << std::endl; + fout << "HDR_VERSION = " << field.hdr_version << std::endl; + fout << "DATATYPE = " << field.data_type << std::endl; + fout << "STORAGE_FORMAT = " << field.storage_format << std::endl; + + for(int i=0;i<4;i++){ + fout << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; + } + // just to keep the space and write it later + fout << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; + fout << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; + for(int i=0;i<4;i++){ + fout << "BOUNDARY_"< header; + std::string line; + + ////////////////////////////////////////////////// + // read the header + ////////////////////////////////////////////////// + std::ifstream fin(file); + + getline(fin,line); // read one line and insist is + + removeWhitespace(line); + assert(line==std::string("BEGIN_HEADER")); + + do { + getline(fin,line); // read one line + int eq = line.find("="); + if(eq >0) { + std::string key=line.substr(0,eq); + std::string val=line.substr(eq+1); + removeWhitespace(key); + removeWhitespace(val); + + header[key] = val; + } + } while( line.find("END_HEADER") == std::string::npos ); + + field.data_start = fin.tellg(); + + ////////////////////////////////////////////////// + // chomp the values + ////////////////////////////////////////////////// + + field.hdr_version = header[std::string("HDR_VERSION")]; + field.data_type = header[std::string("DATATYPE")]; + field.storage_format = header[std::string("STORAGE_FORMAT")]; + + field.dimension[0] = std::stol(header[std::string("DIMENSION_1")]); + field.dimension[1] = std::stol(header[std::string("DIMENSION_2")]); + field.dimension[2] = std::stol(header[std::string("DIMENSION_3")]); + field.dimension[3] = std::stol(header[std::string("DIMENSION_4")]); + + assert(grid->_ndimension == 4); + for(int d=0;d<4;d++){ + assert(grid->_fdimensions[d]==field.dimension[d]); + } + + field.link_trace = std::stod(header[std::string("LINK_TRACE")]); + field.plaquette = std::stod(header[std::string("PLAQUETTE")]); + + field.boundary[0] = header[std::string("BOUNDARY_1")]; + field.boundary[1] = header[std::string("BOUNDARY_2")]; + field.boundary[2] = header[std::string("BOUNDARY_3")]; + field.boundary[3] = header[std::string("BOUNDARY_4")]; + + field.checksum = std::stoul(header[std::string("CHECKSUM")],0,16); + field.ensemble_id = header[std::string("ENSEMBLE_ID")]; + field.ensemble_label = header[std::string("ENSEMBLE_LABEL")]; + field.sequence_number = std::stol(header[std::string("SEQUENCE_NUMBER")]); + field.creator = header[std::string("CREATOR")]; + field.creator_hardware = header[std::string("CREATOR_HARDWARE")]; + field.creation_date = header[std::string("CREATION_DATE")]; + field.archive_date = header[std::string("ARCHIVE_DATE")]; + field.floating_point = header[std::string("FLOATING_POINT")]; + + return field.data_start; +} + + +////////////////////////////////////////////////////////////////////// +// Utilities +////////////////////////////////////////////////////////////////////// +inline void reconstruct3(LorentzColourMatrix & cm) +{ + const int x=0; + const int y=1; + const int z=2; + for(int mu=0;mu<4;mu++){ + cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy + cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz + cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx + } +} + + + void inline be32toh(void *file_object,uint32_t bytes) + { + uint32_t * f = (uint32_t *)file_object; + for(int i=0;i*sizeof(uint32_t)>8) | ((f&0xFF000000UL)>>24) ; + fp[i] = ntohl(f); + } + } + void inline be64toh(void *file_object,uint32_t bytes) + { + uint64_t * f = (uint64_t *)file_object; + for(int i=0;i*sizeof(uint64_t)>8) | ((f&0xFF000000UL)>>24) ; + g = g << 32; + f = f >> 32; + g|= ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; + fp[i] = ntohl(g); + } + } + +inline void NerscChecksum(uint32_t *buf,uint32_t buf_size,uint32_t &csum) +{ + for(int i=0;i*sizeof(uint32_t) + struct NerscSimpleMunger{ + void operator() (fobj &in,sobj &out,uint32_t &csum){ + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<3;i++){ + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)()(i,j); + }}} + + NerscChecksum((uint32_t *)&in,sizeof(in),csum); + }; + }; + template + struct NerscSimpleUnmunger{ + void operator() (sobj &in,fobj &out,uint32_t &csum){ + for(int mu=0;mu<4;mu++){ + for(int i=0;i<3;i++){ + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)()(i,j); + }}} + NerscChecksum((uint32_t *)&out,sizeof(out),csum); + }; + }; + + template + struct Nersc3x2munger{ + void operator() (fobj &in,sobj &out,uint32_t &csum){ + + NerscChecksum((uint32_t *)&in,sizeof(in),csum); + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<2;i++){ + for(int j=0;j<3;j++){ + out(mu)()(i,j) = in(mu)(i)(j); + }} + } + reconstruct3(out); + } + }; + + template + struct Nersc3x2unmunger{ + + void operator() (sobj &in,fobj &out,uint32_t &csum){ + + NerscChecksum((uint32_t *)&out,sizeof(out),csum); + + for(int mu=0;mu<4;mu++){ + for(int i=0;i<2;i++){ + for(int j=0;j<3;j++){ + out(mu)(i)(j) = in(mu)()(i,j); + }} + } + } +}; + +//////////////////////////////////////////////////////////////////////////// +// Template wizardry to map types to strings for NERSC in an extensible way +//////////////////////////////////////////////////////////////////////////// + template struct NerscDataType { + static void DataType (std::string &str) { str = std::string("4D_BINARY_UNKNOWN"); }; + static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG"); }; + }; + + template<> struct NerscDataType > { + static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); }; + static void FloatingPoint(std::string &str) { str = std::string("IEEE64BIG");}; + }; + + template<> struct NerscDataType > { + static void DataType (std::string &str) { str = std::string("4D_SU3_GAUGE_3X3"); }; + static void FloatingPoint(std::string &str) { str = std::string("IEEE32BIG");}; + }; + +////////////////////////////////////////////////////////////////////// +// Bit and Physical Checksumming and QA of data +////////////////////////////////////////////////////////////////////// +/* +template inline uint32_t NerscChecksum(Lattice & data) +{ + uint32_t sum; + for(int ss=0;ssOsites();ss++){ + uint32_t *iptr = (uint32_t *)& data._odata[0] ; + for(int i=0;iglobalSum(sum); + return sum; +} +*/ +template inline void NerscPhysicalCharacteristics(Lattice & data,NerscField &header) +{ + header.data_type = NerscDataType::DataType; + header.floating_point = NerscDataType::FloatingPoint; + return; +} + + template<> inline void NerscPhysicalCharacteristics(LatticeGaugeField & data,NerscField &header) +{ + NerscDataType::DataType(header.data_type); + NerscDataType::FloatingPoint(header.floating_point); + header.link_trace=1.0; + header.plaquette =1.0; +} + +template inline void NerscStatisics(Lattice & data,NerscField &header) +{ + assert(data._grid->_ndimension==4); + + for(int d=0;d<4;d++) + header.dimension[d] = data._grid->_fdimensions[d]; + + // compute checksum and any physical properties contained for this type + // header.checksum = NerscChecksum(data); + + NerscPhysicalCharacteristics(data,header); +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Now the meat: the object readers +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +template +inline void readNerscObject(Lattice &Umu,std::string file,munger munge,int offset,std::string &format) +{ + GridBase *grid = Umu._grid; + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + // Find the location of each site and send to primary node + // for(int site=0; site < Layout::vol(); ++site){ + // multi1d coord = crtesn(site, Layout::lattSize()); + // for(int dd=0; dd_fdimensions[3];t++){ + for(int z=0;z_fdimensions[2];z++){ + for(int y=0;y_fdimensions[1];y++){ + for(int x=0;x_fdimensions[0];x++){ + + std::vector site({x,y,z,t}); + + if ( grid->IsBoss() ) { + fin.read((char *)&file_object,sizeof(file_object)); + + if(ieee32big) be32toh((void *)&file_object,sizeof(file_object)); + if(ieee32) le32toh((void *)&file_object,sizeof(file_object)); + if(ieee64big) be64toh((void *)&file_object,sizeof(file_object)); + if(ieee64) le64toh((void *)&file_object,sizeof(file_object)); + + munge(file_object,munged,csum); + } + // The boss who read the file has their value poked + pokeSite(munged,Umu,site); + }}}} + } +} + +template +inline void writeNerscObject(Lattice &Umu,std::string file,munger munge,int offset, + int sequence,double lt,double pl) +{ + GridBase *grid = Umu._grid; + NerscField header; + + ////////////////////////////////////////////////// + // First write the header; this is in wrong place + ////////////////////////////////////////////////// + assert(grid->_ndimension == 4); + for(int d=0;d<4;d++){ + header.dimension[d]=grid->_fdimensions[d]; + header.boundary [d]=std::string("PERIODIC");; + } + header.hdr_version=std::string("WHATDAHECK"); + // header.storage_format=storage_format::string; // use template specialisation + // header.data_type=data_type::string; + header.storage_format=std::string("debug"); + header.data_type =std::string("debug"); + + //FIXME; use template specialisation to fill these out + header.link_trace =lt; + header.plaquette =pl; + header.checksum =0; + + // + header.sequence_number =sequence; + header.ensemble_id =std::string("UKQCD"); + header.ensemble_label =std::string("UKQCD"); + header.creator =std::string("Tadahito"); + header.creator_hardware=std::string("BlueGene/Q"); + header.creation_date =std::string("AnnoDomini"); + header.archive_date =std::string("AnnoDomini"); + header.floating_point =std::string("IEEE64BIG"); + // header.data_start=; + // unsigned int checksum; + + ////////////////////////////////////////////////// + // Now write the body + ////////////////////////////////////////////////// + { + std::ofstream fout(file,std::ios::binary|std::ios::in); + fout.seekp(offset); + + Umu = zero; + uint32_t csum=0; + fobj file_object; + sobj unmunged; + for(int t=0;t_fdimensions[3];t++){ + for(int z=0;z_fdimensions[2];z++){ + for(int y=0;y_fdimensions[1];y++){ + for(int x=0;x_fdimensions[0];x++){ + std::vector site({x,y,z,t}); + peekSite(unmunged,Umu,site); + munge(unmunged,file_object,csum); + // broadcast & insert + fout.write((char *)&file_object,sizeof(file_object)); + }}}} + } +} + + + +inline void readNerscConfiguration(LatticeGaugeField &Umu,NerscField& header,std::string file) +{ + GridBase *grid = Umu._grid; + + int offset = readNerscHeader(file,Umu._grid,header); + + std::string format(header.floating_point); + + int ieee32big = (format == std::string("IEEE32BIG")); + int ieee32 = (format == std::string("IEEE32")); + int ieee64big = (format == std::string("IEEE64BIG")); + int ieee64 = (format == std::string("IEEE64")); + + // depending on datatype, set up munger; + // munger is a function of + if ( header.data_type == std::string("4D_SU3_GAUGE") ) { + if ( ieee32 || ieee32big ) { + readNerscObject + (Umu,file, + Nersc3x2munger(), + offset,format); + } + if ( ieee64 || ieee64big ) { + readNerscObject + (Umu,file, + Nersc3x2munger(), + offset,format); + } + } else if ( header.data_type == std::string("4D_SU3_GAUGE_3X3") ) { + if ( ieee32 || ieee32big ) { + readNerscObject + (Umu,file,NerscSimpleMunger(),offset,format); + } + if ( ieee64 || ieee64big ) { + readNerscObject + (Umu,file,NerscSimpleMunger(),offset,format); + } + } else { + assert(0); + } + +} + +template +inline void writeNerscConfiguration(Lattice &Umu,NerscField &header,std::string file) +{ + GridBase &grid = Umu._grid; + + NerscStatisics(Umu,header); + + int offset = writeNerscHeader(header,file); + + writeNerscObject(Umu,NerscSimpleMunger(),offset); +} + + +} +#endif diff --git a/lib/qcd/Grid_QCD.h b/lib/qcd/Grid_QCD.h index c405662d..b30b7a67 100644 --- a/lib/qcd/Grid_QCD.h +++ b/lib/qcd/Grid_QCD.h @@ -42,6 +42,8 @@ namespace QCD { typedef iColourMatrix ColourMatrix; typedef iSpinColourMatrix SpinColourMatrix; typedef iLorentzColourMatrix LorentzColourMatrix; + typedef iLorentzColourMatrix LorentzColourMatrixF; + typedef iLorentzColourMatrix LorentzColourMatrixD; typedef iSpinVector SpinVector; typedef iColourVector ColourVector; @@ -66,7 +68,7 @@ namespace QCD { typedef Lattice LatticeReal; typedef Lattice LatticeComplex; - typedef Lattice LatticeInteger; // Predicates for "where" + typedef Lattice LatticeInteger; // Predicates for "where" typedef Lattice LatticeColourMatrix; typedef Lattice LatticeSpinMatrix; diff --git a/tests/Grid_cshift.cc b/tests/Grid_cshift.cc new file mode 100644 index 00000000..3fe2b9cc --- /dev/null +++ b/tests/Grid_cshift.cc @@ -0,0 +1,84 @@ +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector simd_layout({1,1,2,2}); + std::vector mpi_layout ({2,2,2,2}); + std::vector latt_size ({8,8,8,16}); + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + GridRNG FineRNG(&Fine); + + LatticeComplex U(&Fine); + LatticeComplex ShiftU(&Fine); + + LatticeComplex lex(&Fine); + lex=zero; + Integer stride =1; + { + double nrm; + LatticeComplex coor(&Fine); + + for(int d=0;d<4;d++){ + LatticeCoordinate(coor,d); + lex = lex + coor*stride; + stride=stride*latt_size[d]; + } + U=lex; + } + + TComplex cm; + + for(int dir=0;dir<4;dir++){ + for(int shift=0;shift coor(4); + + for(coor[3]=0;coor[3] scoor(coor); + scoor[dir] = (scoor[dir]+shift)%latt_size[dir]; + + Integer slex = scoor[0] + + latt_size[0]*scoor[1] + + latt_size[0]*latt_size[1]*scoor[2] + + latt_size[0]*latt_size[1]*latt_size[2]*scoor[3]; + + Complex scm(slex); + + nrm = abs(scm-cm()()()); + std::vector peer(4); + int index=real(cm); + Fine.CoorFromIndex(peer,index,latt_size); + + if (nrm > 0){ + cout<<"FAIL shift "<< shift<<" in dir "<< dir<<" ["< +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + + std::vector simd_layout({1,1,2,2}); + std::vector mpi_layout ({2,1,1,2}); + std::vector latt_size ({16,16,16,32}); + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + GridRNG FineRNG(&Fine); + LatticeGaugeField Umu(&Fine); + + std::vector U(4,&Fine); + + NerscField header; + + std::string file("./ckpoint_lat.4000"); + readNerscConfiguration(Umu,header,file); + + for(int mu=0;mu(Umu,mu); + } + + // Painful ; fix syntactical niceness + LatticeComplex LinkTrace(&Fine); + LinkTrace=zero; + for(int mu=0;mu