diff --git a/Grid/lattice/Lattice_transfer.h b/Grid/lattice/Lattice_transfer.h index 4eca50d6..69c132ed 100644 --- a/Grid/lattice/Lattice_transfer.h +++ b/Grid/lattice/Lattice_transfer.h @@ -487,7 +487,7 @@ void InsertSliceLocal(const Lattice &lowDim, Lattice & higherDim,int template -void ExtractSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) +void ExtractSliceLocal(Lattice &lowDim,const Lattice & higherDim,int slice_lo,int slice_hi, int orthog) { typedef typename vobj::scalar_object sobj; diff --git a/Grid/serialisation/Hdf5IO.cc b/Grid/serialisation/Hdf5IO.cc index ca2457fa..77396809 100644 --- a/Grid/serialisation/Hdf5IO.cc +++ b/Grid/serialisation/Hdf5IO.cc @@ -61,9 +61,9 @@ Group & Hdf5Writer::getGroup(void) } // Reader implementation /////////////////////////////////////////////////////// -Hdf5Reader::Hdf5Reader(const std::string &fileName) +Hdf5Reader::Hdf5Reader(const std::string &fileName, const bool readOnly) : fileName_(fileName) -, file_(fileName.c_str(), H5F_ACC_RDWR) +, file_(fileName.c_str(), readOnly ? H5F_ACC_RDONLY : H5F_ACC_RDWR) { group_ = file_.openGroup("/"); readSingleAttribute(dataSetThres_, HDF5_GRID_GUARD "dataset_threshold", diff --git a/Grid/serialisation/Hdf5IO.h b/Grid/serialisation/Hdf5IO.h index 1ae2791e..ec26612a 100644 --- a/Grid/serialisation/Hdf5IO.h +++ b/Grid/serialisation/Hdf5IO.h @@ -54,7 +54,7 @@ namespace Grid class Hdf5Reader: public Reader { public: - Hdf5Reader(const std::string &fileName); + Hdf5Reader(const std::string &fileName, const bool readOnly = true); virtual ~Hdf5Reader(void) = default; bool push(const std::string &s); void pop(void); diff --git a/Grid/threads/Threads.h b/Grid/threads/Threads.h index 5e448c44..9041d01e 100644 --- a/Grid/threads/Threads.h +++ b/Grid/threads/Threads.h @@ -47,6 +47,7 @@ Author: paboyle #else #define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP_INTERN +#define PARALLEL_FOR_LOOP_REDUCE(op, var) #define PARALLEL_NESTED_LOOP2 #define PARALLEL_NESTED_LOOP5 #define PARALLEL_REGION @@ -58,6 +59,7 @@ Author: paboyle #define parallel_for_internal PARALLEL_FOR_LOOP_INTERN for #define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for #define parallel_for_nest5 PARALLEL_NESTED_LOOP5 for +#define parallel_critical PARALLEL_CRITICAL namespace Grid { diff --git a/Grid/util/Sha.h b/Grid/util/Sha.h index b0a8cc10..ee164c34 100644 --- a/Grid/util/Sha.h +++ b/Grid/util/Sha.h @@ -28,16 +28,31 @@ extern "C" { #include } +#ifdef USE_IPP +#include "ipp.h" +#endif #pragma once class GridChecksum { public: - static inline uint32_t crc32(void *data,size_t bytes) + static inline uint32_t crc32(const void *data, size_t bytes) { return ::crc32(0L,(unsigned char *)data,bytes); } + +#ifdef USE_IPP + static inline uint32_t crc32c(const void* data, size_t bytes) + { + uint32_t crc32c = ~(uint32_t)0; + ippsCRC32C_8u(reinterpret_cast(data), bytes, &crc32c); + ippsSwapBytes_32u_I(&crc32c, 1); + + return ~crc32c; + } +#endif + template static inline std::string sha256_string(const std::vector &hash) { diff --git a/Hadrons/A2AMatrix.hpp b/Hadrons/A2AMatrix.hpp index 95133f30..e224a95e 100644 --- a/Hadrons/A2AMatrix.hpp +++ b/Hadrons/A2AMatrix.hpp @@ -32,11 +32,19 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#ifdef USE_MKL +#include "mkl.h" +#include "mkl_cblas.h" +#endif #ifndef HADRONS_A2AM_NAME #define HADRONS_A2AM_NAME "a2aMatrix" #endif +#ifndef HADRONS_A2AM_IO_TYPE +#define HADRONS_A2AM_IO_TYPE ComplexF +#endif + #define HADRONS_A2AM_PARALLEL_IO BEGIN_HADRONS_NAMESPACE @@ -51,6 +59,12 @@ BEGIN_HADRONS_NAMESPACE template using A2AMatrixSet = Eigen::TensorMap>; +template +using A2AMatrix = Eigen::Matrix; + +template +using A2AMatrixTr = Eigen::Matrix; + /****************************************************************************** * Abstract class for A2A kernels * ******************************************************************************/ @@ -76,10 +90,15 @@ public: // constructors A2AMatrixIo(void) = default; A2AMatrixIo(std::string filename, std::string dataname, - const unsigned int nt, const unsigned int ni, - const unsigned int nj); + const unsigned int nt, const unsigned int ni = 0, + const unsigned int nj = 0); // destructor ~A2AMatrixIo(void) = default; + // access + unsigned int getNi(void) const; + unsigned int getNj(void) const; + unsigned int getNt(void) const; + size_t getSize(void) const; // file allocation template void initFile(const MetadataType &d, const unsigned int chunkSize); @@ -88,9 +107,11 @@ public: const unsigned int blockSizei, const unsigned int blockSizej); void saveBlock(const A2AMatrixSet &m, const unsigned int ext, const unsigned int str, const unsigned int i, const unsigned int j); + template