mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-24 17:54:47 +01:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			b58fd80379
			...
			feature/co
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| de8b2dcca3 | |||
| efe000341d | |||
| 11086c5c25 | 
| @@ -62,6 +62,9 @@ using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>; | |||||||
| template <typename T> | template <typename T> | ||||||
| using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>; | using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>; | ||||||
|  |  | ||||||
|  | template <typename T> | ||||||
|  | using A2AMatrixMap = Eigen::Map<A2AMatrix<T>>; | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>; | using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>; | ||||||
|  |  | ||||||
| @@ -108,7 +111,7 @@ public: | |||||||
|     void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str, |     void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str, | ||||||
|                    const unsigned int i, const unsigned int j); |                    const unsigned int i, const unsigned int j); | ||||||
|     template <template <class> class Vec, typename VecT> |     template <template <class> class Vec, typename VecT> | ||||||
|     void load(Vec<VecT> &v, double *tRead = nullptr); |     void load(Vec<VecT> &v, double *tRead = nullptr, const bool useCache = true); | ||||||
| private: | private: | ||||||
|     std::string  filename_{""}, dataname_{""}; |     std::string  filename_{""}, dataname_{""}; | ||||||
|     unsigned int nt_{0}, ni_{0}, nj_{0}; |     unsigned int nt_{0}, ni_{0}, nj_{0}; | ||||||
| @@ -495,7 +498,7 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m, | |||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| template <template <class> class Vec, typename VecT> | template <template <class> class Vec, typename VecT> | ||||||
| void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead) | void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead, const bool useCache) | ||||||
| { | { | ||||||
| #ifdef HAVE_HDF5 | #ifdef HAVE_HDF5 | ||||||
|     Hdf5Reader           reader(filename_); |     Hdf5Reader           reader(filename_); | ||||||
| @@ -532,36 +535,55 @@ void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead) | |||||||
|         nj_ = hdim[2]; |         nj_ = hdim[2]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     A2AMatrix<T>         buf(ni_, nj_); |     if (useCache) | ||||||
|     std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_), |  | ||||||
|                                      static_cast<hsize_t>(nj_)}, |  | ||||||
|                          stride   = {1, 1, 1}, |  | ||||||
|                          block    = {1, 1, 1}, |  | ||||||
|                          memCount = {static_cast<hsize_t>(ni_), |  | ||||||
|                                      static_cast<hsize_t>(nj_)}; |  | ||||||
|     H5NS::DataSpace      memspace(memCount.size(), memCount.data()); |  | ||||||
|  |  | ||||||
|     std::cout << "Loading timeslice"; |  | ||||||
|     std::cout.flush(); |  | ||||||
|     *tRead = 0.; |  | ||||||
|     for (unsigned int tp1 = nt_; tp1 > 0; --tp1) |  | ||||||
|     { |     { | ||||||
|         unsigned int         t      = tp1 - 1; |         std::vector<T> buf(nt_*ni_*nj_); | ||||||
|         std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0}; |         T              *pt; | ||||||
|  |  | ||||||
|         if (t % 10 == 0) |         dataset.read(buf.data(), datatype); | ||||||
|  |         pt = buf.data(); | ||||||
|  |         for (unsigned int t = 0; t < nt_; ++t) | ||||||
|         { |         { | ||||||
|             std::cout << " " << t; |             A2AMatrixMap<T> bufMap(pt, ni_, nj_); | ||||||
|             std::cout.flush(); |  | ||||||
|  |             v[t]  = bufMap.template cast<VecT>(); | ||||||
|  |             pt   += ni_*nj_; | ||||||
|         } |         } | ||||||
|         dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(), |  | ||||||
|                                   stride.data(), block.data()); |  | ||||||
|         if (tRead) *tRead -= usecond();     |  | ||||||
|         dataset.read(buf.data(), datatype, memspace, dataspace); |  | ||||||
|         if (tRead) *tRead += usecond(); |  | ||||||
|         v[t] = buf.template cast<VecT>(); |  | ||||||
|     } |     } | ||||||
|     std::cout << std::endl; |     // if useCache = false, do I/O timeslice per timeslice (much slower) | ||||||
|  |     else | ||||||
|  |     { | ||||||
|  |         A2AMatrix<T>         buf(ni_, nj_); | ||||||
|  |         std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_), | ||||||
|  |                                         static_cast<hsize_t>(nj_)}, | ||||||
|  |                              stride   = {1, 1, 1}, | ||||||
|  |                              block    = {1, 1, 1}, | ||||||
|  |                              memCount = {static_cast<hsize_t>(ni_), | ||||||
|  |                                          static_cast<hsize_t>(nj_)}; | ||||||
|  |         H5NS::DataSpace      memspace(memCount.size(), memCount.data()); | ||||||
|  |  | ||||||
|  |         std::cout << "Loading timeslice"; | ||||||
|  |         std::cout.flush(); | ||||||
|  |         *tRead = 0.; | ||||||
|  |         for (unsigned int tp1 = nt_; tp1 > 0; --tp1) | ||||||
|  |         { | ||||||
|  |             unsigned int         t      = tp1 - 1; | ||||||
|  |             std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0}; | ||||||
|  |              | ||||||
|  |             if (t % 10 == 0) | ||||||
|  |             { | ||||||
|  |                 std::cout << " " << t; | ||||||
|  |                 std::cout.flush(); | ||||||
|  |             } | ||||||
|  |             dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(), | ||||||
|  |                                       stride.data(), block.data()); | ||||||
|  |             if (tRead) *tRead -= usecond();     | ||||||
|  |             dataset.read(buf.data(), datatype, memspace, dataspace); | ||||||
|  |             if (tRead) *tRead += usecond(); | ||||||
|  |             v[t] = buf.template cast<VecT>(); | ||||||
|  |         } | ||||||
|  |         std::cout << std::endl; | ||||||
|  |     } | ||||||
| #else | #else | ||||||
|     HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library"); |     HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library"); | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -41,14 +41,6 @@ BEGIN_HADRONS_NAMESPACE | |||||||
| class Application | class Application | ||||||
| { | { | ||||||
| public: | public: | ||||||
|     class TrajRange: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange, |  | ||||||
|                                         unsigned int, start, |  | ||||||
|                                         unsigned int, end, |  | ||||||
|                                         unsigned int, step); |  | ||||||
|     }; |  | ||||||
|     class GlobalPar: Serializable |     class GlobalPar: Serializable | ||||||
|     { |     { | ||||||
|     public: |     public: | ||||||
|   | |||||||
| @@ -263,6 +263,33 @@ void tokenReplace(std::string &str, const std::string token, | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // trajectory range | ||||||
|  | class TrajRange: Serializable | ||||||
|  | { | ||||||
|  | public: | ||||||
|  |     GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange, | ||||||
|  |                                     unsigned int, start, | ||||||
|  |                                     unsigned int, end, | ||||||
|  |                                     unsigned int, step, | ||||||
|  |                                     std::string,  exclude); | ||||||
|  |  | ||||||
|  |     inline std::vector<unsigned int> getTrajectoryList(void) | ||||||
|  |     { | ||||||
|  |         std::vector<unsigned int> excVec = strToVec<unsigned int>(exclude); | ||||||
|  |         std::vector<unsigned int> list; | ||||||
|  |  | ||||||
|  |         for (unsigned int t = start; t < end; t += step) | ||||||
|  |         { | ||||||
|  |             if (std::find(excVec.begin(), excVec.end(), t) == excVec.end()) | ||||||
|  |             { | ||||||
|  |                 list.push_back(t); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return list; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
| #include <Hadrons/Exceptions.hpp> | #include <Hadrons/Exceptions.hpp> | ||||||
|   | |||||||
| @@ -34,4 +34,5 @@ nobase_libHadrons_a_HEADERS = \ | |||||||
| 	Solver.hpp                \ | 	Solver.hpp                \ | ||||||
| 	TimerArray.hpp            \ | 	TimerArray.hpp            \ | ||||||
| 	VirtualMachine.hpp        \ | 	VirtualMachine.hpp        \ | ||||||
|  | 	Utilities/Contractor.hpp  \ | ||||||
| 	$(modules_hpp) | 	$(modules_hpp) | ||||||
|   | |||||||
| @@ -28,6 +28,25 @@ See the full license in the file "LICENSE" in the top level distribution directo | |||||||
| #include <Hadrons/A2AMatrix.hpp> | #include <Hadrons/A2AMatrix.hpp> | ||||||
| #include <Hadrons/DiskVector.hpp> | #include <Hadrons/DiskVector.hpp> | ||||||
| #include <Hadrons/TimerArray.hpp> | #include <Hadrons/TimerArray.hpp> | ||||||
|  | #include <Hadrons/Utilities/Contractor.hpp> | ||||||
|  |  | ||||||
|  | #ifdef GRID_COMMS_MPI3 | ||||||
|  | #define GET_RANK(rank, nMpi) \ | ||||||
|  | MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\ | ||||||
|  | MPI_Comm_rank(MPI_COMM_WORLD, &(rank)) | ||||||
|  | #define BARRIER() MPI_Barrier(MPI_COMM_WORLD) | ||||||
|  | #define GLOBAL_DSUM(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) | ||||||
|  | #define GLOBAL_DMAX(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD) | ||||||
|  | #define INIT() MPI_Init(NULL, NULL) | ||||||
|  | #define FINALIZE() MPI_Finalize() | ||||||
|  | #else | ||||||
|  | #define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0 | ||||||
|  | #define BARRIER() | ||||||
|  | #define GLOBAL_DSUM(x) | ||||||
|  | #define GLOBAL_DMAX(x) | ||||||
|  | #define INIT() | ||||||
|  | #define FINALIZE() | ||||||
|  | #endif | ||||||
|  |  | ||||||
| using namespace Grid; | using namespace Grid; | ||||||
| using namespace QCD; | using namespace QCD; | ||||||
| @@ -35,58 +54,6 @@ using namespace Hadrons; | |||||||
|  |  | ||||||
| #define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt) | #define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt) | ||||||
|  |  | ||||||
| namespace Contractor |  | ||||||
| { |  | ||||||
|     class TrajRange: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange, |  | ||||||
|                                         unsigned int, start, |  | ||||||
|                                         unsigned int, end, |  | ||||||
|                                         unsigned int, step); |  | ||||||
|     }; |  | ||||||
|      |  | ||||||
|     class GlobalPar: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar, |  | ||||||
|                                         TrajRange, trajCounter, |  | ||||||
|                                         unsigned int, nt, |  | ||||||
|                                         std::string, diskVectorDir, |  | ||||||
|                                         std::string, output); |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     class A2AMatrixPar: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar, |  | ||||||
|                                         std::string, file, |  | ||||||
|                                         std::string, dataset, |  | ||||||
|                                         unsigned int, cacheSize, |  | ||||||
|                                         std::string, name); |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     class ProductPar: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar, |  | ||||||
|                                         std::string, terms, |  | ||||||
|                                         std::vector<std::string>, times, |  | ||||||
|                                         std::string, translations, |  | ||||||
|                                         bool, translationAverage); |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     class CorrelatorResult: Serializable |  | ||||||
|     { |  | ||||||
|     public: |  | ||||||
|         GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult, |  | ||||||
|                                         std::vector<Contractor::A2AMatrixPar>,  a2aMatrix, |  | ||||||
|                                         ProductPar, contraction, |  | ||||||
|                                         std::vector<unsigned int>, times, |  | ||||||
|                                         std::vector<ComplexD>, correlator); |  | ||||||
|     }; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| struct ContractorPar | struct ContractorPar | ||||||
| { | { | ||||||
|     Contractor::GlobalPar                  global; |     Contractor::GlobalPar                  global; | ||||||
| @@ -143,6 +110,27 @@ void saveCorrelator(const Contractor::CorrelatorResult &result, const std::strin | |||||||
|     write(writer, fileStem, result); |     write(writer, fileStem, result); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void printPerf(const double bytes, const double usec) | ||||||
|  | { | ||||||
|  |     double maxt; | ||||||
|  |  | ||||||
|  |     maxt = usec; | ||||||
|  |     GLOBAL_DMAX(maxt); | ||||||
|  |     std::cout << maxt/1.0e6 << " sec " << bytes/maxt*1.0e6/1024/1024/1024 << " GB/s"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void printPerf(const double bytes, const double busec,  | ||||||
|  |                const double flops, const double fusec) | ||||||
|  | { | ||||||
|  |     double maxt; | ||||||
|  |  | ||||||
|  |     printPerf(bytes, busec); | ||||||
|  |     std::cout << " "; | ||||||
|  |     maxt = fusec; | ||||||
|  |     GLOBAL_DMAX(maxt); | ||||||
|  |     std::cout << flops/fusec/1.0e3 << " GFlop/s"; | ||||||
|  | } | ||||||
|  |  | ||||||
| std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt) | std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt) | ||||||
| { | { | ||||||
|     std::regex               rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))"); |     std::regex               rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))"); | ||||||
| @@ -184,59 +172,18 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int | |||||||
|     return tSet; |     return tSet; | ||||||
| } | } | ||||||
|  |  | ||||||
| struct Sec |  | ||||||
| { |  | ||||||
|     Sec(const double usec) |  | ||||||
|     { |  | ||||||
|         seconds = usec/1.0e6; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     double seconds; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| inline std::ostream & operator<< (std::ostream& s, const Sec &&sec) |  | ||||||
| { |  | ||||||
|     s << std::setw(10) << sec.seconds << " sec"; |  | ||||||
|  |  | ||||||
|     return s; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| struct Flops |  | ||||||
| { |  | ||||||
|     Flops(const double flops, const double fusec) |  | ||||||
|     { |  | ||||||
|         gFlopsPerSec = flops/fusec/1.0e3; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     double gFlopsPerSec; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| inline std::ostream & operator<< (std::ostream& s, const Flops &&f) |  | ||||||
| { |  | ||||||
|     s << std::setw(10) << f.gFlopsPerSec << " GFlop/s"; |  | ||||||
|  |  | ||||||
|     return s; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| struct Bytes |  | ||||||
| { |  | ||||||
|     Bytes(const double bytes, const double busec) |  | ||||||
|     { |  | ||||||
|         gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024; |  | ||||||
|     } |  | ||||||
|      |  | ||||||
|     double gBytesPerSec; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| inline std::ostream & operator<< (std::ostream& s, const Bytes &&b) |  | ||||||
| { |  | ||||||
|     s << std::setw(10) << b.gBytesPerSec << " GB/s"; |  | ||||||
|  |  | ||||||
|     return s; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| int main(int argc, char* argv[]) | int main(int argc, char* argv[]) | ||||||
| { | { | ||||||
|  |     // MPI init | ||||||
|  |     int nMpi, rank; | ||||||
|  |  | ||||||
|  |     INIT(); | ||||||
|  |     GET_RANK(rank, nMpi); | ||||||
|  |     if (rank != 0) | ||||||
|  |     { | ||||||
|  |         std::cout.setstate(std::ios::badbit); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // parse command line |     // parse command line | ||||||
|     std::string   parFilename; |     std::string   parFilename; | ||||||
|  |  | ||||||
| @@ -266,31 +213,68 @@ int main(int argc, char* argv[]) | |||||||
|  |  | ||||||
|     for (auto &p: par.a2aMatrix) |     for (auto &p: par.a2aMatrix) | ||||||
|     { |     { | ||||||
|         std::string dirName = par.global.diskVectorDir + "/" + p.name; |         std::string dirName = par.global.diskVectorDir + "/" + p.name + "." + std::to_string(rank); | ||||||
|  |  | ||||||
|         a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize)); |         a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // trajectory loop |     // trajectory loop | ||||||
|     for (unsigned int traj = par.global.trajCounter.start;  |     std::vector<unsigned int> tList = par.global.trajCounter.getTrajectoryList(); | ||||||
|          traj < par.global.trajCounter.end; traj += par.global.trajCounter.step) |     unsigned int              indi, inde, indPerRank; | ||||||
|  |  | ||||||
|  |     indPerRank = tList.size()/nMpi; | ||||||
|  |     indi       = rank*indPerRank; | ||||||
|  |     BARRIER(); | ||||||
|  |     for (unsigned int tInd = indi; tInd < indi + indPerRank; tInd++) | ||||||
|     { |     { | ||||||
|         std::cout << ":::::::: Trajectory " << traj << std::endl; |         unsigned int traj; | ||||||
|  |  | ||||||
|  |         if (tInd < tList.size()) | ||||||
|  |         { | ||||||
|  |             traj = tList[tInd]; | ||||||
|  |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |             traj = tList.back(); | ||||||
|  |         } | ||||||
|  |         if (nMpi > 1) | ||||||
|  |         { | ||||||
|  |             if (rank == 0) | ||||||
|  |             { | ||||||
|  |                 std::cout << ":::::::: Trajectories "; | ||||||
|  |                 for (unsigned int r = 0; r < nMpi - 1; ++r) | ||||||
|  |                 { | ||||||
|  |                     std::cout << tList[tInd + r*indPerRank] << " "; | ||||||
|  |                 } | ||||||
|  |                 if (tInd + (nMpi - 1)*indPerRank < tList.size()) | ||||||
|  |                 { | ||||||
|  |                     std::cout << tList[tInd + (nMpi - 1)*indPerRank]; | ||||||
|  |                 } | ||||||
|  |                 std::cout << std::endl; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         else | ||||||
|  |         { | ||||||
|  |             std::cout << ":::::::: Trajectory " << traj << std::endl; | ||||||
|  |         } | ||||||
|      |      | ||||||
|         // load data |         // load data | ||||||
|         for (auto &p: par.a2aMatrix) |         for (auto &p: par.a2aMatrix) | ||||||
|         { |         { | ||||||
|             std::string filename = p.file; |             std::string filename = p.file; | ||||||
|             double      t, size; |             double      t; | ||||||
|  |  | ||||||
|             tokenReplace(filename, "traj", traj); |             tokenReplace(filename, "traj", traj); | ||||||
|             std::cout << "======== Loading '" << filename << "'" << std::endl; |             std::cout << "======== Loading '" << p.file << "'" << std::endl; | ||||||
|  |  | ||||||
|  |             BARRIER(); | ||||||
|             A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt); |             A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt); | ||||||
|  |  | ||||||
|             a2aIo.load(a2aMat.at(p.name), &t); |             a2aIo.load(a2aMat.at(p.name), &t); | ||||||
|             std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6  |             GLOBAL_DMAX(t); | ||||||
|                     << " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl; |             std::cout << "Read "  << nMpi*a2aIo.getSize() << " bytes in " << t/1.0e6  | ||||||
|  |                       << " sec, " << nMpi*a2aIo.getSize()/t*1.0e6/1024/1024  | ||||||
|  |                       << " MB/s"  << std::endl; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // contract |         // contract | ||||||
| @@ -308,6 +292,7 @@ int main(int argc, char* argv[]) | |||||||
|             double                                 fusec, busec, flops, bytes, tusec; |             double                                 fusec, busec, flops, bytes, tusec; | ||||||
|             Contractor::CorrelatorResult           result;              |             Contractor::CorrelatorResult           result;              | ||||||
|  |  | ||||||
|  |             BARRIER(); | ||||||
|             tAr.startTimer("Total"); |             tAr.startTimer("Total"); | ||||||
|             std::cout << "======== Contraction tr("; |             std::cout << "======== Contraction tr("; | ||||||
|             for (unsigned int g = 0; g < term.size(); ++g) |             for (unsigned int g = 0; g < term.size(); ++g) | ||||||
| @@ -358,9 +343,10 @@ int main(int argc, char* argv[]) | |||||||
|                 } |                 } | ||||||
|                 tAr.stopTimer("Transpose caching"); |                 tAr.stopTimer("Transpose caching"); | ||||||
|             } |             } | ||||||
|             bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD); |             bytes  = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols(); | ||||||
|             std::cout << Sec(tAr.getDTimer("Transpose caching")) << " "  |             bytes *= sizeof(ComplexD)*nMpi; | ||||||
|                       << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl; |             printPerf(bytes, tAr.getDTimer("Transpose caching")); | ||||||
|  |             std::cout << std::endl; | ||||||
|             for (unsigned int i = 0; i < timeSeq.size(); ++i) |             for (unsigned int i = 0; i < timeSeq.size(); ++i) | ||||||
|             { |             { | ||||||
|                 unsigned int dti = 0; |                 unsigned int dti = 0; | ||||||
| @@ -378,7 +364,7 @@ int main(int argc, char* argv[]) | |||||||
|                             << " -- positions= " << t << ", dt= " << dt << std::endl; |                             << " -- positions= " << t << ", dt= " << dt << std::endl; | ||||||
|                     if (term.size() > 2) |                     if (term.size() > 2) | ||||||
|                     { |                     { | ||||||
|                         std::cout << std::setw(8) << "products"; |                         std::cout << std::setw(10) << "products "; | ||||||
|                     } |                     } | ||||||
|                     flops  = 0.; |                     flops  = 0.; | ||||||
|                     bytes  = 0.; |                     bytes  = 0.; | ||||||
| @@ -405,11 +391,11 @@ int main(int argc, char* argv[]) | |||||||
|                     } |                     } | ||||||
|                     if (term.size() > 2) |                     if (term.size() > 2) | ||||||
|                     { |                     { | ||||||
|                         std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " " |                         printPerf(bytes*nMpi, tAr.getDTimer("A*B total") - busec, | ||||||
|                                 << Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " "  |                                   flops*nMpi, tAr.getDTimer("A*B algebra") - fusec); | ||||||
|                                 << Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl; |                         std::cout << std::endl; | ||||||
|                     } |                     } | ||||||
|                     std::cout << std::setw(8) << "traces"; |                     std::cout << std::setw(10) << "traces "; | ||||||
|                     flops  = 0.; |                     flops  = 0.; | ||||||
|                     bytes  = 0.; |                     bytes  = 0.; | ||||||
|                     fusec  = tAr.getDTimer("tr(A*B)"); |                     fusec  = tAr.getDTimer("tr(A*B)"); | ||||||
| @@ -423,9 +409,9 @@ int main(int argc, char* argv[]) | |||||||
|                         bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD); |                         bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD); | ||||||
|                     } |                     } | ||||||
|                     tAr.stopTimer("Linear algebra"); |                     tAr.stopTimer("Linear algebra"); | ||||||
|                     std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " " |                     printPerf(bytes*nMpi, tAr.getDTimer("tr(A*B)") - busec, | ||||||
|                             << Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " "  |                               flops*nMpi, tAr.getDTimer("tr(A*B)") - fusec); | ||||||
|                             << Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl; |                     std::cout << std::endl; | ||||||
|                     if (!p.translationAverage) |                     if (!p.translationAverage) | ||||||
|                     { |                     { | ||||||
|                         saveCorrelator(result, par.global.output, dt, traj); |                         saveCorrelator(result, par.global.output, dt, traj); | ||||||
| @@ -450,5 +436,7 @@ int main(int argc, char* argv[]) | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|      |      | ||||||
|  |     FINALIZE(); | ||||||
|  |  | ||||||
|     return EXIT_SUCCESS; |     return EXIT_SUCCESS; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -5,7 +5,48 @@ | |||||||
|  |  | ||||||
| BEGIN_HADRONS_NAMESPACE | BEGIN_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|  | namespace Contractor | ||||||
|  | {    | ||||||
|  |     class GlobalPar: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar, | ||||||
|  |                                         TrajRange, trajCounter, | ||||||
|  |                                         unsigned int, nt, | ||||||
|  |                                         std::string, diskVectorDir, | ||||||
|  |                                         std::string, output); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     class A2AMatrixPar: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar, | ||||||
|  |                                         std::string, file, | ||||||
|  |                                         std::string, dataset, | ||||||
|  |                                         unsigned int, cacheSize, | ||||||
|  |                                         std::string, name); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     class ProductPar: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar, | ||||||
|  |                                         std::string, terms, | ||||||
|  |                                         std::vector<std::string>, times, | ||||||
|  |                                         std::string, translations, | ||||||
|  |                                         bool, translationAverage); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     class CorrelatorResult: Serializable | ||||||
|  |     { | ||||||
|  |     public: | ||||||
|  |         GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult, | ||||||
|  |                                         std::vector<Contractor::A2AMatrixPar>,  a2aMatrix, | ||||||
|  |                                         ProductPar, contraction, | ||||||
|  |                                         std::vector<unsigned int>, times, | ||||||
|  |                                         std::vector<ComplexD>, correlator); | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  |  | ||||||
| END_HADRONS_NAMESPACE | END_HADRONS_NAMESPACE | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ HadronsFermionEP64To32_SOURCES  = EigenPackCast.cc | |||||||
| HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField | HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField | ||||||
| HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a | HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a | ||||||
|  |  | ||||||
| HadronsContractor_SOURCES = Contractor.cc | HadronsContractor_SOURCES = Contractor.cc Contractor.hpp | ||||||
| HadronsContractor_LDADD   = ../libHadrons.a ../../Grid/libGrid.a | HadronsContractor_LDADD   = ../libHadrons.a ../../Grid/libGrid.a | ||||||
|  |  | ||||||
| HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc | HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user