mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			a4d11a630f
			...
			feature/co
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| de8b2dcca3 | |||
| efe000341d | |||
| 11086c5c25 | 
@@ -62,6 +62,9 @@ using A2AMatrixSet = Eigen::TensorMap<Eigen::Tensor<T, 5, Eigen::RowMajor>>;
 | 
				
			|||||||
template <typename T>
 | 
					template <typename T>
 | 
				
			||||||
using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
 | 
					using A2AMatrix = Eigen::Matrix<T, -1, -1, Eigen::RowMajor>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					using A2AMatrixMap = Eigen::Map<A2AMatrix<T>>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <typename T>
 | 
					template <typename T>
 | 
				
			||||||
using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
 | 
					using A2AMatrixTr = Eigen::Matrix<T, -1, -1, Eigen::ColMajor>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -108,7 +111,7 @@ public:
 | 
				
			|||||||
    void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
 | 
					    void saveBlock(const A2AMatrixSet<T> &m, const unsigned int ext, const unsigned int str,
 | 
				
			||||||
                   const unsigned int i, const unsigned int j);
 | 
					                   const unsigned int i, const unsigned int j);
 | 
				
			||||||
    template <template <class> class Vec, typename VecT>
 | 
					    template <template <class> class Vec, typename VecT>
 | 
				
			||||||
    void load(Vec<VecT> &v, double *tRead = nullptr);
 | 
					    void load(Vec<VecT> &v, double *tRead = nullptr, const bool useCache = true);
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    std::string  filename_{""}, dataname_{""};
 | 
					    std::string  filename_{""}, dataname_{""};
 | 
				
			||||||
    unsigned int nt_{0}, ni_{0}, nj_{0};
 | 
					    unsigned int nt_{0}, ni_{0}, nj_{0};
 | 
				
			||||||
@@ -495,7 +498,7 @@ void A2AMatrixIo<T>::saveBlock(const A2AMatrixSet<T> &m,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template <typename T>
 | 
					template <typename T>
 | 
				
			||||||
template <template <class> class Vec, typename VecT>
 | 
					template <template <class> class Vec, typename VecT>
 | 
				
			||||||
void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
 | 
					void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead, const bool useCache)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
#ifdef HAVE_HDF5
 | 
					#ifdef HAVE_HDF5
 | 
				
			||||||
    Hdf5Reader           reader(filename_);
 | 
					    Hdf5Reader           reader(filename_);
 | 
				
			||||||
@@ -532,36 +535,55 @@ void A2AMatrixIo<T>::load(Vec<VecT> &v, double *tRead)
 | 
				
			|||||||
        nj_ = hdim[2];
 | 
					        nj_ = hdim[2];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    A2AMatrix<T>         buf(ni_, nj_);
 | 
					    if (useCache)
 | 
				
			||||||
    std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_),
 | 
					 | 
				
			||||||
                                     static_cast<hsize_t>(nj_)},
 | 
					 | 
				
			||||||
                         stride   = {1, 1, 1},
 | 
					 | 
				
			||||||
                         block    = {1, 1, 1},
 | 
					 | 
				
			||||||
                         memCount = {static_cast<hsize_t>(ni_),
 | 
					 | 
				
			||||||
                                     static_cast<hsize_t>(nj_)};
 | 
					 | 
				
			||||||
    H5NS::DataSpace      memspace(memCount.size(), memCount.data());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    std::cout << "Loading timeslice";
 | 
					 | 
				
			||||||
    std::cout.flush();
 | 
					 | 
				
			||||||
    *tRead = 0.;
 | 
					 | 
				
			||||||
    for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
 | 
					 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        unsigned int         t      = tp1 - 1;
 | 
					        std::vector<T> buf(nt_*ni_*nj_);
 | 
				
			||||||
        std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
 | 
					        T              *pt;
 | 
				
			||||||
        
 | 
					
 | 
				
			||||||
        if (t % 10 == 0)
 | 
					        dataset.read(buf.data(), datatype);
 | 
				
			||||||
 | 
					        pt = buf.data();
 | 
				
			||||||
 | 
					        for (unsigned int t = 0; t < nt_; ++t)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            std::cout << " " << t;
 | 
					            A2AMatrixMap<T> bufMap(pt, ni_, nj_);
 | 
				
			||||||
            std::cout.flush();
 | 
					
 | 
				
			||||||
 | 
					            v[t]  = bufMap.template cast<VecT>();
 | 
				
			||||||
 | 
					            pt   += ni_*nj_;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
 | 
					 | 
				
			||||||
                                  stride.data(), block.data());
 | 
					 | 
				
			||||||
        if (tRead) *tRead -= usecond();    
 | 
					 | 
				
			||||||
        dataset.read(buf.data(), datatype, memspace, dataspace);
 | 
					 | 
				
			||||||
        if (tRead) *tRead += usecond();
 | 
					 | 
				
			||||||
        v[t] = buf.template cast<VecT>();
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    std::cout << std::endl;
 | 
					    // if useCache = false, do I/O timeslice per timeslice (much slower)
 | 
				
			||||||
 | 
					    else
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        A2AMatrix<T>         buf(ni_, nj_);
 | 
				
			||||||
 | 
					        std::vector<hsize_t> count    = {1, static_cast<hsize_t>(ni_),
 | 
				
			||||||
 | 
					                                        static_cast<hsize_t>(nj_)},
 | 
				
			||||||
 | 
					                             stride   = {1, 1, 1},
 | 
				
			||||||
 | 
					                             block    = {1, 1, 1},
 | 
				
			||||||
 | 
					                             memCount = {static_cast<hsize_t>(ni_),
 | 
				
			||||||
 | 
					                                         static_cast<hsize_t>(nj_)};
 | 
				
			||||||
 | 
					        H5NS::DataSpace      memspace(memCount.size(), memCount.data());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        std::cout << "Loading timeslice";
 | 
				
			||||||
 | 
					        std::cout.flush();
 | 
				
			||||||
 | 
					        *tRead = 0.;
 | 
				
			||||||
 | 
					        for (unsigned int tp1 = nt_; tp1 > 0; --tp1)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            unsigned int         t      = tp1 - 1;
 | 
				
			||||||
 | 
					            std::vector<hsize_t> offset = {static_cast<hsize_t>(t), 0, 0};
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            if (t % 10 == 0)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                std::cout << " " << t;
 | 
				
			||||||
 | 
					                std::cout.flush();
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            dataspace.selectHyperslab(H5S_SELECT_SET, count.data(), offset.data(),
 | 
				
			||||||
 | 
					                                      stride.data(), block.data());
 | 
				
			||||||
 | 
					            if (tRead) *tRead -= usecond();    
 | 
				
			||||||
 | 
					            dataset.read(buf.data(), datatype, memspace, dataspace);
 | 
				
			||||||
 | 
					            if (tRead) *tRead += usecond();
 | 
				
			||||||
 | 
					            v[t] = buf.template cast<VecT>();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        std::cout << std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
 | 
					    HADRONS_ERROR(Implementation, "all-to-all matrix I/O needs HDF5 library");
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -41,14 +41,6 @@ BEGIN_HADRONS_NAMESPACE
 | 
				
			|||||||
class Application
 | 
					class Application
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    class TrajRange: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
 | 
					 | 
				
			||||||
                                        unsigned int, start,
 | 
					 | 
				
			||||||
                                        unsigned int, end,
 | 
					 | 
				
			||||||
                                        unsigned int, step);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
    class GlobalPar: Serializable
 | 
					    class GlobalPar: Serializable
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    public:
 | 
					    public:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -263,6 +263,33 @@ void tokenReplace(std::string &str, const std::string token,
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// trajectory range
 | 
				
			||||||
 | 
					class TrajRange: Serializable
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
 | 
				
			||||||
 | 
					                                    unsigned int, start,
 | 
				
			||||||
 | 
					                                    unsigned int, end,
 | 
				
			||||||
 | 
					                                    unsigned int, step,
 | 
				
			||||||
 | 
					                                    std::string,  exclude);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    inline std::vector<unsigned int> getTrajectoryList(void)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        std::vector<unsigned int> excVec = strToVec<unsigned int>(exclude);
 | 
				
			||||||
 | 
					        std::vector<unsigned int> list;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (unsigned int t = start; t < end; t += step)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            if (std::find(excVec.begin(), excVec.end(), t) == excVec.end())
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                list.push_back(t);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return list;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
END_HADRONS_NAMESPACE
 | 
					END_HADRONS_NAMESPACE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <Hadrons/Exceptions.hpp>
 | 
					#include <Hadrons/Exceptions.hpp>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,4 +34,5 @@ nobase_libHadrons_a_HEADERS = \
 | 
				
			|||||||
	Solver.hpp                \
 | 
						Solver.hpp                \
 | 
				
			||||||
	TimerArray.hpp            \
 | 
						TimerArray.hpp            \
 | 
				
			||||||
	VirtualMachine.hpp        \
 | 
						VirtualMachine.hpp        \
 | 
				
			||||||
 | 
						Utilities/Contractor.hpp  \
 | 
				
			||||||
	$(modules_hpp)
 | 
						$(modules_hpp)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,6 +28,25 @@ See the full license in the file "LICENSE" in the top level distribution directo
 | 
				
			|||||||
#include <Hadrons/A2AMatrix.hpp>
 | 
					#include <Hadrons/A2AMatrix.hpp>
 | 
				
			||||||
#include <Hadrons/DiskVector.hpp>
 | 
					#include <Hadrons/DiskVector.hpp>
 | 
				
			||||||
#include <Hadrons/TimerArray.hpp>
 | 
					#include <Hadrons/TimerArray.hpp>
 | 
				
			||||||
 | 
					#include <Hadrons/Utilities/Contractor.hpp>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_COMMS_MPI3
 | 
				
			||||||
 | 
					#define GET_RANK(rank, nMpi) \
 | 
				
			||||||
 | 
					MPI_Comm_size(MPI_COMM_WORLD, &(nMpi));\
 | 
				
			||||||
 | 
					MPI_Comm_rank(MPI_COMM_WORLD, &(rank))
 | 
				
			||||||
 | 
					#define BARRIER() MPI_Barrier(MPI_COMM_WORLD)
 | 
				
			||||||
 | 
					#define GLOBAL_DSUM(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD)
 | 
				
			||||||
 | 
					#define GLOBAL_DMAX(x) MPI_Allreduce(MPI_IN_PLACE, &x, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD)
 | 
				
			||||||
 | 
					#define INIT() MPI_Init(NULL, NULL)
 | 
				
			||||||
 | 
					#define FINALIZE() MPI_Finalize()
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					#define GET_RANK(rank, nMpi) (nMpi) = 1; (rank) = 0
 | 
				
			||||||
 | 
					#define BARRIER()
 | 
				
			||||||
 | 
					#define GLOBAL_DSUM(x)
 | 
				
			||||||
 | 
					#define GLOBAL_DMAX(x)
 | 
				
			||||||
 | 
					#define INIT()
 | 
				
			||||||
 | 
					#define FINALIZE()
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using namespace Grid;
 | 
					using namespace Grid;
 | 
				
			||||||
using namespace QCD;
 | 
					using namespace QCD;
 | 
				
			||||||
@@ -35,58 +54,6 @@ using namespace Hadrons;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
 | 
					#define TIME_MOD(t) (((t) + par.global.nt) % par.global.nt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Contractor
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    class TrajRange: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
 | 
					 | 
				
			||||||
                                        unsigned int, start,
 | 
					 | 
				
			||||||
                                        unsigned int, end,
 | 
					 | 
				
			||||||
                                        unsigned int, step);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    class GlobalPar: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
 | 
					 | 
				
			||||||
                                        TrajRange, trajCounter,
 | 
					 | 
				
			||||||
                                        unsigned int, nt,
 | 
					 | 
				
			||||||
                                        std::string, diskVectorDir,
 | 
					 | 
				
			||||||
                                        std::string, output);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    class A2AMatrixPar: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
 | 
					 | 
				
			||||||
                                        std::string, file,
 | 
					 | 
				
			||||||
                                        std::string, dataset,
 | 
					 | 
				
			||||||
                                        unsigned int, cacheSize,
 | 
					 | 
				
			||||||
                                        std::string, name);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    class ProductPar: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
 | 
					 | 
				
			||||||
                                        std::string, terms,
 | 
					 | 
				
			||||||
                                        std::vector<std::string>, times,
 | 
					 | 
				
			||||||
                                        std::string, translations,
 | 
					 | 
				
			||||||
                                        bool, translationAverage);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    class CorrelatorResult: Serializable
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
    public:
 | 
					 | 
				
			||||||
        GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
 | 
					 | 
				
			||||||
                                        std::vector<Contractor::A2AMatrixPar>,  a2aMatrix,
 | 
					 | 
				
			||||||
                                        ProductPar, contraction,
 | 
					 | 
				
			||||||
                                        std::vector<unsigned int>, times,
 | 
					 | 
				
			||||||
                                        std::vector<ComplexD>, correlator);
 | 
					 | 
				
			||||||
    };
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct ContractorPar
 | 
					struct ContractorPar
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    Contractor::GlobalPar                  global;
 | 
					    Contractor::GlobalPar                  global;
 | 
				
			||||||
@@ -143,6 +110,27 @@ void saveCorrelator(const Contractor::CorrelatorResult &result, const std::strin
 | 
				
			|||||||
    write(writer, fileStem, result);
 | 
					    write(writer, fileStem, result);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void printPerf(const double bytes, const double usec)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    double maxt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    maxt = usec;
 | 
				
			||||||
 | 
					    GLOBAL_DMAX(maxt);
 | 
				
			||||||
 | 
					    std::cout << maxt/1.0e6 << " sec " << bytes/maxt*1.0e6/1024/1024/1024 << " GB/s";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void printPerf(const double bytes, const double busec, 
 | 
				
			||||||
 | 
					               const double flops, const double fusec)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    double maxt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    printPerf(bytes, busec);
 | 
				
			||||||
 | 
					    std::cout << " ";
 | 
				
			||||||
 | 
					    maxt = fusec;
 | 
				
			||||||
 | 
					    GLOBAL_DMAX(maxt);
 | 
				
			||||||
 | 
					    std::cout << flops/fusec/1.0e3 << " GFlop/s";
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
 | 
					std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int nt)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    std::regex               rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
 | 
					    std::regex               rex("([0-9]+)|(([0-9]+)\\.\\.([0-9]+))");
 | 
				
			||||||
@@ -184,59 +172,18 @@ std::set<unsigned int> parseTimeRange(const std::string str, const unsigned int
 | 
				
			|||||||
    return tSet;
 | 
					    return tSet;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct Sec
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    Sec(const double usec)
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        seconds = usec/1.0e6;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    double seconds;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
inline std::ostream & operator<< (std::ostream& s, const Sec &&sec)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    s << std::setw(10) << sec.seconds << " sec";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return s;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct Flops
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    Flops(const double flops, const double fusec)
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        gFlopsPerSec = flops/fusec/1.0e3;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    double gFlopsPerSec;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
inline std::ostream & operator<< (std::ostream& s, const Flops &&f)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    s << std::setw(10) << f.gFlopsPerSec << " GFlop/s";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return s;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct Bytes
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    Bytes(const double bytes, const double busec)
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        gBytesPerSec = bytes/busec*1.0e6/1024/1024/1024;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    double gBytesPerSec;
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
inline std::ostream & operator<< (std::ostream& s, const Bytes &&b)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    s << std::setw(10) << b.gBytesPerSec << " GB/s";
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return s;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main(int argc, char* argv[])
 | 
					int main(int argc, char* argv[])
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    // MPI init
 | 
				
			||||||
 | 
					    int nMpi, rank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    INIT();
 | 
				
			||||||
 | 
					    GET_RANK(rank, nMpi);
 | 
				
			||||||
 | 
					    if (rank != 0)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        std::cout.setstate(std::ios::badbit);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // parse command line
 | 
					    // parse command line
 | 
				
			||||||
    std::string   parFilename;
 | 
					    std::string   parFilename;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -266,31 +213,68 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for (auto &p: par.a2aMatrix)
 | 
					    for (auto &p: par.a2aMatrix)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        std::string dirName = par.global.diskVectorDir + "/" + p.name;
 | 
					        std::string dirName = par.global.diskVectorDir + "/" + p.name + "." + std::to_string(rank);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
 | 
					        a2aMat.emplace(p.name, EigenDiskVector<ComplexD>(dirName, par.global.nt, p.cacheSize));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // trajectory loop
 | 
					    // trajectory loop
 | 
				
			||||||
    for (unsigned int traj = par.global.trajCounter.start; 
 | 
					    std::vector<unsigned int> tList = par.global.trajCounter.getTrajectoryList();
 | 
				
			||||||
         traj < par.global.trajCounter.end; traj += par.global.trajCounter.step)
 | 
					    unsigned int              indi, inde, indPerRank;
 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
        std::cout << ":::::::: Trajectory " << traj << std::endl;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    indPerRank = tList.size()/nMpi;
 | 
				
			||||||
 | 
					    indi       = rank*indPerRank;
 | 
				
			||||||
 | 
					    BARRIER();
 | 
				
			||||||
 | 
					    for (unsigned int tInd = indi; tInd < indi + indPerRank; tInd++)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        unsigned int traj;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (tInd < tList.size())
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            traj = tList[tInd];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            traj = tList.back();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if (nMpi > 1)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            if (rank == 0)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                std::cout << ":::::::: Trajectories ";
 | 
				
			||||||
 | 
					                for (unsigned int r = 0; r < nMpi - 1; ++r)
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    std::cout << tList[tInd + r*indPerRank] << " ";
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                if (tInd + (nMpi - 1)*indPerRank < tList.size())
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    std::cout << tList[tInd + (nMpi - 1)*indPerRank];
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                std::cout << std::endl;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            std::cout << ":::::::: Trajectory " << traj << std::endl;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
        // load data
 | 
					        // load data
 | 
				
			||||||
        for (auto &p: par.a2aMatrix)
 | 
					        for (auto &p: par.a2aMatrix)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            std::string filename = p.file;
 | 
					            std::string filename = p.file;
 | 
				
			||||||
            double      t, size;
 | 
					            double      t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            tokenReplace(filename, "traj", traj);
 | 
					            tokenReplace(filename, "traj", traj);
 | 
				
			||||||
            std::cout << "======== Loading '" << filename << "'" << std::endl;
 | 
					            std::cout << "======== Loading '" << p.file << "'" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            BARRIER();
 | 
				
			||||||
            A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
 | 
					            A2AMatrixIo<HADRONS_A2AM_IO_TYPE> a2aIo(filename, p.dataset, par.global.nt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            a2aIo.load(a2aMat.at(p.name), &t);
 | 
					            a2aIo.load(a2aMat.at(p.name), &t);
 | 
				
			||||||
            std::cout << "Read " << a2aIo.getSize() << " bytes in " << t/1.0e6 
 | 
					            GLOBAL_DMAX(t);
 | 
				
			||||||
                    << " sec, " << a2aIo.getSize()/t*1.0e6/1024/1024 << " MB/s" << std::endl;
 | 
					            std::cout << "Read "  << nMpi*a2aIo.getSize() << " bytes in " << t/1.0e6 
 | 
				
			||||||
 | 
					                      << " sec, " << nMpi*a2aIo.getSize()/t*1.0e6/1024/1024 
 | 
				
			||||||
 | 
					                      << " MB/s"  << std::endl;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // contract
 | 
					        // contract
 | 
				
			||||||
@@ -308,6 +292,7 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
            double                                 fusec, busec, flops, bytes, tusec;
 | 
					            double                                 fusec, busec, flops, bytes, tusec;
 | 
				
			||||||
            Contractor::CorrelatorResult           result;             
 | 
					            Contractor::CorrelatorResult           result;             
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            BARRIER();
 | 
				
			||||||
            tAr.startTimer("Total");
 | 
					            tAr.startTimer("Total");
 | 
				
			||||||
            std::cout << "======== Contraction tr(";
 | 
					            std::cout << "======== Contraction tr(";
 | 
				
			||||||
            for (unsigned int g = 0; g < term.size(); ++g)
 | 
					            for (unsigned int g = 0; g < term.size(); ++g)
 | 
				
			||||||
@@ -358,9 +343,10 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
                }
 | 
					                }
 | 
				
			||||||
                tAr.stopTimer("Transpose caching");
 | 
					                tAr.stopTimer("Transpose caching");
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            bytes = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols()*sizeof(ComplexD);
 | 
					            bytes  = par.global.nt*lastTerm[0].rows()*lastTerm[0].cols();
 | 
				
			||||||
            std::cout << Sec(tAr.getDTimer("Transpose caching")) << " " 
 | 
					            bytes *= sizeof(ComplexD)*nMpi;
 | 
				
			||||||
                      << Bytes(bytes, tAr.getDTimer("Transpose caching")) << std::endl;
 | 
					            printPerf(bytes, tAr.getDTimer("Transpose caching"));
 | 
				
			||||||
 | 
					            std::cout << std::endl;
 | 
				
			||||||
            for (unsigned int i = 0; i < timeSeq.size(); ++i)
 | 
					            for (unsigned int i = 0; i < timeSeq.size(); ++i)
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                unsigned int dti = 0;
 | 
					                unsigned int dti = 0;
 | 
				
			||||||
@@ -378,7 +364,7 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
                            << " -- positions= " << t << ", dt= " << dt << std::endl;
 | 
					                            << " -- positions= " << t << ", dt= " << dt << std::endl;
 | 
				
			||||||
                    if (term.size() > 2)
 | 
					                    if (term.size() > 2)
 | 
				
			||||||
                    {
 | 
					                    {
 | 
				
			||||||
                        std::cout << std::setw(8) << "products";
 | 
					                        std::cout << std::setw(10) << "products ";
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    flops  = 0.;
 | 
					                    flops  = 0.;
 | 
				
			||||||
                    bytes  = 0.;
 | 
					                    bytes  = 0.;
 | 
				
			||||||
@@ -405,11 +391,11 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    if (term.size() > 2)
 | 
					                    if (term.size() > 2)
 | 
				
			||||||
                    {
 | 
					                    {
 | 
				
			||||||
                        std::cout << Sec(tAr.getDTimer("A*B total") - busec) << " "
 | 
					                        printPerf(bytes*nMpi, tAr.getDTimer("A*B total") - busec,
 | 
				
			||||||
                                << Flops(flops, tAr.getDTimer("A*B algebra") - fusec) << " " 
 | 
					                                  flops*nMpi, tAr.getDTimer("A*B algebra") - fusec);
 | 
				
			||||||
                                << Bytes(bytes, tAr.getDTimer("A*B total") - busec) << std::endl;
 | 
					                        std::cout << std::endl;
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    std::cout << std::setw(8) << "traces";
 | 
					                    std::cout << std::setw(10) << "traces ";
 | 
				
			||||||
                    flops  = 0.;
 | 
					                    flops  = 0.;
 | 
				
			||||||
                    bytes  = 0.;
 | 
					                    bytes  = 0.;
 | 
				
			||||||
                    fusec  = tAr.getDTimer("tr(A*B)");
 | 
					                    fusec  = tAr.getDTimer("tr(A*B)");
 | 
				
			||||||
@@ -423,9 +409,9 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
                        bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
 | 
					                        bytes += 2.*prod.rows()*prod.cols()*sizeof(ComplexD);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                    tAr.stopTimer("Linear algebra");
 | 
					                    tAr.stopTimer("Linear algebra");
 | 
				
			||||||
                    std::cout << Sec(tAr.getDTimer("tr(A*B)") - busec) << " "
 | 
					                    printPerf(bytes*nMpi, tAr.getDTimer("tr(A*B)") - busec,
 | 
				
			||||||
                            << Flops(flops, tAr.getDTimer("tr(A*B)") - fusec) << " " 
 | 
					                              flops*nMpi, tAr.getDTimer("tr(A*B)") - fusec);
 | 
				
			||||||
                            << Bytes(bytes, tAr.getDTimer("tr(A*B)") - busec) << std::endl;
 | 
					                    std::cout << std::endl;
 | 
				
			||||||
                    if (!p.translationAverage)
 | 
					                    if (!p.translationAverage)
 | 
				
			||||||
                    {
 | 
					                    {
 | 
				
			||||||
                        saveCorrelator(result, par.global.output, dt, traj);
 | 
					                        saveCorrelator(result, par.global.output, dt, traj);
 | 
				
			||||||
@@ -450,5 +436,7 @@ int main(int argc, char* argv[])
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    FINALIZE();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return EXIT_SUCCESS;
 | 
					    return EXIT_SUCCESS;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,7 +5,48 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
BEGIN_HADRONS_NAMESPACE
 | 
					BEGIN_HADRONS_NAMESPACE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Contractor
 | 
				
			||||||
 | 
					{   
 | 
				
			||||||
 | 
					    class GlobalPar: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
 | 
				
			||||||
 | 
					                                        TrajRange, trajCounter,
 | 
				
			||||||
 | 
					                                        unsigned int, nt,
 | 
				
			||||||
 | 
					                                        std::string, diskVectorDir,
 | 
				
			||||||
 | 
					                                        std::string, output);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class A2AMatrixPar: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(A2AMatrixPar,
 | 
				
			||||||
 | 
					                                        std::string, file,
 | 
				
			||||||
 | 
					                                        std::string, dataset,
 | 
				
			||||||
 | 
					                                        unsigned int, cacheSize,
 | 
				
			||||||
 | 
					                                        std::string, name);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class ProductPar: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(ProductPar,
 | 
				
			||||||
 | 
					                                        std::string, terms,
 | 
				
			||||||
 | 
					                                        std::vector<std::string>, times,
 | 
				
			||||||
 | 
					                                        std::string, translations,
 | 
				
			||||||
 | 
					                                        bool, translationAverage);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class CorrelatorResult: Serializable
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        GRID_SERIALIZABLE_CLASS_MEMBERS(CorrelatorResult,
 | 
				
			||||||
 | 
					                                        std::vector<Contractor::A2AMatrixPar>,  a2aMatrix,
 | 
				
			||||||
 | 
					                                        ProductPar, contraction,
 | 
				
			||||||
 | 
					                                        std::vector<unsigned int>, times,
 | 
				
			||||||
 | 
					                                        std::vector<ComplexD>, correlator);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
END_HADRONS_NAMESPACE
 | 
					END_HADRONS_NAMESPACE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@ HadronsFermionEP64To32_SOURCES  = EigenPackCast.cc
 | 
				
			|||||||
HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
 | 
					HadronsFermionEP64To32_CXXFLAGS = $(AM_CXXFLAGS) -DFIN=WilsonImplD::FermionField -DFOUT=WilsonImplF::FermionField
 | 
				
			||||||
HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a
 | 
					HadronsFermionEP64To32_LDADD    = ../libHadrons.a ../../Grid/libGrid.a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HadronsContractor_SOURCES = Contractor.cc
 | 
					HadronsContractor_SOURCES = Contractor.cc Contractor.hpp
 | 
				
			||||||
HadronsContractor_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
 | 
					HadronsContractor_LDADD   = ../libHadrons.a ../../Grid/libGrid.a
 | 
				
			||||||
 | 
					
 | 
				
			||||||
HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc
 | 
					HadronsContractorBenchmark_SOURCES = ContractorBenchmark.cc
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user