diff --git a/VERSION b/VERSION
index bfad377d..a0211af1 100644
--- a/VERSION
+++ b/VERSION
@@ -1,4 +1,4 @@
-Version : 0.7.0
+Version : 0.8.0
 - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
 - MPI and MPI3 comms optimisations for KNL and OPA finished
diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc
index 666e4830..f811ac32 100644
--- a/benchmarks/Benchmark_ITT.cc
+++ b/benchmarks/Benchmark_ITT.cc
@@ -158,8 +158,10 @@ public:
       dbytes=0;
       ncomm=0;
-
-      parallel_for(int dir=0;dir<8;dir++){
+#ifdef GRID_OMP
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+#endif
+      for(int dir=0;dir<8;dir++){
 	double tbytes;
 	int mu =dir % 4;
@@ -175,9 +177,14 @@ public:
 	    int comm_proc = mpi_layout[mu]-1;
 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 	  }
+#ifdef GRID_OMP
+	int tid = omp_get_thread_num();
+#else
+	int tid = dir;
+#endif
 	  tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
 					     (void *)&rbuf[dir][0], recv_from_rank,
-					     bytes,dir);
+					     bytes,tid);
 #ifdef GRID_OMP
 #pragma omp atomic
diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc
index 29ccf96c..304a09fc 100644
--- a/benchmarks/Benchmark_comms.cc
+++ b/benchmarks/Benchmark_comms.cc
@@ -169,7 +169,11 @@ int main (int argc, char ** argv)
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=8;Ls*=2){
-      std::vector<int> latt_size ({lat,lat,lat,lat});
+      std::vector<int> latt_size ({lat*mpi_layout[0],
+                                   lat*mpi_layout[1],
+                                   lat*mpi_layout[2],
+                                   lat*mpi_layout[3]});
+
       GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
       RealD Nrank = Grid._Nprocessors;
@@ -485,7 +489,8 @@ int main (int argc, char ** argv)
       dbytes=0;
       ncomm=0;
-      parallel_for(int dir=0;dir<8;dir++){
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+      for(int dir=0;dir<8;dir++){
 	double tbytes;
 	int mu =dir % 4;
@@ -502,9 +507,9 @@ int main (int argc, char ** argv)
 	    int comm_proc = mpi_layout[mu]-1;
 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
 	  }
-
+	int tid = omp_get_thread_num();
 	tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
-					   (void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
+					   (void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
 #pragma omp atomic
 	dbytes+=tbytes;
diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc
index 848f271d..cc965050 100644
--- a/benchmarks/Benchmark_memory_bandwidth.cc
+++ b/benchmarks/Benchmark_memory_bandwidth.cc
@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
   std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
   std::vector<int> mpi_layout  = GridDefaultMpi();
@@ -52,7 +53,7 @@ int main (int argc, char ** argv)
   std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
   int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -84,7 +85,7 @@ int main (int argc, char ** argv)
   std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
   int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -115,7 +116,7 @@ int main (int argc, char ** argv)
   std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
   int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -146,31 +147,30 @@ int main (int argc, char ** argv)
   std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
-  int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
-
- 
GridCartesian Grid(latt_size,simd_layout,mpi_layout); - GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - - LatticeColourMatrix z(&Grid); random(pRNG,z); - LatticeColourMatrix x(&Grid); random(pRNG,x); - LatticeColourMatrix y(&Grid); random(pRNG,y); - - double start=usecond(); - for(int64_t i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + + double start=usecond(); + for(int64_t i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; @@ -190,19 +190,64 @@ int main (int argc, char ** argv) LatticeColourMatrix x(&Grid); random(pRNG,x); LatticeColourMatrix y(&Grid); random(pRNG,y); - for(int mu=0;mu<=4;mu++){ + for(int mu=0;mu<4;mu++){ + double start=usecond(); + for(int64_t i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); + LatticeColourMatrix tmp(&Grid); + + for(int mu=0;mu<4;mu++){ + double tshift=0; + double tmult =0; + double start=usecond(); for(int64_t i=0;i &blockSize, coarseDim[d] = fineDim[d]/blockSize[d]; if (coarseDim[d]*blockSize[d] != fineDim[d]) { - HADRON_ERROR(Size, "Fine dimension " + std::to_string(d) + HADRONS_ERROR(Size, "Fine dimension " + std::to_string(d) + " (" + std::to_string(fineDim[d]) + ") not divisible by coarse dimension (" + std::to_string(coarseDim[d]) + ")"); @@ -96,7 +96,7 @@ void Environment::createCoarseGrid(const std::vector &blockSize, cLs = Ls/blockSize[nd]; if (cLs*blockSize[nd] != Ls) { - HADRON_ERROR(Size, "Fine Ls (" + std::to_string(Ls) + HADRONS_ERROR(Size, "Fine Ls (" + std::to_string(Ls) + ") not divisible by coarse Ls (" + std::to_string(cLs) + ")"); } @@ -128,7 +128,7 @@ GridCartesian * Environment::getGrid(const unsigned int Ls) const } catch(std::out_of_range &) { - HADRON_ERROR(Definition, "no grid with Ls= " + std::to_string(Ls)); + HADRONS_ERROR(Definition, "no grid with Ls= " + std::to_string(Ls)); } } @@ -147,7 +147,7 @@ GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const } catch(std::out_of_range &) { - HADRON_ERROR(Definition, "no red-black grid with Ls= " + std::to_string(Ls)); + HADRONS_ERROR(Definition, "no red-black grid with Ls= " + std::to_string(Ls)); } } @@ -171,7 +171,7 @@ GridCartesian * Environment::getCoarseGrid( } catch(std::out_of_range &) { - HADRON_ERROR(Definition, "no coarse grid with Ls= " + std::to_string(Ls)); + HADRONS_ERROR(Definition, "no coarse grid with Ls= " + std::to_string(Ls)); } } @@ -221,7 +221,7 @@ void Environment::addObject(const std::string name, const int moduleAddress) } else { - HADRON_ERROR(Definition, "object '" + name + "' already exists"); + HADRONS_ERROR(Definition, "object '" + name + "' already exists"); } } @@ -244,7 +244,7 @@ unsigned int 
Environment::getObjectAddress(const std::string name) const } else { - HADRON_ERROR(Definition, "no object with name '" + name + "'"); + HADRONS_ERROR(Definition, "no object with name '" + name + "'"); } } diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index f624f36e..a9c3c724 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -245,7 +245,7 @@ void Environment::createDerivedObject(const std::string name, (object_[address].type != &typeid(B)) or (object_[address].derivedType != &typeid(T))) { - HADRON_ERROR(Definition, "object '" + name + "' already allocated"); + HADRONS_ERROR(Definition, "object '" + name + "' already allocated"); } } @@ -279,7 +279,7 @@ T * Environment::getDerivedObject(const unsigned int address) const } else { - HADRON_ERROR(Definition, "object with address " + std::to_string(address) + + HADRONS_ERROR(Definition, "object with address " + std::to_string(address) + " cannot be casted to '" + typeName(&typeid(T)) + "' (has type '" + typeName(&typeid(h->get())) + "')"); } @@ -287,20 +287,20 @@ T * Environment::getDerivedObject(const unsigned int address) const } else { - HADRON_ERROR(Definition, "object with address " + std::to_string(address) + + HADRONS_ERROR(Definition, "object with address " + std::to_string(address) + " does not have type '" + typeName(&typeid(B)) + "' (has type '" + getObjectType(address) + "')"); } } else { - HADRON_ERROR(Definition, "object with address " + std::to_string(address) + + HADRONS_ERROR(Definition, "object with address " + std::to_string(address) + " is empty"); } } else { - HADRON_ERROR(Definition, "no object with address " + std::to_string(address)); + HADRONS_ERROR(Definition, "no object with address " + std::to_string(address)); } } @@ -338,7 +338,7 @@ bool Environment::isObjectOfType(const unsigned int address) const } else { - HADRON_ERROR(Definition, "no object with address " + std::to_string(address)); + HADRONS_ERROR(Definition, "no object with address " + std::to_string(address)); } } diff --git a/extras/Hadrons/Exceptions.hpp b/extras/Hadrons/Exceptions.hpp index adf2340f..3eb1c25f 100644 --- a/extras/Hadrons/Exceptions.hpp +++ b/extras/Hadrons/Exceptions.hpp @@ -34,10 +34,10 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #endif -#define SRC_LOC std::string(__FUNCTION__) + " at " + std::string(__FILE__) + ":"\ - + std::to_string(__LINE__) -#define HADRON_ERROR(exc, msg)\ -throw(Exceptions::exc(msg, SRC_LOC)); +#define HADRONS_SRC_LOC std::string(__FUNCTION__) + " at " \ + + std::string(__FILE__) + ":" + std::to_string(__LINE__) +#define HADRONS_ERROR(exc, msg)\ +throw(Exceptions::exc(msg, HADRONS_SRC_LOC)); #define DECL_EXC(name, base) \ class name: public base\ diff --git a/extras/Hadrons/Factory.hpp b/extras/Hadrons/Factory.hpp index 705a639e..07516640 100644 --- a/extras/Hadrons/Factory.hpp +++ b/extras/Hadrons/Factory.hpp @@ -94,7 +94,7 @@ std::unique_ptr Factory::create(const std::string type, } catch (std::out_of_range &) { - HADRON_ERROR(Argument, "object of type '" + type + "' unknown"); + HADRONS_ERROR(Argument, "object of type '" + type + "' unknown"); } return func(name); diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 5e729579..433dcd21 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -110,7 +110,7 @@ public: }; #define LOG(channel) std::cout << HadronsLog##channel -#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; +#define 
HADRONS_DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; extern HadronsLogger HadronsLogError; extern HadronsLogger HadronsLogWarning; diff --git a/extras/Hadrons/Graph.hpp b/extras/Hadrons/Graph.hpp index 67694aa8..ad84e7e0 100644 --- a/extras/Hadrons/Graph.hpp +++ b/extras/Hadrons/Graph.hpp @@ -184,7 +184,7 @@ void Graph::removeVertex(const T &value) } else { - HADRON_ERROR(Range, "vertex does not exists"); + HADRONS_ERROR(Range, "vertex does not exists"); } // remove all edges containing the vertex @@ -213,7 +213,7 @@ void Graph::removeEdge(const Edge &e) } else { - HADRON_ERROR(Range, "edge does not exists"); + HADRONS_ERROR(Range, "edge does not exists"); } } @@ -259,7 +259,7 @@ void Graph::mark(const T &value, const bool doMark) } else { - HADRON_ERROR(Range, "vertex does not exists"); + HADRONS_ERROR(Range, "vertex does not exists"); } } @@ -297,7 +297,7 @@ bool Graph::isMarked(const T &value) const } else { - HADRON_ERROR(Range, "vertex does not exists"); + HADRONS_ERROR(Range, "vertex does not exists"); return false; } @@ -543,7 +543,7 @@ std::vector Graph::topoSort(void) { if (tmpMarked.at(v)) { - HADRON_ERROR(Range, "cannot topologically sort a cyclic graph"); + HADRONS_ERROR(Range, "cannot topologically sort a cyclic graph"); } if (!isMarked(v)) { @@ -602,7 +602,7 @@ std::vector Graph::topoSort(Gen &gen) { if (tmpMarked.at(v)) { - HADRON_ERROR(Range, "cannot topologically sort a cyclic graph"); + HADRONS_ERROR(Range, "cannot topologically sort a cyclic graph"); } if (!isMarked(v)) { diff --git a/extras/Hadrons/Module.cc b/extras/Hadrons/Module.cc index 54978f93..faf01d5a 100644 --- a/extras/Hadrons/Module.cc +++ b/extras/Hadrons/Module.cc @@ -49,7 +49,7 @@ std::string ModuleBase::getName(void) const // get factory registration name if available std::string ModuleBase::getRegisteredName(void) { - HADRON_ERROR(Definition, "module '" + getName() + "' has no registered type" + HADRONS_ERROR(Definition, "module '" + getName() + "' has no registered type" + " in the factory"); } diff --git a/extras/Hadrons/Module.hpp b/extras/Hadrons/Module.hpp index 656aacef..7f8b7796 100644 --- a/extras/Hadrons/Module.hpp +++ b/extras/Hadrons/Module.hpp @@ -126,7 +126,7 @@ if (env().getGrid()->IsBoss())\ \ if (mkdir(_dirname))\ {\ - HADRON_ERROR(Io, "cannot create directory '" + _dirname + "'");\ + HADRONS_ERROR(Io, "cannot create directory '" + _dirname + "'");\ }\ {\ ResultWriter _writer(RESULT_FILE_NAME(ioStem));\ diff --git a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp index f1ca6c2b..e529d7a2 100644 --- a/extras/Hadrons/Modules/MContraction/WardIdentity.hpp +++ b/extras/Hadrons/Modules/MContraction/WardIdentity.hpp @@ -119,7 +119,7 @@ void TWardIdentity::setup(void) Ls_ = env().getObjectLs(par().q); if (Ls_ != env().getObjectLs(par().action)) { - HADRON_ERROR(Size, "Ls mismatch between quark action and propagator"); + HADRONS_ERROR(Size, "Ls mismatch between quark action and propagator"); } envTmpLat(PropagatorField, "tmp"); envTmpLat(PropagatorField, "vector_WI"); diff --git a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp index 9a359427..ee21cba9 100644 --- a/extras/Hadrons/Modules/MFermion/GaugeProp.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -177,7 +177,7 @@ void TGaugeProp::execute(void) { if (Ls_ != env().getObjectLs(par().source)) { - HADRON_ERROR(Size, "Ls mismatch between quark action and source"); + HADRONS_ERROR(Size, "Ls mismatch between 
quark action and source"); } else { diff --git a/extras/Hadrons/Modules/MScalarSUN/Div.hpp b/extras/Hadrons/Modules/MScalarSUN/Div.hpp index d1f6df26..ff26c60b 100644 --- a/extras/Hadrons/Modules/MScalarSUN/Div.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/Div.hpp @@ -49,19 +49,20 @@ public: std::string, output); }; +class DivResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(DivResult, + DiffType, type, + Complex, value); +}; + template class TDiv: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - DiffType, type, - Complex, value); - }; public: // constructor TDiv(const std::string name); @@ -112,7 +113,7 @@ void TDiv::setup(void) { if (par().op.size() != env().getNd()) { - HADRON_ERROR(Size, "the number of components differs from number of dimensions"); + HADRONS_ERROR(Size, "the number of components differs from number of dimensions"); } envCreateLat(ComplexField, getName()); } @@ -139,7 +140,7 @@ void TDiv::execute(void) } if (!par().output.empty()) { - Result r; + DivResult r; r.type = par().type; r.value = TensorRemove(sum(div)); diff --git a/extras/Hadrons/Modules/MScalarSUN/EMT.hpp b/extras/Hadrons/Modules/MScalarSUN/EMT.hpp index 025b7936..dbbfb6b3 100644 --- a/extras/Hadrons/Modules/MScalarSUN/EMT.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/EMT.hpp @@ -54,6 +54,17 @@ public: std::string, output); }; +class EMTResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(EMTResult, + std::vector>, value, + double, m2, + double, lambda, + double, g, + double, xi); +}; + template class TEMT: public Module { @@ -155,13 +166,22 @@ void TEMT::execute(void) LOG(Message) << " xi= " << par().xi << std::endl; } - const unsigned int N = SImpl::Group::Dimension; + const unsigned int N = SImpl::Group::Dimension, nd = env().getNd(); auto &trphi2 = envGet(ComplexField, varName(par().phiPow, 2)); auto &trphi4 = envGet(ComplexField, varName(par().phiPow, 4)); auto &sumkin = envGet(ComplexField, varName(par().kinetic, "sum")); + EMTResult result; - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - for (unsigned int nu = mu; nu < env().getNd(); ++nu) + if (!par().output.empty()) + { + result.m2 = par().m2; + result.g = par().g; + result.lambda = par().lambda; + result.xi = par().xi; + result.value.resize(nd, std::vector(nd)); + } + for (unsigned int mu = 0; mu < nd; ++mu) + for (unsigned int nu = mu; nu < nd; ++nu) { auto &out = envGet(ComplexField, varName(getName(), mu, nu)); auto &trkin = envGet(ComplexField, varName(par().kinetic, mu, nu)); @@ -178,6 +198,15 @@ void TEMT::execute(void) out -= sumkin + par().m2*trphi2 + par().lambda*trphi4; } out *= N/par().g; + if (!par().output.empty()) + { + result.value[mu][nu] = TensorRemove(sum(out)); + result.value[mu][nu] = result.value[nu][mu]; + } + } + if (!par().output.empty()) + { + saveResult(par().output, "emt", result); } } diff --git a/extras/Hadrons/Modules/MScalarSUN/Grad.hpp b/extras/Hadrons/Modules/MScalarSUN/Grad.hpp index 7718fbf2..ecf65e90 100644 --- a/extras/Hadrons/Modules/MScalarSUN/Grad.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/Grad.hpp @@ -49,19 +49,20 @@ public: std::string, output); }; +class GradResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(GradResult, + DiffType, type, + std::vector, value); +}; + template class TGrad: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class 
Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - DiffType, type, - Complex, value); - }; public: // constructor TGrad(const std::string name); @@ -130,14 +131,18 @@ void TGrad::setup(void) template void TGrad::execute(void) { - const auto nd = env().getNd(); - LOG(Message) << "Computing the " << par().type << " gradient of '" << par().op << "'" << std::endl; - std::vector result; - auto &op = envGet(ComplexField, par().op); + const unsigned int nd = env().getNd(); + GradResult result; + auto &op = envGet(ComplexField, par().op); + if (!par().output.empty()) + { + result.type = par().type; + result.value.resize(nd); + } for (unsigned int mu = 0; mu < nd; ++mu) { auto &der = envGet(ComplexField, varName(getName(), mu)); @@ -145,14 +150,10 @@ void TGrad::execute(void) dmu(der, op, mu, par().type); if (!par().output.empty()) { - Result r; - - r.type = par().type; - r.value = TensorRemove(sum(der)); - result.push_back(r); + result.value[mu] = TensorRemove(sum(der)); } } - if (result.size() > 0) + if (!par().output.empty()) { saveResult(par().output, "grad", result); } diff --git a/extras/Hadrons/Modules/MScalarSUN/ShiftProbe.hpp b/extras/Hadrons/Modules/MScalarSUN/ShiftProbe.hpp index 979a340e..cd7c15eb 100644 --- a/extras/Hadrons/Modules/MScalarSUN/ShiftProbe.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/ShiftProbe.hpp @@ -51,20 +51,20 @@ public: std::string, output); }; +class ShiftProbeResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ShiftProbeResult, + std::string, shifts, + Complex, value); +}; + template class TShiftProbe: public Module { public: - typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, op, - Complex , value); - }; public: // constructor TShiftProbe(const std::string name); @@ -134,14 +134,14 @@ void TShiftProbe::execute(void) shift = strToVec(par().shifts); if (shift.size() % 2 != 0) { - HADRON_ERROR(Size, "the number of shifts is odd"); + HADRONS_ERROR(Size, "the number of shifts is odd"); } sign = (shift.size() % 4 == 0) ? 
1 : -1; for (auto &s: shift) { if (s.first >= env().getNd()) { - HADRON_ERROR(Size, "dimension to large for shift <" + HADRONS_ERROR(Size, "dimension to large for shift <" + std::to_string(s.first) + " " + std::to_string(s.second) + ">" ); } @@ -160,6 +160,14 @@ void TShiftProbe::execute(void) } } probe = real(sign*trace(acc)); + if (!par().output.empty()) + { + ShiftProbeResult r; + + r.shifts = par().shifts; + r.value = TensorRemove(sum(probe)); + saveResult(par().output, "probe", r); + } } END_MODULE_NAMESPACE diff --git a/extras/Hadrons/Modules/MScalarSUN/TrKinetic.hpp b/extras/Hadrons/Modules/MScalarSUN/TrKinetic.hpp index 59aa27b8..a714daaa 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TrKinetic.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TrKinetic.hpp @@ -49,19 +49,20 @@ public: std::string, output); }; +class TrKineticResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TrKineticResult, + std::vector>, value, + DiffType, type); +}; + template class TTrKinetic: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, op, - Complex , value); - }; public: // constructor TTrKinetic(const std::string name); @@ -135,18 +136,24 @@ void TTrKinetic::execute(void) LOG(Message) << "Computing tr(d_mu phi*d_nu phi) using " << par().type << " derivative" << std::endl; - std::vector result; - auto &phi = envGet(Field, par().field); - auto &sumkin = envGet(ComplexField, varName(getName(), "sum")); + const unsigned int nd = env().getNd(); + TrKineticResult result; + auto &phi = envGet(Field, par().field); + auto &sumkin = envGet(ComplexField, varName(getName(), "sum")); envGetTmp(std::vector, der); sumkin = zero; - for (unsigned int mu = 0; mu < env().getNd(); ++mu) + if (!par().output.empty()) + { + result.type = par().type; + result.value.resize(nd, std::vector(nd)); + } + for (unsigned int mu = 0; mu < nd; ++mu) { dmu(der[mu], phi, mu, par().type); } - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - for (unsigned int nu = mu; nu < env().getNd(); ++nu) + for (unsigned int mu = 0; mu < nd; ++mu) + for (unsigned int nu = mu; nu < nd; ++nu) { auto &out = envGet(ComplexField, varName(getName(), mu, nu)); @@ -155,32 +162,13 @@ void TTrKinetic::execute(void) { sumkin += out; } - } - if (!par().output.empty()) - { - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - for (unsigned int nu = mu; nu < env().getNd(); ++nu) + if (!par().output.empty()) { - auto &out = envGet(ComplexField, varName(getName(), mu, nu)); - Result r; - - r.op = "tr(d_" + std::to_string(mu) + "phi*d_" - + std::to_string(nu) + "phi)"; - r.value = TensorRemove(sum(out)); - result.push_back(r); - } - { - Result r; - - r.op = "sum_mu tr(d_mu phi*d_mu phi)"; - r.value = TensorRemove(sum(sumkin)); - result.push_back(r); + result.value[mu][nu] = TensorRemove(sum(out)); + result.value[mu][nu] = result.value[nu][mu]; } } - if (result.size() > 0) - { - saveResult(par().output, "trkinetic", result); - } + saveResult(par().output, "trkinetic", result); } END_MODULE_NAMESPACE diff --git a/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp index ed1a629a..cdbf7e30 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TrMag.hpp @@ -49,19 +49,20 @@ public: std::string, output); }; +class TrMagResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TrMagResult, + std::string, op, + 
Real, value); +}; + template class TTrMag: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, op, - Real, value); - }; public: // constructor TTrMag(const std::string name); @@ -120,8 +121,8 @@ void TTrMag::execute(void) LOG(Message) << "Computing tr(mag^n) for n even up to " << par().maxPow << std::endl; - std::vector result; - auto &phi = envGet(Field, par().field); + std::vector result; + auto &phi = envGet(Field, par().field); auto m2 = sum(phi), mn = m2; @@ -129,7 +130,7 @@ void TTrMag::execute(void) mn = 1.; for (unsigned int n = 2; n <= par().maxPow; n += 2) { - Result r; + TrMagResult r; mn = mn*m2; r.op = "tr(mag^" + std::to_string(n) + ")"; diff --git a/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp index a61c00bc..9be0a5d6 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TrPhi.hpp @@ -49,19 +49,21 @@ public: std::string, output); }; +class TrPhiResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TrPhiResult, + std::string, op, + Real, value); +}; + template class TTrPhi: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, op, - Real, value); - }; + public: // constructor TTrPhi(const std::string name); @@ -119,7 +121,7 @@ void TTrPhi::setup(void) { if (par().maxPow < 2) { - HADRON_ERROR(Size, "'maxPow' should be at least equal to 2"); + HADRONS_ERROR(Size, "'maxPow' should be at least equal to 2"); } envTmpLat(Field, "phi2"); envTmpLat(Field, "buf"); @@ -136,8 +138,8 @@ void TTrPhi::execute(void) LOG(Message) << "Computing tr(phi^n) for n even up to " << par().maxPow << std::endl; - std::vector result; - auto &phi = envGet(Field, par().field); + std::vector result; + auto &phi = envGet(Field, par().field); envGetTmp(Field, phi2); envGetTmp(Field, buf); @@ -151,7 +153,7 @@ void TTrPhi::execute(void) phin = trace(buf); if (!par().output.empty()) { - Result r; + TrPhiResult r; r.op = "tr(phi^" + std::to_string(n) + ")"; r.value = TensorRemove(sum(phin)).real(); diff --git a/extras/Hadrons/Modules/MScalarSUN/TransProj.hpp b/extras/Hadrons/Modules/MScalarSUN/TransProj.hpp index 6c6502fc..c9b42bf0 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TransProj.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TransProj.hpp @@ -49,19 +49,20 @@ public: std::string, output); }; +class TransProjResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TransProjResult, + std::vector>, value, + DiffType, type); +}; + template class TTransProj: public Module { public: typedef typename SImpl::Field Field; typedef typename SImpl::ComplexField ComplexField; - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, op, - Complex , value); - }; public: // constructor TTransProj(const std::string name); @@ -137,21 +138,27 @@ void TTransProj::execute(void) << par().type << " derivatives and op= '" << par().op << "'" << std::endl; - std::vector result; - auto &op = envGet(ComplexField, par().op); + const unsigned int nd = env().getNd(); + TransProjResult result; + auto &op = envGet(ComplexField, par().op); envGetTmp(ComplexField, buf1); envGetTmp(ComplexField, buf2); envGetTmp(ComplexField, lap); lap = zero; - for (unsigned int mu = 0; mu < 
env().getNd(); ++mu) + if (!par().output.empty()) + { + result.type = par().type; + result.value.resize(nd, std::vector(nd)); + } + for (unsigned int mu = 0; mu < nd; ++mu) { dmu(buf1, op, mu, par().type); dmu(buf2, buf1, mu, par().type); lap += buf2; } - for (unsigned int mu = 0; mu < env().getNd(); ++mu) - for (unsigned int nu = mu; nu < env().getNd(); ++nu) + for (unsigned int mu = 0; mu < nd; ++mu) + for (unsigned int nu = mu; nu < nd; ++nu) { auto &out = envGet(ComplexField, varName(getName(), mu, nu)); dmu(buf1, op, mu, par().type); @@ -163,16 +170,11 @@ void TTransProj::execute(void) } if (!par().output.empty()) { - Result r; - - r.op = "(delta_" + std::to_string(mu) + "," + std::to_string(nu) - + " d^2 - d_" + std::to_string(mu) + "*d_" - + std::to_string(nu) + ")*op"; - r.value = TensorRemove(sum(out)); - result.push_back(r); + result.value[mu][nu] = TensorRemove(sum(out)); + result.value[mu][nu] = result.value[nu][mu]; } } - if (result.size() > 0) + if (!par().output.empty()) { saveResult(par().output, "transproj", result); } diff --git a/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp b/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp index c31b1621..1496edf9 100644 --- a/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/TwoPoint.hpp @@ -50,23 +50,23 @@ public: std::string, output); }; +class TwoPointResult: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(TwoPointResult, + std::string, sink, + std::string, source, + std::vector, mom, + std::vector, data); +}; + template class TTwoPoint: public Module { public: - typedef typename SImpl::Field Field; - typedef typename SImpl::ComplexField ComplexField; - typedef std::vector SlicedOp; - - class Result: Serializable - { - public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Result, - std::string, sink, - std::string, source, - std::vector, mom, - std::vector, data); - }; + typedef typename SImpl::Field Field; + typedef typename SImpl::ComplexField ComplexField; + typedef std::vector SlicedOp; public: // constructor TTwoPoint(const std::string name); @@ -143,7 +143,7 @@ void TTwoPoint::setup(void) mom_[i] = strToVec(par().mom[i]); if (mom_[i].size() != nd - 1) { - HADRON_ERROR(Size, "momentum number of components different from " + HADRONS_ERROR(Size, "momentum number of components different from " + std::to_string(nd-1)); } } @@ -160,18 +160,24 @@ void TTwoPoint::execute(void) LOG(Message) << " <" << p.first << " " << p.second << ">" << std::endl; } - const unsigned int nd = env().getDim().size(); - const unsigned int nt = env().getDim().back(); - const unsigned int nop = par().op.size(); - const unsigned int nmom = mom_.size(); + const unsigned int nd = env().getNd(); + const unsigned int nt = env().getDim().back(); + const unsigned int nop = par().op.size(); + const unsigned int nmom = mom_.size(); + double partVol = 1.; std::vector dMask(nd, 1); std::set ops; - std::vector result; + std::vector result; std::map> slicedOp; FFT fft(env().getGrid()); + TComplex buf; envGetTmp(ComplexField, ftBuf); dMask[nd - 1] = 0; + for (unsigned int mu = 0; mu < nd - 1; ++mu) + { + partVol *= env().getDim()[mu]; + } for (auto &p: par().op) { ops.insert(p.first); @@ -183,7 +189,7 @@ void TTwoPoint::execute(void) slicedOp[o].resize(nmom); LOG(Message) << "Operator '" << o << "' FFT" << std::endl; - fft.FFT_dim_mask(ftBuf, op, dMask, FFT::backward); + fft.FFT_dim_mask(ftBuf, op, dMask, FFT::forward); for (unsigned int m = 0; m < nmom; ++m) { auto qt = mom_[m]; @@ -193,7 +199,8 @@ void TTwoPoint::execute(void) for 
(unsigned int t = 0; t < nt; ++t) { qt[nd - 1] = t; - peekSite(slicedOp[o][m][t], ftBuf, qt); + peekSite(buf, ftBuf, qt); + slicedOp[o][m][t] = TensorRemove(buf)/partVol; } } } @@ -201,7 +208,7 @@ void TTwoPoint::execute(void) for (unsigned int m = 0; m < nmom; ++m) for (auto &p: par().op) { - Result r; + TwoPointResult r; r.sink = p.first; r.source = p.second; @@ -228,7 +235,7 @@ std::vector TTwoPoint::makeTwoPoint( { for (unsigned int t = 0; t < nt; ++t) { - res[dt] += TensorRemove(trace(sink[(t+dt)%nt]*adj(source[t]))); + res[dt] += sink[(t+dt)%nt]*adj(source[t]); } res[dt] *= 1./static_cast(nt); } diff --git a/extras/Hadrons/Modules/MScalarSUN/Utils.hpp b/extras/Hadrons/Modules/MScalarSUN/Utils.hpp index b9e49715..37a9e137 100644 --- a/extras/Hadrons/Modules/MScalarSUN/Utils.hpp +++ b/extras/Hadrons/Modules/MScalarSUN/Utils.hpp @@ -44,7 +44,7 @@ inline void dmu(Field &out, const Field &in, const unsigned int mu, const DiffTy if (mu >= env.getNd()) { - HADRON_ERROR(Range, "Derivative direction out of range"); + HADRONS_ERROR(Range, "Derivative direction out of range"); } switch(type) { @@ -58,7 +58,7 @@ inline void dmu(Field &out, const Field &in, const unsigned int mu, const DiffTy out = 0.5*(Cshift(in, mu, 1) - Cshift(in, mu, -1)); break; default: - HADRON_ERROR(Argument, "Derivative type invalid"); + HADRONS_ERROR(Argument, "Derivative type invalid"); break; } } @@ -70,7 +70,7 @@ inline void dmuAcc(Field &out, const Field &in, const unsigned int mu, const Dif if (mu >= env.getNd()) { - HADRON_ERROR(Range, "Derivative direction out of range"); + HADRONS_ERROR(Range, "Derivative direction out of range"); } switch(type) { @@ -84,7 +84,7 @@ inline void dmuAcc(Field &out, const Field &in, const unsigned int mu, const Dif out += 0.5*(Cshift(in, mu, 1) - Cshift(in, mu, -1)); break; default: - HADRON_ERROR(Argument, "Derivative type invalid"); + HADRONS_ERROR(Argument, "Derivative type invalid"); break; } } diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index f559c4eb..206d44d1 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -127,7 +127,7 @@ void TRBPrecCG::setup(void) { if (par().maxIteration == 0) { - HADRON_ERROR(Argument, "zero maximum iteration"); + HADRONS_ERROR(Argument, "zero maximum iteration"); } LOG(Message) << "setting up Schur red-black preconditioned CG for" diff --git a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp index dcd1ba9a..7b1bc1db 100644 --- a/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp +++ b/extras/Hadrons/Modules/MUtilities/TestSeqConserved.hpp @@ -123,7 +123,7 @@ void TTestSeqConserved::setup(void) auto Ls = env().getObjectLs(par().q); if (Ls != env().getObjectLs(par().action)) { - HADRON_ERROR(Size, "Ls mismatch between quark action and propagator"); + HADRONS_ERROR(Size, "Ls mismatch between quark action and propagator"); } envTmpLat(PropagatorField, "tmp"); envTmpLat(LatticeComplex, "c"); diff --git a/extras/Hadrons/VirtualMachine.cc b/extras/Hadrons/VirtualMachine.cc index dffabe29..cc197ef8 100644 --- a/extras/Hadrons/VirtualMachine.cc +++ b/extras/Hadrons/VirtualMachine.cc @@ -123,7 +123,7 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) else { // output already fully registered, error - HADRON_ERROR(Definition, "object '" + out + HADRONS_ERROR(Definition, "object '" + out + "' is already produced by module '" + module_[env().getObjectModule(out)].name + 
"' (while pushing module '" + name + "')"); @@ -158,7 +158,7 @@ void VirtualMachine::pushModule(VirtualMachine::ModPt &pt) } else { - HADRON_ERROR(Definition, "module '" + name + "' already exists"); + HADRONS_ERROR(Definition, "module '" + name + "' already exists"); } } @@ -185,7 +185,7 @@ ModuleBase * VirtualMachine::getModule(const unsigned int address) const } else { - HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); + HADRONS_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -202,7 +202,7 @@ unsigned int VirtualMachine::getModuleAddress(const std::string name) const } else { - HADRON_ERROR(Definition, "no module with name '" + name + "'"); + HADRONS_ERROR(Definition, "no module with name '" + name + "'"); } } @@ -214,7 +214,7 @@ std::string VirtualMachine::getModuleName(const unsigned int address) const } else { - HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); + HADRONS_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -226,7 +226,7 @@ std::string VirtualMachine::getModuleType(const unsigned int address) const } else { - HADRON_ERROR(Definition, "no module with address " + std::to_string(address)); + HADRONS_ERROR(Definition, "no module with address " + std::to_string(address)); } } @@ -306,7 +306,7 @@ void VirtualMachine::makeModuleGraph(void) if (min < 0) { - HADRON_ERROR(Definition, "dependency '" + HADRONS_ERROR(Definition, "dependency '" + env().getObjectName(in) + "' (address " + std::to_string(in) + ") is not produced by any module"); diff --git a/extras/Hadrons/VirtualMachine.hpp b/extras/Hadrons/VirtualMachine.hpp index 153f8d70..ccc06d63 100644 --- a/extras/Hadrons/VirtualMachine.hpp +++ b/extras/Hadrons/VirtualMachine.hpp @@ -195,7 +195,7 @@ M * VirtualMachine::getModule(const unsigned int address) const } else { - HADRON_ERROR(Definition, "module '" + module_[address].name + HADRONS_ERROR(Definition, "module '" + module_[address].name + "' does not have type " + typeid(M).name() + "(has type: " + getModuleType(address) + ")"); } diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index 787cf15a..8011e796 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -479,15 +479,13 @@ until convergence Field B(grid); B.checkerboard = evec[0].checkerboard; // power of two search pattern; not every evalue in eval2 is assessed. 
+ int allconv =1; for(int jj = 1; jj<=Nstop; jj*=2){ int j = Nstop-jj; RealD e = eval2_copy[j]; // Discard the evalue basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { - if ( j > Nconv ) { - Nconv=j+1; - jj=Nstop; // Terminate the scan - } + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) { + allconv=0; } } // Do evec[0] for good measure @@ -495,8 +493,10 @@ until convergence int j=0; RealD e = eval2_copy[0]; basisRotateJ(B,evec,Qt,j,0,Nk,Nm); - _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox); + if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0; } + if ( allconv ) Nconv = Nstop; + // test if we converged, if so, terminate std::cout<= "<=Nstop || beta_k < betastp){ diff --git a/lib/algorithms/iterative/LocalCoherenceLanczos.h b/lib/algorithms/iterative/LocalCoherenceLanczos.h index 66ded6eb..c771d1cb 100644 --- a/lib/algorithms/iterative/LocalCoherenceLanczos.h +++ b/lib/algorithms/iterative/LocalCoherenceLanczos.h @@ -48,6 +48,7 @@ struct LanczosParams : Serializable { struct LocalCoherenceLanczosParams : Serializable { public: GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams, + bool, saveEvecs, bool, doFine, bool, doFineRead, bool, doCoarse, diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 3b27aec9..b0f7e206 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -277,7 +277,9 @@ public: uint8_t *cp = (uint8_t *)ptr; if ( ptr ) { // One touch per 4k page, static OMP loop to catch same loop order +#ifdef GRID_OMP #pragma omp parallel for schedule(static) +#endif for(size_type n=0;n &rhs,commVector &buffer,int dimen int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane int e1=rhs._grid->_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int ent = 0; + + static std::vector > table; table.resize(e1*e2); int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { - parallel_for_nest2(int n=0;n(off+bo+b,so+o+b); } } } else { int bo=0; - std::vector > table; for(int n=0;nCheckerBoardFromOindex(o+b); if ( ocb &cbmask ) { - table.push_back(std::pair (bo++,o+b)); + table[ent++]=std::pair (off+bo++,so+o+b); } } } - parallel_for(int i=0;i void Scatter_plane_simple (Lattice &rhs,commVector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; int stride=rhs._grid->_slice_stride[dimension]; - + + static std::vector > table; table.resize(e1*e2); + int ent =0; + if ( cbmask ==0x3 ) { - parallel_for_nest2(int n=0;n_slice_stride[dimension]; int bo =n*rhs._grid->_slice_block[dimension]; - rhs._odata[so+o+b]=buffer[bo+b]; + table[ent++] = std::pair(so+o+b,bo+b); } } + } else { - std::vector > table; int bo=0; for(int n=0;n_slice_stride[dimension]; int ocb=1<CheckerBoardFromOindex(o+b);// Could easily be a table lookup if ( ocb & cbmask ) { - table.push_back(std::pair (so+o+b,bo++)); + table[ent++]=std::pair (so+o+b,bo++); } } } - parallel_for(int i=0;i void Copy_plane(Lattice& lhs,const Lattice &rhs int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc int e2=rhs._grid->_slice_block[dimension]; int stride = rhs._grid->_slice_stride[dimension]; + static std::vector > table; table.resize(e1*e2); + int ent=0; + if(cbmask == 0x3 ){ - parallel_for_nest2(int n=0;n(lo+o,ro+o); } } } else { - parallel_for_nest2(int n=0;nCheckerBoardFromOindex(o); if ( ocb&cbmask ) { - //lhs._odata[lo+o]=rhs._odata[ro+o]; - vstream(lhs._odata[lo+o],rhs._odata[ro+o]); + table[ent++] 
= std::pair(lo+o,ro+o); } } } } - + + parallel_for(int i=0;i void Copy_plane_permute(Lattice& lhs,const Lattice &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type) @@ -269,16 +278,28 @@ template void Copy_plane_permute(Lattice& lhs,const Lattice_slice_block [dimension]; int stride = rhs._grid->_slice_stride[dimension]; - parallel_for_nest2(int n=0;n > table; table.resize(e1*e2); + int ent=0; + double t_tab,t_perm; + if ( cbmask == 0x3 ) { + for(int n=0;n(lo+o+b,ro+o+b); + }} + } else { + for(int n=0;nCheckerBoardFromOindex(o+b); - if ( ocb&cbmask ) { - permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type); - } + if ( ocb&cbmask ) table[ent++] = std::pair(lo+o+b,ro+o+b); + }} + } - }} + parallel_for(int i=0;i void Cshift_local(Lattice& ret,const Lattice &r sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + double t_local; + if ( sshift[0] == sshift[1] ) { Cshift_local(ret,rhs,dimension,shift,0x3); } else { @@ -299,7 +322,7 @@ template void Cshift_local(Lattice& ret,const Lattice &r } } -template Lattice Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) +template void Cshift_local(Lattice &ret,const Lattice &rhs,int dimension,int shift,int cbmask) { GridBase *grid = rhs._grid; int fd = grid->_fdimensions[dimension]; @@ -325,11 +348,7 @@ template Lattice Cshift_local(Lattice &ret,const Lattice int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); int sx = (x+sshift)%rd; - - // FIXME : This must change where we have a - // Rotate slice. - // Document how this works ; why didn't I do this when I first wrote it... // wrap is whether sshift > rd. // num is sshift mod rd. 
// @@ -365,10 +384,8 @@ template Lattice Cshift_local(Lattice &ret,const Lattice if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist); else Copy_plane(ret,rhs,dimension,x,sx,cbmask); - } - return ret; } } #endif diff --git a/lib/cshift/Cshift_mpi.h b/lib/cshift/Cshift_mpi.h index a66b49bf..98972135 100644 --- a/lib/cshift/Cshift_mpi.h +++ b/lib/cshift/Cshift_mpi.h @@ -54,13 +54,13 @@ template Lattice Cshift(const Lattice &rhs,int dimension if ( !comm_dim ) { - // std::cout << "Cshift_local" < void Cshift_comms_simd(Lattice& ret,const LatticeCheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even); sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd); + //std::cout << "Cshift_comms_simd dim "< void Cshift_comms_simd(Lattice &ret,const Lattice_simd_layout[dimension]; int comm_dim = grid->_processors[dimension] >1 ; + //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<=0); diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 014e443d..1169d18f 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -256,9 +256,42 @@ public: _odata[ss]=r._odata[ss]; } } + + Lattice(Lattice&& r){ // move constructor + _grid = r._grid; + checkerboard = r.checkerboard; + _odata=std::move(r._odata); + } - - + inline Lattice & operator = (Lattice && r) + { + _grid = r._grid; + checkerboard = r.checkerboard; + _odata =std::move(r._odata); + return *this; + } + + inline Lattice & operator = (const Lattice & r){ + _grid = r._grid; + checkerboard = r.checkerboard; + _odata.resize(_grid->oSites());// essential + + parallel_for(int ss=0;ss<_grid->oSites();ss++){ + _odata[ss]=r._odata[ss]; + } + return *this; + } + + template strong_inline Lattice & operator = (const Lattice & r){ + this->checkerboard = r.checkerboard; + conformable(*this,r); + + parallel_for(int ss=0;ss<_grid->oSites();ss++){ + this->_odata[ss]=r._odata[ss]; + } + return *this; + } + virtual ~Lattice(void) = default; void reset(GridBase* grid) { @@ -277,15 +310,6 @@ public: return *this; } - template strong_inline Lattice & operator = (const Lattice & r){ - this->checkerboard = r.checkerboard; - conformable(*this,r); - - parallel_for(int ss=0;ss<_grid->oSites();ss++){ - this->_odata[ss]=r._odata[ss]; - } - return *this; - } // *=,+=,-= operators inherit behvour from correspond */+/- operation template strong_inline Lattice &operator *=(const T &r) { diff --git a/lib/lattice/Lattice_comparison_utils.h b/lib/lattice/Lattice_comparison_utils.h index 9580d4d2..cbac20ec 100644 --- a/lib/lattice/Lattice_comparison_utils.h +++ b/lib/lattice/Lattice_comparison_utils.h @@ -179,7 +179,7 @@ namespace Grid { return ret; } -#define DECLARE_RELATIONAL(op,functor) \ +#define DECLARE_RELATIONAL_EQ(op,functor) \ template = 0>\ inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\ {\ @@ -198,11 +198,6 @@ namespace Grid { typedef typename vsimd::scalar_type scalar;\ return Comparison(functor(),lhs,rhs);\ }\ - template = 0>\ - inline vInteger operator op(const iScalar &lhs,const iScalar &rhs)\ - { \ - return lhs._internal op rhs._internal; \ - } \ template\ inline vInteger operator op(const iScalar &lhs,const typename vsimd::scalar_type &rhs) \ { \ @@ -212,14 +207,21 @@ namespace Grid { inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar &rhs) \ { \ return lhs op rhs._internal; \ - } + } \ +#define DECLARE_RELATIONAL(op,functor) \ + DECLARE_RELATIONAL_EQ(op,functor) \ + template\ + inline vInteger operator 
op(const iScalar &lhs,const iScalar &rhs)\ + { \ + return lhs._internal op rhs._internal; \ + } DECLARE_RELATIONAL(<,slt); DECLARE_RELATIONAL(<=,sle); DECLARE_RELATIONAL(>,sgt); DECLARE_RELATIONAL(>=,sge); -DECLARE_RELATIONAL(==,seq); +DECLARE_RELATIONAL_EQ(==,seq); DECLARE_RELATIONAL(!=,sne); #undef DECLARE_RELATIONAL diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 45fd522e..ce84fc81 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -110,11 +110,11 @@ class BinaryIO { lsites = 1; } - #pragma omp parallel +PARALLEL_REGION { uint32_t nersc_csum_thr = 0; - #pragma omp for +PARALLEL_FOR_LOOP_INTERN for (uint64_t local_site = 0; local_site < lsites; local_site++) { uint32_t *site_buf = (uint32_t *)&fbuf[local_site]; @@ -124,7 +124,7 @@ class BinaryIO { } } - #pragma omp critical +PARALLEL_CRITICAL { nersc_csum += nersc_csum_thr; } @@ -146,14 +146,14 @@ class BinaryIO { std::vector local_start =grid->LocalStarts(); std::vector global_vol =grid->FullDimensions(); -#pragma omp parallel +PARALLEL_REGION { std::vector coor(nd); uint32_t scidac_csuma_thr=0; uint32_t scidac_csumb_thr=0; uint32_t site_crc=0; -#pragma omp for +PARALLEL_FOR_LOOP_INTERN for(uint64_t local_site=0;local_site>(32-gsite31); } -#pragma omp critical +PARALLEL_CRITICAL { scidac_csuma^= scidac_csuma_thr; scidac_csumb^= scidac_csumb_thr; diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h index 95b19904..232eba42 100644 --- a/lib/qcd/hmc/integrators/Integrator.h +++ b/lib/qcd/hmc/integrators/Integrator.h @@ -114,18 +114,26 @@ class Integrator { // input U actually not used in the fundamental case // Fundamental updates, include smearing - for (int a = 0; a < as[level].actions.size(); ++a) { + for (int a = 0; a < as[level].actions.size(); ++a) { + double start_full = usecond(); Field force(U._grid); conformable(U._grid, Mom._grid); + Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); + double start_force = usecond(); as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl; if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); force = FieldImplementation::projectForce(force); // Ta for gauge fields + double end_force = usecond(); Real force_abs = std::sqrt(norm2(force)/U._grid->gSites()); - std::cout << GridLogIntegrator << "Force average: " << force_abs << std::endl; + std::cout << GridLogIntegrator << "["< Factory::create(const std::string type, } catch (std::out_of_range &) { - //HADRON_ERROR("object of type '" + type + "' unknown"); + //HADRONS_ERROR("object of type '" + type + "' unknown"); std::cout << GridLogError << "Error" << std::endl; std::cout << GridLogError << obj_type() << " object of name [" << type << "] unknown" << std::endl; exit(1); diff --git a/lib/qcd/smearing/GaugeConfiguration.h b/lib/qcd/smearing/GaugeConfiguration.h index fc045ba2..6fea875b 100644 --- a/lib/qcd/smearing/GaugeConfiguration.h +++ b/lib/qcd/smearing/GaugeConfiguration.h @@ -6,30 +6,33 @@ #ifndef GAUGE_CONFIG_ #define GAUGE_CONFIG_ -namespace Grid { +namespace Grid +{ -namespace QCD { +namespace QCD +{ - //trivial class for no smearing - template< class Impl > -class NoSmearing { +//trivial class for no smearing +template +class NoSmearing +{ public: INHERIT_FIELD_TYPES(Impl); - Field* ThinField; + Field *ThinField; - NoSmearing(): ThinField(NULL) {} + NoSmearing() : ThinField(NULL) {} - void 
set_Field(Field& U) { ThinField = &U; } + void set_Field(Field &U) { ThinField = &U; } - void smeared_force(Field&) const {} + void smeared_force(Field &) const {} - Field& get_SmearedU() { return *ThinField; } + Field &get_SmearedU() { return *ThinField; } - Field& get_U(bool smeared = false) { + Field &get_U(bool smeared = false) + { return *ThinField; } - }; /*! @@ -44,32 +47,36 @@ public: It stores a list of smeared configurations. */ template -class SmearedConfiguration { - public: +class SmearedConfiguration +{ +public: INHERIT_GIMPL_TYPES(Gimpl); - private: +private: const unsigned int smearingLevels; Smear_Stout StoutSmearing; std::vector SmearedSet; // Member functions //==================================================================== - void fill_smearedSet(GaugeField& U) { - ThinLinks = &U; // attach the smearing routine to the field U + void fill_smearedSet(GaugeField &U) + { + ThinLinks = &U; // attach the smearing routine to the field U // check the pointer is not null if (ThinLinks == NULL) std::cout << GridLogError << "[SmearedConfiguration] Error in ThinLinks pointer\n"; - if (smearingLevels > 0) { + if (smearingLevels > 0) + { std::cout << GridLogDebug << "[SmearedConfiguration] Filling SmearedSet\n"; GaugeField previous_u(ThinLinks->_grid); previous_u = *ThinLinks; - for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) { + for (int smearLvl = 0; smearLvl < smearingLevels; ++smearLvl) + { StoutSmearing.smear(SmearedSet[smearLvl], previous_u); previous_u = SmearedSet[smearLvl]; @@ -81,9 +88,10 @@ class SmearedConfiguration { } } //==================================================================== - GaugeField AnalyticSmearedForce(const GaugeField& SigmaKPrime, - const GaugeField& GaugeK) const { - GridBase* grid = GaugeK._grid; + GaugeField AnalyticSmearedForce(const GaugeField &SigmaKPrime, + const GaugeField &GaugeK) const + { + GridBase *grid = GaugeK._grid; GaugeField C(grid), SigmaK(grid), iLambda(grid); GaugeLinkField iLambda_mu(grid); GaugeLinkField iQ(grid), e_iQ(grid); @@ -94,7 +102,8 @@ class SmearedConfiguration { SigmaK = zero; iLambda = zero; - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { Cmu = peekLorentz(C, mu); GaugeKmu = peekLorentz(GaugeK, mu); SigmaKPrime_mu = peekLorentz(SigmaKPrime, mu); @@ -104,20 +113,22 @@ class SmearedConfiguration { pokeLorentz(iLambda, iLambda_mu, mu); } StoutSmearing.derivative(SigmaK, iLambda, - GaugeK); // derivative of SmearBase + GaugeK); // derivative of SmearBase return SigmaK; } /*! 
@brief Returns smeared configuration at level 'Level' */ - const GaugeField& get_smeared_conf(int Level) const { + const GaugeField &get_smeared_conf(int Level) const + { return SmearedSet[Level]; } //==================================================================== - void set_iLambda(GaugeLinkField& iLambda, GaugeLinkField& e_iQ, - const GaugeLinkField& iQ, const GaugeLinkField& Sigmap, - const GaugeLinkField& GaugeK) const { - GridBase* grid = iQ._grid; + void set_iLambda(GaugeLinkField &iLambda, GaugeLinkField &e_iQ, + const GaugeLinkField &iQ, const GaugeLinkField &Sigmap, + const GaugeLinkField &GaugeK) const + { + GridBase *grid = iQ._grid; GaugeLinkField iQ2(grid), iQ3(grid), B1(grid), B2(grid), USigmap(grid); GaugeLinkField unity(grid); unity = 1.0; @@ -206,15 +217,15 @@ class SmearedConfiguration { } //==================================================================== - public: - GaugeField* - ThinLinks; /*!< @brief Pointer to the thin - links configuration */ +public: + GaugeField * + ThinLinks; /* Pointer to the thin links configuration */ - /*! @brief Standard constructor */ - SmearedConfiguration(GridCartesian* UGrid, unsigned int Nsmear, - Smear_Stout& Stout) - : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) { + /* Standard constructor */ + SmearedConfiguration(GridCartesian *UGrid, unsigned int Nsmear, + Smear_Stout &Stout) + : smearingLevels(Nsmear), StoutSmearing(Stout), ThinLinks(NULL) + { for (unsigned int i = 0; i < smearingLevels; ++i) SmearedSet.push_back(*(new GaugeField(UGrid))); } @@ -223,21 +234,29 @@ class SmearedConfiguration { SmearedConfiguration() : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} - - // attach the smeared routines to the thin links U and fill the smeared set - void set_Field(GaugeField& U) { fill_smearedSet(U); } + void set_Field(GaugeField &U) + { + double start = usecond(); + fill_smearedSet(U); + double end = usecond(); + double time = (end - start)/ 1e3; + std::cout << GridLogMessage << "Smearing in " << time << " ms" << std::endl; + } //==================================================================== - void smeared_force(GaugeField& SigmaTilde) const { - if (smearingLevels > 0) { + void smeared_force(GaugeField &SigmaTilde) const + { + if (smearingLevels > 0) + { + double start = usecond(); GaugeField force = SigmaTilde; // actually = U*SigmaTilde GaugeLinkField tmp_mu(SigmaTilde._grid); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { // to get just SigmaTilde - tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * - peekLorentz(force, mu); + tmp_mu = adj(peekLorentz(SmearedSet[smearingLevels - 1], mu)) * peekLorentz(force, mu); pokeLorentz(force, tmp_mu, mu); } @@ -246,33 +265,43 @@ class SmearedConfiguration { force = AnalyticSmearedForce(force, *ThinLinks); - for (int mu = 0; mu < Nd; mu++) { + for (int mu = 0; mu < Nd; mu++) + { tmp_mu = peekLorentz(*ThinLinks, mu) * peekLorentz(force, mu); pokeLorentz(SigmaTilde, tmp_mu, mu); } - } // if smearingLevels = 0 do nothing + double end = usecond(); + double time = (end - start)/ 1e3; + std::cout << GridLogMessage << "Smearing force in " << time << " ms" << std::endl; + } // if smearingLevels = 0 do nothing } //==================================================================== - GaugeField& get_SmearedU() { return SmearedSet[smearingLevels - 1]; } + GaugeField &get_SmearedU() { return SmearedSet[smearingLevels - 1]; } - GaugeField& get_U(bool smeared = false) { + GaugeField &get_U(bool smeared = false) + { 
// get the config, thin links by default - if (smeared) { - if (smearingLevels) { + if (smeared) + { + if (smearingLevels) + { RealD impl_plaq = WilsonLoops::avgPlaquette(SmearedSet[smearingLevels - 1]); std::cout << GridLogDebug << "getting Usmr Plaq: " << impl_plaq << std::endl; return get_SmearedU(); - - } else { + } + else + { RealD impl_plaq = WilsonLoops::avgPlaquette(*ThinLinks); std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq << std::endl; return *ThinLinks; } - } else { + } + else + { RealD impl_plaq = WilsonLoops::avgPlaquette(*ThinLinks); std::cout << GridLogDebug << "getting Thin Plaq: " << impl_plaq << std::endl; diff --git a/lib/qcd/smearing/WilsonFlow.h b/lib/qcd/smearing/WilsonFlow.h index 4f5c0d43..1c0855ab 100644 --- a/lib/qcd/smearing/WilsonFlow.h +++ b/lib/qcd/smearing/WilsonFlow.h @@ -173,8 +173,8 @@ void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { std::cout << "Time to evolve " << diff.count() << " s\n"; #endif std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(step,out) << std::endl; + << step << " " << tau(step) << " " + << energyDensityPlaquette(step,out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " @@ -193,8 +193,8 @@ void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, Re //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; evolve_step_adaptive(out, maxTau); std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " - << energyDensityPlaquette(out) << std::endl; + << step << " " << taus << " " + << energyDensityPlaquette(out) << std::endl; if( step % measure_interval == 0){ std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " diff --git a/lib/threads/Threads.h b/lib/threads/Threads.h index 36daf2af..dacaf5d8 100644 --- a/lib/threads/Threads.h +++ b/lib/threads/Threads.h @@ -40,7 +40,7 @@ Author: paboyle #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") -#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)") #define PARALLEL_REGION _Pragma("omp parallel") #define PARALLEL_CRITICAL _Pragma("omp critical") #else diff --git a/tests/Test_compressed_lanczos_hot_start.cc b/tests/Test_compressed_lanczos_hot_start.cc new file mode 100644 index 00000000..3276d0f8 --- /dev/null +++ b/tests/Test_compressed_lanczos_hot_start.cc @@ -0,0 +1,259 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc + + Copyright (C) 2017 + +Author: Leans heavily on Christoph Lehner's code +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; + + void checkpointFine(std::string evecs_file,std::string evals_file) + { +#ifdef HAVE_LIME + assert(this->subspace.size()==nbasis); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;ksubspace[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); +#else + assert(0); +#endif + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { +#ifdef HAVE_LIME + this->evals_fine.resize(nbasis); + this->subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); + + } + RD.close(); +#else + assert(0); +#endif + } + + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { +#ifdef HAVE_LIME + int n = this->evec_coarse.size(); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;kevec_coarse[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); +#else + assert(0); +#endif + } + + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) + { +#ifdef HAVE_LIME + std::cout << "resizing coarse vecs to " << nvec<< std::endl; + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + + assert(this->evals_coarse.size()==nvec); + emptyUserRecord record; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<evec_coarse[k],record); + } + RD.close(); +#else + assert(0); +#endif + } +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + LocalCoherenceLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + std::vector latt({16,16,16,16}); + uint64_t vol = Ls*latt[0]*latt[1]*latt[2]*latt[3]; + double 
mat_flop= 2.0*1320.0*vol; + // Grids + GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt, + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = latt; + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d seeds4({1,2,3,4}); + GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); + LatticeGaugeField Umu(UGrid); + SU3::HotConfiguration(RNG4,Umu); + // FieldMetaData header; + // NerscIO::readConfiguration(Umu,header,Params.config); + + std::cout << GridLogMessage << "Lattice dimensions: " << latt << " Ls: " << Ls << std::endl; + + // ZMobius EO Operator + ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.); + SchurDiagTwoOperator HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; + + assert( (Params.doFine)||(Params.doFineRead)); + + if ( Params.doFine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "< HermOpEO(Ddwf); SchurDiagMooeeOperator HermOpEO_f(Ddwf_f); - std::cout << "Starting mixed CG" << std::endl; + std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl; MixedPrecisionConjugateGradient mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); mCG(src_o,result_o); - std::cout << "Starting regular CG" << std::endl; + std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl; ConjugateGradient CG(1.0e-8,10000); CG(HermOpEO,src_o,result_o_2); LatticeFermionD diff_o(FrbGrid); RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); - std::cout << "Diff between mixed and regular CG: " << diff << std::endl; + std::cout << GridLogMessage << "::::::::::::: Diff between mixed and regular CG: " << diff << std::endl; #ifdef HAVE_LIME if( GridCmdOptionExists(argv,argv+argc,"--checksums") ){ std::string file1("./Propagator1"); - std::string file2("./Propagator2"); emptyUserRecord record; uint32_t nersc_csum; uint32_t scidac_csuma; @@ -121,12 +122,12 @@ int main (int argc, char ** argv) BinaryIO::writeLatticeObject(result_o,file1,munge, 0, format, nersc_csum,scidac_csuma,scidac_csumb); - std::cout << " Mixed checksums "<(result_o_2,file1,munge, 0, format, nersc_csum,scidac_csuma,scidac_csumb); - std::cout << " CG checksums "< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +/* + * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features + * in Grid that were intended to be used to support blocked Aggregates, from + */ +#include +#include +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos +{ +public: + typedef iVector CoarseSiteVector; + typedef Lattice CoarseField; + typedef Lattice CoarseScalar; // used for inner products on fine field + typedef Lattice FineField; + + LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid, + LinearOperatorBase &FineOp, + int checkerboard) + // Base constructor + : LocalCoherenceLanczos(FineGrid,CoarseGrid,FineOp,checkerboard) + {}; + + void checkpointFine(std::string evecs_file,std::string evals_file) + { + assert(this->subspace.size()==nbasis); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;ksubspace[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_fine); + } + + void checkpointFineRestore(std::string evecs_file,std::string evals_file) + { + this->evals_fine.resize(nbasis); + this->subspace.resize(nbasis,this->_FineGrid); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<evals_fine); + + assert(this->evals_fine.size()==nbasis); + + std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<subspace[k].checkerboard=this->_checkerboard; + RD.readScidacFieldRecord(this->subspace[k],record); + + } + RD.close(); + } + + void checkpointCoarse(std::string evecs_file,std::string evals_file) + { + int n = this->evec_coarse.size(); + emptyUserRecord record; + Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss()); + WR.open(evecs_file); + for(int k=0;kevec_coarse[k],record); + } + WR.close(); + + XmlWriter WRx(evals_file); + write(WRx,"evals",this->evals_coarse); + } + + void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec) + { + std::cout << "resizing coarse vecs to " << nvec<< std::endl; + this->evals_coarse.resize(nvec); + this->evec_coarse.resize(nvec,this->_CoarseGrid); + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<evals_coarse); + + assert(this->evals_coarse.size()==nvec); + emptyUserRecord record; + std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<evec_coarse[k],record); + } + RD.close(); + } +}; + +int main (int argc, char ** argv) { + + Grid_init(&argc,&argv); + GridLogIRL.TimingMode(1); + + LocalCoherenceLanczosParams Params; + { + Params.omega.resize(10); + Params.blockSize.resize(5); + XmlWriter writer("Params_template.xml"); + write(writer,"Params",Params); + std::cout << GridLogMessage << " Written Params_template.xml" < blockSize = Params.blockSize; + + // Grids + GridCartesian * UGrid = 
SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), + GridDefaultSimd(Nd,vComplex::Nsimd()), + GridDefaultMpi()); + GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); + GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); + GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + + std::vector fineLatt = GridDefaultLatt(); + int dims=fineLatt.size(); + assert(blockSize.size()==dims+1); + std::vector coarseLatt(dims); + std::vector coarseLatt5d ; + + for (int d=0;d HermOp(Ddwf); + + // Eigenvector storage + LanczosParams fine =Params.FineParams; + LanczosParams coarse=Params.CoarseParams; + + const int Ns1 = fine.Nstop; const int Ns2 = coarse.Nstop; + const int Nk1 = fine.Nk; const int Nk2 = coarse.Nk; + const int Nm1 = fine.Nm; const int Nm2 = coarse.Nm; + + std::cout << GridLogMessage << "Keep " << fine.Nstop << " fine vectors" << std::endl; + std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl; + assert(Nm2 >= Nm1); + + const int nbasis= 60; + assert(nbasis==Ns1); + LocalCoherenceLanczosScidac _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd); + std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl; + + assert( (Params.doFine)||(Params.doFineRead)); + + if ( Params.doFine ) { + std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<
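
Note on the two new Lanczos tests above: this plain-text rendering of the patch has dropped everything enclosed in angle brackets, so the #include targets and all template arguments appear blank. The sketch below shows how the class skeleton plausibly reads, inferred from the surviving typedefs, the use of nbasis in the checkpoint methods, and the LocalCoherenceLanczos interface that ships with Grid. It is a reconstruction under assumptions, not the recorded patch text: the include path and the parameter names Fobj, CComplex and nbasis are assumed, as are the instantiation types mentioned in the trailing comment.

// Hedged reconstruction of the template skeleton whose angle brackets were lost above.
// Assumed: Grid is installed and provides LocalCoherenceLanczos with this constructor.
#include <Grid/Grid.h>   // assumed umbrella header pulling in the Lanczos classes

using namespace Grid;
using namespace Grid::QCD;

template<class Fobj, class CComplex, int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj, CComplex, nbasis>
{
public:
  typedef iVector<CComplex, nbasis> CoarseSiteVector;
  typedef Lattice<CoarseSiteVector> CoarseField;
  typedef Lattice<CComplex>         CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>             FineField;

  LocalCoherenceLanczosScidac(GridBase *FineGrid, GridBase *CoarseGrid,
                              LinearOperatorBase<FineField> &FineOp,
                              int checkerboard)
    // Base constructor, as in the patch text above
    : LocalCoherenceLanczos<Fobj, CComplex, nbasis>(FineGrid, CoarseGrid, FineOp, checkerboard)
  {};
};

// The instantiation in main() would then plausibly read (types assumed):
//   LocalCoherenceLanczosScidac<vSpinColourVector, vTComplex, nbasis>
//     _LocalCoherenceLanczos(FrbGrid, CoarseGrid5, HermOp, Odd);

The same bracket loss affects the simpler declarations in both tests; for example, the bare "std::vector blockSize" and "std::vector coarseLatt(dims)" lines are in all likelihood std::vector<int>, and the empty #include lines refer to the Grid headers providing the ImplicitlyRestartedLanczos and LocalCoherenceLanczos machinery.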