From c81d3d422d1bba570f74900d7dae7cc8bc29d318 Mon Sep 17 00:00:00 2001 From: Michael Marshall <43034299+mmphys@users.noreply.github.com> Date: Mon, 3 Jun 2019 15:25:05 +0100 Subject: [PATCH 1/8] Housekeeping. #include ---> #include --- .../qcd/action/fermion/ImprovedStaggeredFermion.cc | 2 +- Grid/qcd/action/fermion/StaggeredKernelsAsm.cc | 14 +++++++------- Grid/qcd/action/fermion/StaggeredKernelsHand.cc | 2 +- Grid/qcd/action/fermion/WilsonKernelsAsm.cc | 4 ++-- documentation/GridXcode/readme.md | 5 ++--- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc index 4a0f7e63..883db902 100644 --- a/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc +++ b/Grid/qcd/action/fermion/ImprovedStaggeredFermion.cc @@ -26,7 +26,7 @@ See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include namespace Grid { namespace QCD { diff --git a/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc b/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc index 990ac126..9711c487 100644 --- a/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc +++ b/Grid/qcd/action/fermion/StaggeredKernelsAsm.cc @@ -26,11 +26,11 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include #ifdef AVX512 -#include -#include +#include +#include #endif // Interleave operations from two directions @@ -679,7 +679,7 @@ void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, gauge3 =(uint64_t)&UU._odata[sU]( T ); // This is the single precision 5th direction vectorised kernel -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int LLs, int sU, @@ -732,7 +732,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl } -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int LLs, int sU, @@ -816,7 +816,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl // This is the single precision 5th direction vectorised kernel -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int LLs, int sU, @@ -884,7 +884,7 @@ template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, #endif } -#include +#include template <> void StaggeredKernels::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, DoubledGaugeField &UUU, SiteSpinor *buf, int LLs, int sU, diff --git a/Grid/qcd/action/fermion/StaggeredKernelsHand.cc b/Grid/qcd/action/fermion/StaggeredKernelsHand.cc index 47ebdd86..f304b00f 100644 --- a/Grid/qcd/action/fermion/StaggeredKernelsHand.cc +++ b/Grid/qcd/action/fermion/StaggeredKernelsHand.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include #define LOAD_CHI(b) \ diff --git a/Grid/qcd/action/fermion/WilsonKernelsAsm.cc b/Grid/qcd/action/fermion/WilsonKernelsAsm.cc index cd5d2430..55911988 100644 --- a/Grid/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/Grid/qcd/action/fermion/WilsonKernelsAsm.cc @@ -81,8 +81,8 @@ WilsonKernels::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,Doubl assert(0); } -#include -#include +#include +#include #define INSTANTIATE_ASM(A)\ template void WilsonKernels::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ diff --git a/documentation/GridXcode/readme.md b/documentation/GridXcode/readme.md index 031ec72a..8d9d7ad8 100644 --- a/documentation/GridXcode/readme.md +++ b/documentation/GridXcode/readme.md @@ -262,7 +262,6 @@ Set HEADER_SEARCH_PATHS to: $Grid/build$(CONFIGURATION)/Grid $Grid - $Grid/Grid followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.: @@ -272,7 +271,7 @@ followed by (***the order is important***) the locations reported by `grid-confi **Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.: - $Grid/build$(CONFIGURATION)/Grid $Grid $Grid/Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include + $Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include #### LIBRARY_SEARCH_PATHS @@ -298,7 +297,7 @@ The easiest way to link to all required libraries is to obtain a list of all lib and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.: - -lGrid -lHadrons -lmpi -lhdf5_cpp -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lstdc++ -lm -lz -lhdf5 + -lGrid -lHadrons -lmpi -lhdf5_cpp -lhdf5 -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lm ## Make additional configurations From 1059189abfa4694e9ad88dd1d71ce86b37801b02 Mon Sep 17 00:00:00 2001 From: fionnoh Date: Thu, 27 Jun 2019 13:49:55 +0800 Subject: [PATCH 2/8] Bugfix for A2ALoop module --- Hadrons/Modules/MContraction/A2ALoop.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hadrons/Modules/MContraction/A2ALoop.hpp b/Hadrons/Modules/MContraction/A2ALoop.hpp index 2ef99354..7e7ffac0 100644 --- a/Hadrons/Modules/MContraction/A2ALoop.hpp +++ b/Hadrons/Modules/MContraction/A2ALoop.hpp @@ -112,7 +112,7 @@ void TA2ALoop::execute(void) loop = zero; for (unsigned int i = 0; i < left.size(); ++i) { - loop += outerProduct(adj(left[i]), right[i]); + loop += outerProduct(left[i], right[i]); } } From eac6337466c1477cf875c5c8dc489b3ad745a97e Mon Sep 17 00:00:00 2001 From: Vera Guelpers Date: Wed, 3 Jul 2019 14:36:34 +0100 Subject: [PATCH 3/8] Hadrons: EMLepton: multiple source-sink separations at once --- Hadrons/Modules/MFermion/EMLepton.hpp | 53 +++++++++++++++++++-------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/Hadrons/Modules/MFermion/EMLepton.hpp b/Hadrons/Modules/MFermion/EMLepton.hpp index 2d26416d..3fe544d3 100644 --- a/Hadrons/Modules/MFermion/EMLepton.hpp +++ b/Hadrons/Modules/MFermion/EMLepton.hpp @@ -54,8 +54,9 @@ BEGIN_HADRONS_NAMESPACE * - action: fermion action used for propagator (string) * - emField: photon field A_mu (string) * - mass: input mass for the lepton propagator +* - boundary: boundary conditions for the lepton propagator, e.g. "1 1 1 -1" * - twist: twisted boundary for lepton propagator, e.g. "0.0 0.0 0.0 0.5" -* - deltat: source-sink separation +* - deltat: list of source-sink separations * *******************************************************************************/ @@ -74,7 +75,7 @@ public: double, mass, std::string , boundary, std::string, twist, - unsigned int, deltat); + std::vector, deltat); }; template @@ -124,7 +125,12 @@ std::vector TEMLepton::getInput(void) template std::vector TEMLepton::getOutput(void) { - std::vector out = {getName(), getName() + "_free"}; + std::vector out = {}; + for(int i=0; i void TEMLepton::setup(void) { Ls_ = env().getObjectLs(par().action); - envCreateLat(PropagatorField, getName()); - envCreateLat(PropagatorField, getName() + "_free"); + for(int i=0; i::execute(void) auto &mat = envGet(FMat, par().action); RealD mass = par().mass; Complex ci(0.0,1.0); - - PropagatorField &Aslashlep = envGet(PropagatorField, getName()); - PropagatorField &lep = envGet(PropagatorField, getName() + "_free"); envGetTmp(FermionField, source); envGetTmp(FermionField, sol); @@ -227,6 +233,22 @@ void TEMLepton::execute(void) } } + for(unsigned int dt=0;dt= nt-par().deltat[dt], boundary[Tp]*lep, lep); + } + for(tl=0;tl::execute(void) proptmp = Cshift(freetmp,Tp, -tl); proptmp = where( tlat < tl, boundary[Tp]*proptmp, proptmp); - // free propagator for fixed source-sink separation - lep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, lep); - // i*A_mu*gamma_mu sourcetmp = zero; for(unsigned int mu=0;mu<=3;mu++) @@ -276,13 +295,17 @@ void TEMLepton::execute(void) } } // keep the result for the desired delta t - Aslashlep = where(tlat == (tl-par().deltat+nt)%nt, proptmp, Aslashlep); + for(unsigned int dt=0;dt= nt-par().deltat, boundary[Tp]*Aslashlep, Aslashlep); - lep = where( tlat >= nt-par().deltat, boundary[Tp]*lep, lep); - + for(unsigned int dt=0;dt= nt-par().deltat[dt], boundary[Tp]*Aslashlep, Aslashlep); + } } END_MODULE_NAMESPACE From c3d0c176abc974e77e2a9ea0bf46bc4c0554c8ab Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 24 May 2019 13:08:35 +0100 Subject: [PATCH 4/8] cleaning up Kl2 contraction --- Hadrons/Global.hpp | 9 ++- .../MContraction/WeakMesonDecayKl2.hpp | 79 +++++++------------ 2 files changed, 32 insertions(+), 56 deletions(-) diff --git a/Hadrons/Global.hpp b/Hadrons/Global.hpp index 947d962c..a525a30f 100644 --- a/Hadrons/Global.hpp +++ b/Hadrons/Global.hpp @@ -109,10 +109,11 @@ typedef std::vector #define FERM_TYPE_ALIASES(FImpl, suffix)\ BASIC_TYPE_ALIASES(FImpl, suffix);\ -typedef FermionOperator FMat##suffix;\ -typedef typename FImpl::FermionField FermionField##suffix;\ -typedef typename FImpl::GaugeField GaugeField##suffix;\ -typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; +typedef FermionOperator FMat##suffix;\ +typedef typename FImpl::FermionField FermionField##suffix;\ +typedef typename FImpl::GaugeField GaugeField##suffix;\ +typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\ +typedef Lattice> SpinMatrixField##suffix; #define GAUGE_TYPE_ALIASES(GImpl, suffix)\ typedef typename GImpl::GaugeField GaugeField##suffix; diff --git a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp index fa97cda3..d7a45108 100644 --- a/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp +++ b/Hadrons/Modules/MContraction/WeakMesonDecayKl2.hpp @@ -64,7 +64,7 @@ BEGIN_HADRONS_NAMESPACE */ /****************************************************************************** - * TWeakMesonDecayKl2 * + * TWeakMesonDecayKl2 * ******************************************************************************/ BEGIN_MODULE_NAMESPACE(MContraction) @@ -75,7 +75,7 @@ public: std::string, q1, std::string, q2, std::string, lepton, - std::string, output); + std::string, output); }; template @@ -83,14 +83,13 @@ class TWeakMesonDecayKl2: public Module { public: FERM_TYPE_ALIASES(FImpl,); - class Metadata: Serializable + typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix; + class Result: Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata, - int, spinidx1, - int, spinidx2); + GRID_SERIALIZABLE_CLASS_MEMBERS(Result, + std::vector, corr); }; - typedef Correlator Result; public: // constructor TWeakMesonDecayKl2(const std::string name); @@ -138,10 +137,10 @@ std::vector TWeakMesonDecayKl2::getOutput(void) template void TWeakMesonDecayKl2::setup(void) { - envTmpLat(LatticeComplex, "c"); + envTmpLat(ComplexField, "c"); envTmpLat(PropagatorField, "prop_buf"); envCreateLat(PropagatorField, getName()); - envTmpLat(LatticeComplex, "buf"); + envTmpLat(SpinMatrixField, "buf"); } // execution /////////////////////////////////////////////////////////////////// @@ -150,57 +149,33 @@ void TWeakMesonDecayKl2::execute(void) { LOG(Message) << "Computing QED Kl2 contractions '" << getName() << "' using" << " quarks '" << par().q1 << "' and '" << par().q2 << "' and" - << "lepton '" << par().lepton << "'" << std::endl; + << "lepton '" << par().lepton << "'" << std::endl; + Gamma g5(Gamma::Algebra::Gamma5); + int nt = env().getDim(Tp); + std::vector res_summed; + Result r; - auto &res = envGet(PropagatorField, getName()); res = zero; - Gamma g5(Gamma::Algebra::Gamma5); - int nt = env().getDim(Tp); - - auto &q1 = envGet(PropagatorField, par().q1); - auto &q2 = envGet(PropagatorField, par().q2); + auto &res = envGet(PropagatorField, getName()); res = zero; + auto &q1 = envGet(PropagatorField, par().q1); + auto &q2 = envGet(PropagatorField, par().q2); auto &lepton = envGet(PropagatorField, par().lepton); - envGetTmp(LatticeComplex, buf); - std::vector res_summed; - envGetTmp(LatticeComplex, c); + envGetTmp(SpinMatrixField, buf); + envGetTmp(ComplexField, c); envGetTmp(PropagatorField, prop_buf); - std::vector result; - Result r; - for (unsigned int mu = 0; mu < 4; ++mu) { - c = zero; - //hadronic part: trace(q1*adj(q2)*g5*gL[mu]) - c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu])); - prop_buf = 1.; - //multiply lepton part - res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton; + c = zero; + //hadronic part: trace(q1*adj(q2)*g5*gL[mu]) + c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu])); + prop_buf = 1.; + //multiply lepton part + res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton; } - - //loop over spinor index of lepton part - unsigned int i = 0; - for (unsigned int s1 = 0; s1 < Ns ; ++s1) - for (unsigned int s2 = 0; s2 < Ns ; ++s2) - { - buf = peekColour(peekSpin(res,s1,s2),0,0); - - sliceSum(buf, res_summed, Tp); - - r.corr.clear(); - for (unsigned int t = 0; t < nt; ++t) - { - r.corr.push_back(TensorRemove(res_summed[t])); - } - - r.info.spinidx1 = s1; - r.info.spinidx2 = s2; - result.push_back(r); - - i+=1; - } - - saveResult(par().output, "weakdecay", result); + buf = peekColour(res, 0, 0); + sliceSum(buf, r.corr, Tp); + saveResult(par().output, "weakdecay", r); } END_MODULE_NAMESPACE From 9e926e3fc5f53031f195fa2ecc2ecb715190e4c0 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jul 2019 10:01:52 +0100 Subject: [PATCH 5/8] Build fix in develop --- Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h | 1 + HMC/Mobius2p1fEOFA.cc | 3 +++ tests/forces/Test_dwf_force_eofa.cc | 2 +- tests/forces/Test_dwf_gpforce_eofa.cc | 2 +- tests/forces/Test_mobius_force_eofa.cc | 2 +- tests/forces/Test_mobius_gpforce_eofa.cc | 2 +- 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h index 25285565..c6746a88 100644 --- a/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h +++ b/Grid/qcd/action/pseudofermion/ExactOneFlavourRatio.h @@ -66,6 +66,7 @@ namespace QCD{ FermionField Phi; // the pseudofermion field for this trajectory public: + ExactOneFlavourRatioPseudoFermionAction(AbstractEOFAFermion& _Lop, AbstractEOFAFermion& _Rop, OperatorFunction& HeatbathCG, diff --git a/HMC/Mobius2p1fEOFA.cc b/HMC/Mobius2p1fEOFA.cc index 61b06829..997e76ab 100644 --- a/HMC/Mobius2p1fEOFA.cc +++ b/HMC/Mobius2p1fEOFA.cc @@ -30,7 +30,9 @@ directory /* END LEGAL */ #include +#ifdef GRID_DEFAULT_PRECISION_DOUBLE #define MIXED_PRECISION +#endif namespace Grid{ namespace QCD{ @@ -346,6 +348,7 @@ int main(int argc, char **argv) { #else ExactOneFlavourRatioPseudoFermionAction EOFA(Strange_Op_L, Strange_Op_R, + ActionCG, ActionCG, ActionCG, DerivativeCG, DerivativeCG, OFRp, true); diff --git a/tests/forces/Test_dwf_force_eofa.cc b/tests/forces/Test_dwf_force_eofa.cc index f17579ae..18e339ab 100644 --- a/tests/forces/Test_dwf_force_eofa.cc +++ b/tests/forces/Test_dwf_force_eofa.cc @@ -84,7 +84,7 @@ int main (int argc, char** argv) DomainWallEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); - ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, true); Meofa.refresh(U, RNG5); RealD S = Meofa.S(U); // pdag M p diff --git a/tests/forces/Test_dwf_gpforce_eofa.cc b/tests/forces/Test_dwf_gpforce_eofa.cc index 3afeaa43..32b3ed6b 100644 --- a/tests/forces/Test_dwf_gpforce_eofa.cc +++ b/tests/forces/Test_dwf_gpforce_eofa.cc @@ -89,7 +89,7 @@ int main (int argc, char** argv) FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, params); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); - ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, true); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, true); Meofa.refresh(U, RNG5); RealD S = Meofa.S(U); // pdag M p diff --git a/tests/forces/Test_mobius_force_eofa.cc b/tests/forces/Test_mobius_force_eofa.cc index 2a5a7d04..68a2931f 100644 --- a/tests/forces/Test_mobius_force_eofa.cc +++ b/tests/forces/Test_mobius_force_eofa.cc @@ -86,7 +86,7 @@ int main (int argc, char** argv) MobiusEOFAFermionR Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); - ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false); Meofa.refresh(U, RNG5); RealD S = Meofa.S(U); // pdag M p diff --git a/tests/forces/Test_mobius_gpforce_eofa.cc b/tests/forces/Test_mobius_gpforce_eofa.cc index 72f1dee2..577b4477 100644 --- a/tests/forces/Test_mobius_gpforce_eofa.cc +++ b/tests/forces/Test_mobius_gpforce_eofa.cc @@ -91,7 +91,7 @@ int main (int argc, char** argv) FermionAction Rop(U, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mb, mf, mb, -1.0, 1, M5, b, c, params); OneFlavourRationalParams Params(0.95, 100.0, 5000, 1.0e-12, 12); ConjugateGradient CG(1.0e-12, 5000); - ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, Params, false); + ExactOneFlavourRatioPseudoFermionAction Meofa(Lop, Rop, CG, CG, CG, CG, CG, Params, false); Meofa.refresh(U, RNG5); RealD S = Meofa.S(U); // pdag M p From ff325376cb19794bcf980fcdd792fb700e306ef4 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 19 Jul 2019 10:47:44 +0100 Subject: [PATCH 6/8] Fix single precision deriv test fail --- tests/forces/Test_rect_force.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/forces/Test_rect_force.cc b/tests/forces/Test_rect_force.cc index e0ffd28c..259090eb 100644 --- a/tests/forces/Test_rect_force.cc +++ b/tests/forces/Test_rect_force.cc @@ -73,7 +73,7 @@ int main (int argc, char ** argv) //////////////////////////////////// // Modify the gauge field a little //////////////////////////////////// - RealD dt = 0.0001; + RealD dt = 0.002; LatticeColourMatrix mommu(&Grid); LatticeColourMatrix forcemu(&Grid); @@ -88,13 +88,7 @@ int main (int argc, char ** argv) // fourth order exponential approx parallel_for(auto i=mom.begin();i Date: Sat, 20 Jul 2019 16:52:24 +0100 Subject: [PATCH 7/8] Intrinsics for CLANG are now fixed in v6 --- Grid/simd/Grid_avx512.h | 79 ----------------------------------------- 1 file changed, 79 deletions(-) diff --git a/Grid/simd/Grid_avx512.h b/Grid/simd/Grid_avx512.h index cce77a58..7546b22d 100644 --- a/Grid/simd/Grid_avx512.h +++ b/Grid/simd/Grid_avx512.h @@ -485,83 +485,6 @@ namespace Optimization { // Some Template specialization // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases -#ifndef __INTEL_COMPILER -#warning "Slow reduction due to incomplete reduce intrinsics" - //Complex float Reduce - template<> - inline Grid::ComplexF Reduce::operator()(__m512 in){ - __m512 v1,v2; - v1=Optimization::Permute::Permute0(in); // avx 512; quad complex single - v1= _mm512_add_ps(v1,in); - v2=Optimization::Permute::Permute1(v1); - v1 = _mm512_add_ps(v1,v2); - v2=Optimization::Permute::Permute2(v1); - v1 = _mm512_add_ps(v1,v2); - u512f conv; conv.v = v1; - return Grid::ComplexF(conv.f[0],conv.f[1]); - } - - //Real float Reduce - template<> - inline Grid::RealF Reduce::operator()(__m512 in){ - __m512 v1,v2; - v1 = Optimization::Permute::Permute0(in); // avx 512; octo-double - v1 = _mm512_add_ps(v1,in); - v2 = Optimization::Permute::Permute1(v1); - v1 = _mm512_add_ps(v1,v2); - v2 = Optimization::Permute::Permute2(v1); - v1 = _mm512_add_ps(v1,v2); - v2 = Optimization::Permute::Permute3(v1); - v1 = _mm512_add_ps(v1,v2); - u512f conv; conv.v=v1; - return conv.f[0]; - } - - - //Complex double Reduce - template<> - inline Grid::ComplexD Reduce::operator()(__m512d in){ - __m512d v1; - v1 = Optimization::Permute::Permute0(in); // sse 128; paired complex single - v1 = _mm512_add_pd(v1,in); - v1 = Optimization::Permute::Permute1(in); // sse 128; paired complex single - v1 = _mm512_add_pd(v1,in); - u512d conv; conv.v = v1; - return Grid::ComplexD(conv.f[0],conv.f[1]); - } - - //Real double Reduce - template<> - inline Grid::RealD Reduce::operator()(__m512d in){ - __m512d v1,v2; - v1 = Optimization::Permute::Permute0(in); // avx 512; quad double - v1 = _mm512_add_pd(v1,in); - v2 = Optimization::Permute::Permute1(v1); - v1 = _mm512_add_pd(v1,v2); - v2 = Optimization::Permute::Permute2(v1); - v1 = _mm512_add_pd(v1,v2); - u512d conv; conv.v = v1; - return conv.f[0]; - } - - //Integer Reduce - template<> - inline Integer Reduce::operator()(__m512i in){ - // No full vector reduce, use AVX to add upper and lower halves of register - // and perform AVX reduction. - __m256i v1, v2, v3; - __m128i u1, u2, ret; - v1 = _mm512_castsi512_si256(in); // upper half - v2 = _mm512_extracti32x8_epi32(in, 1); // lower half - v3 = _mm256_add_epi32(v1, v2); - v1 = _mm256_hadd_epi32(v3, v3); - v2 = _mm256_hadd_epi32(v1, v1); - u1 = _mm256_castsi256_si128(v2); // upper half - u2 = _mm256_extracti128_si256(v2, 1); // lower half - ret = _mm_add_epi32(u1, u2); - return _mm_cvtsi128_si32(ret); - } -#else //Complex float Reduce template<> inline Grid::ComplexF Reduce::operator()(__m512 in){ @@ -590,8 +513,6 @@ namespace Optimization { inline Integer Reduce::operator()(__m512i in){ return _mm512_reduce_add_epi32(in); } -#endif - } From 263dcbababaa49e8b50f8555a4d80be042aee6a5 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Tue, 30 Jul 2019 22:51:04 +0100 Subject: [PATCH 8/8] Simplify the comms benchmark --- benchmarks/Benchmark_comms.cc | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 6d95bbe2..f87caef0 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -188,9 +188,9 @@ int main (int argc, char ** argv) rbuf[mu].resize(lat*lat*lat*Ls); // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + bzero((void *)xbuf[d],bytes); + bzero((void *)rbuf[d],bytes); } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + bzero((void *)xbuf[d],bytes); + bzero((void *)rbuf[d],bytes); } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i xbuf(8); std::vector rbuf(8); Grid.ShmBufferFreeAll(); + // uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + uint64_t bytes = 2*1024*1024; for(int d=0;d<8;d++){ - xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); - bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes); + bzero((void *)xbuf[d],bytes); + bzero((void *)rbuf[d],bytes); } int ncomm; - int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); double dbytes; for(int i=0;i