Merge GPU support (upstream/develop) into distillation branch.

This compiles and looks right ... but may need some testing * develop: (762 commits) Tensor ambiguous fix Fix for GCC preprocessor/pragma handling bug Trips up NVCC for reasons I don't understand on summit Fix GCC complaint Zero() change Force a couple of things to compile on NVCC Remove debug code nvcc error suppress Merge develop Reduction finished and hopefully fixes CI regression fail on single precision and force Double precision variants for summation accuracy Update todo list Freeze the seed Fix compiling of MSource::Gauss for single precision Think the reduction is now sorted and cleaned up Fix force term Printing improvement GPU reduction fix and also exit backtrace option GPU friendly Simplify the comms benchmark ... # Conflicts: # Grid/communicator/SharedMemoryMPI.cc # Grid/qcd/action/fermion/WilsonKernelsAsm.cc # Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h # Grid/qcd/smearing/StoutSmearing.h # Hadrons/Modules.hpp # Hadrons/Utilities/Contractor.cc # Hadrons/modules.inc # tests/forces/Test_dwf_force_eofa.cc # tests/forces/Test_dwf_gpforce_eofa.cc
2026-06-26 21:43:30 +01:00 · 2019-09-13 13:30:00 +01:00
parent 04a661cafe b473405652
commit 61d017d0a5
796 changed files with 41536 additions and 52391 deletions
@@ -174,6 +174,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::setup(void)
 template <typename FImpl, typename PhotonImpl>
 void TA2AAslashField<FImpl, PhotonImpl>::execute(void)
 {
+#ifndef GRID_NVCC
    auto &left  = envGet(std::vector<FermionField>, par().left);
    auto &right = envGet(std::vector<FermionField>, par().right);

@@ -237,6 +238,7 @@ void TA2AAslashField<FImpl, PhotonImpl>::execute(void)

    envGetTmp(Computation, computation);
    computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn);
+#endif
 }

 END_MODULE_NAMESPACE
@@ -109,10 +109,10 @@ void TA2ALoop<FImpl>::execute(void)
    auto &left  = envGet(std::vector<FermionField>, par().left);
    auto &right = envGet(std::vector<FermionField>, par().right);

-    loop = zero;
+    loop = Zero();
    for (unsigned int i = 0; i < left.size(); ++i)
    {
-        loop += outerProduct(adj(left[i]), right[i]);
+        loop += outerProduct(left[i], right[i]);
    }
 }

@@ -258,7 +258,7 @@ void TA2AMesonField<FImpl>::execute(void)
            std::vector<Real> p;

            envGetTmp(ComplexField, coor);
-            ph[j] = zero;
+            ph[j] = Zero();
            for(unsigned int mu = 0; mu < mom_[j].size(); mu++)
            {
                LatticeCoordinate(coor, mu);
@@ -1,35 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/Baryon.cc
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-#include <Hadrons/Modules/MContraction/Baryon.hpp>
-
-using namespace Grid;
-using namespace Hadrons;
-using namespace MContraction;
-
-template class Grid::Hadrons::MContraction::TBaryon<FIMPL,FIMPL,FIMPL>;
-
@@ -1,257 +0,0 @@
-/*************************************************************************************
-
-Grid physics library, www.github.com/paboyle/Grid 
-
-Source file: Hadrons/Modules/MContraction/Baryon.hpp
-
-Copyright (C) 2015-2019
-
-Author: Antonin Portelli <antonin.portelli@me.com>
-Author: Felix Erben <felix.erben@ed.ac.uk>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-See the full license in the file "LICENSE" in the top level distribution directory
-*************************************************************************************/
-/*  END LEGAL */
-
-#ifndef Hadrons_MContraction_Baryon_hpp_
-#define Hadrons_MContraction_Baryon_hpp_
-
-#include <Hadrons/Global.hpp>
-#include <Hadrons/Module.hpp>
-#include <Hadrons/ModuleFactory.hpp>
-#include <Grid/qcd/utils/BaryonUtils.h>
-
-BEGIN_HADRONS_NAMESPACE
-
-/******************************************************************************
- *                               Baryon                                       *
- ******************************************************************************/
-BEGIN_MODULE_NAMESPACE(MContraction)
-
-class BaryonPar: Serializable
-{
-public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(BaryonPar,
-                                    std::string, q1,
-                                    std::string, q2,
-                                    std::string, q3,
-                                    std::string, gamma,
-                                    std::string, output);
-};
-
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-class TBaryon: public Module<BaryonPar>
-{
-public:
-    FERM_TYPE_ALIASES(FImpl1, 1);
-    FERM_TYPE_ALIASES(FImpl2, 2);
-    FERM_TYPE_ALIASES(FImpl3, 3);
-    class Result: Serializable
-    {
-    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                        std::vector<Complex>, corr);
-    };
-public:
-    // constructor
-    TBaryon(const std::string name);
-    // destructor
-    virtual ~TBaryon(void) {};
-    // dependency relation
-    virtual std::vector<std::string> getInput(void);
-    virtual std::vector<std::string> getOutput(void);
-protected:
-    // setup
-    virtual void setup(void);
-    // execution
-    virtual void execute(void);
-    // Which gamma algebra was specified
-    Gamma::Algebra  al;
-};
-
-MODULE_REGISTER_TMP(Baryon, ARG(TBaryon<FIMPL, FIMPL, FIMPL>), MContraction);
-
-/******************************************************************************
- *                         TBaryon implementation                             *
- ******************************************************************************/
-// constructor /////////////////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-TBaryon<FImpl1, FImpl2, FImpl3>::TBaryon(const std::string name)
-: Module<BaryonPar>(name)
-{}
-
-// dependencies/products ///////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getInput(void)
-{
-    std::vector<std::string> input = {par().q1, par().q2, par().q3};
-    
-    return input;
-}
-
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-std::vector<std::string> TBaryon<FImpl1, FImpl2, FImpl3>::getOutput(void)
-{
-    std::vector<std::string> out = {};
-    
-    return out;
-}
-
-// setup ///////////////////////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-void TBaryon<FImpl1, FImpl2, FImpl3>::setup(void)
-{
-    envTmpLat(LatticeComplex, "c");
-    envTmpLat(LatticeComplex, "c1");
-    envTmpLat(LatticeComplex, "c2");
-    envTmpLat(LatticeComplex, "c3");
-    envTmpLat(LatticeComplex, "c4");
-    envTmpLat(LatticeComplex, "c5");
-    envTmpLat(LatticeComplex, "c6");
-    envTmpLat(LatticeComplex, "diquark");
-  // Translate the full string naming the desired gamma structure into the one we need to use
-  const std::string gamma{ par().gamma };
-  int iGamma = 0;
-  do
-  {
-    const char * pGammaName = Gamma::name[iGamma];
-    int iLen = 0;
-    while( pGammaName[iLen] && pGammaName[iLen] != ' ' )
-      iLen++;
-    if( !gamma.compare( 0, gamma.size(), pGammaName, iLen ) )
-      break;
-  }
-  while( ++iGamma < Gamma::nGamma );
-  if( iGamma >= Gamma::nGamma ) {
-    LOG(Message) << "Unrecognised gamma structure \"" << gamma << "\"" << std::endl;
-    assert( 0 && "Invalid gamma structure specified" );
-  }
-  switch( iGamma ) {
-    case Gamma::Algebra::GammaX:
-      std::cout << "using interpolator C gamma_X" << std::endl;
-      al = Gamma::Algebra::GammaZGamma5; //Still hardcoded CgX = i gamma_3 gamma_5
-      break;
-    case Gamma::Algebra::GammaY:
-      std::cout << "using interpolator C gamma_Y" << std::endl;
-      al = Gamma::Algebra::GammaT; //Still hardcoded CgX = - gamma_4
-      break;
-    case Gamma::Algebra::GammaZ:
-      std::cout << "using interpolator C gamma_Z" << std::endl;
-      al = Gamma::Algebra::GammaXGamma5; //Still hardcoded CgX = i gamma_1 gamma_5
-      break;
-    default:
-    {
-      LOG(Message) << "Unsupported gamma structure " << gamma << " = " << iGamma << std::endl;
-      assert( 0 && "Unsupported gamma structure" );
-      // or you could do something like
-      al = static_cast<Gamma::Algebra>( iGamma );
-      break;
-    }
-  }
-  LOG(Message) << "Gamma structure " << gamma << " = " << iGamma
-               << " translated to " << Gamma::name[al] << std::endl;
-}
-
-// execution ///////////////////////////////////////////////////////////////////
-template <typename FImpl1, typename FImpl2, typename FImpl3>
-void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
-{
-    LOG(Message) << "Computing baryon contractions '" << getName() << "' using"
-                 << " quarks '" << par().q1 << "', and a diquark formed of ('" << par().q2 << "', and '"
-                 << par().q3 << "')" << std::endl;
-    
-    auto       &q1 = envGet(PropagatorField1, par().q1);
-    auto       &q2 = envGet(PropagatorField2, par().q2);
-    auto       &q3 = envGet(PropagatorField3, par().q3);
-    envGetTmp(LatticeComplex, c);
-    envGetTmp(LatticeComplex, c1);
-    envGetTmp(LatticeComplex, c2);
-    envGetTmp(LatticeComplex, c3);
-    envGetTmp(LatticeComplex, c4);
-    envGetTmp(LatticeComplex, c5);
-    envGetTmp(LatticeComplex, c6);
-    envGetTmp(LatticeComplex, diquark);
-    Result     result;
-    int nt = env().getDim(Tp);
-    result.corr.resize(nt);
-    const std::string gamma{ par().gamma };
-    std::vector<TComplex> buf;
-    
-    Result     result1;
-    Result     result2;
-    Result     result3;
-    Result     result4;
-    Result     result5;
-    Result     result6;
-    result1.corr.resize(nt);
-    result2.corr.resize(nt);
-    result3.corr.resize(nt);
-    result4.corr.resize(nt);
-    result5.corr.resize(nt);
-    result6.corr.resize(nt);
-    std::vector<TComplex> buf1;
-    std::vector<TComplex> buf2;
-    std::vector<TComplex> buf3;
-    std::vector<TComplex> buf4;
-    std::vector<TComplex> buf5;
-    std::vector<TComplex> buf6;
-
-    const Gamma GammaA{ Gamma::Algebra::Identity };
-    const Gamma GammaB{ al };
-
-    //BaryonUtils<FIMPL>::ContractBaryons(q1,q2,q3,GammaA,GammaB,c);
-    BaryonUtils<FIMPL>::ContractBaryons_debug(q1,q2,q3,GammaA,GammaB,c1,c2,c3,c4,c5,c6,c);
-
-    sliceSum(c,buf,Tp);
-    sliceSum(c1,buf1,Tp);
-    sliceSum(c2,buf2,Tp);
-    sliceSum(c3,buf3,Tp);
-    sliceSum(c4,buf4,Tp);
-    sliceSum(c5,buf5,Tp);
-    sliceSum(c6,buf6,Tp);
-
-    for (unsigned int t = 0; t < buf.size(); ++t)
-    {
-        result.corr[t] = TensorRemove(buf[t]);
-        result1.corr[t] = TensorRemove(buf1[t]);
-        result2.corr[t] = TensorRemove(buf2[t]);
-        result3.corr[t] = TensorRemove(buf3[t]);
-        result4.corr[t] = TensorRemove(buf4[t]);
-        result5.corr[t] = TensorRemove(buf5[t]);
-        result6.corr[t] = TensorRemove(buf6[t]);
-    }
-
-    std::string ostr1{ par().output + "_1"};
-    std::string ostr2{ par().output + "_2"};
-    std::string ostr3{ par().output + "_3"};
-    std::string ostr4{ par().output + "_4"};
-    std::string ostr5{ par().output + "_5"};
-    std::string ostr6{ par().output + "_6"};
-    saveResult(par().output, "baryon", result);
-    saveResult(ostr1, "baryon1", result1);
-    saveResult(ostr2, "baryon2", result2);
-    saveResult(ostr3, "baryon3", result3);
-    saveResult(ostr4, "baryon4", result4);
-    saveResult(ostr5, "baryon5", result5);
-    saveResult(ostr6, "baryon6", result6);
-}
-
-END_MODULE_NAMESPACE
-
-END_HADRONS_NAMESPACE
-
-#endif // Hadrons_MContraction_Baryon_hpp_
@@ -64,7 +64,7 @@ BEGIN_HADRONS_NAMESPACE
 */

 /******************************************************************************
- *                                TWeakMesonDecayKl2                             *
+ *                               TWeakMesonDecayKl2                           *
 ******************************************************************************/
 BEGIN_MODULE_NAMESPACE(MContraction)

@@ -75,7 +75,7 @@ public:
                                    std::string, q1,
                                    std::string, q2,
                                    std::string, lepton,
-				    std::string, output);
+				                    std::string, output);
 };

 template <typename FImpl>
@@ -83,14 +83,13 @@ class TWeakMesonDecayKl2: public Module<WeakMesonDecayKl2Par>
 {
 public:
    FERM_TYPE_ALIASES(FImpl,);
-    class Metadata: Serializable
+    typedef typename SpinMatrixField::vector_object::scalar_object SpinMatrix;
+    class Result: Serializable
    {
    public:
-        GRID_SERIALIZABLE_CLASS_MEMBERS(Metadata,
-                                        int, spinidx1,
-                                        int, spinidx2);
+        GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
+                                        std::vector<SpinMatrix>, corr);
    };
-    typedef Correlator<Metadata> Result;
 public:
    // constructor
    TWeakMesonDecayKl2(const std::string name);
@@ -138,10 +137,10 @@ std::vector<std::string> TWeakMesonDecayKl2<FImpl>::getOutput(void)
 template <typename FImpl>
 void TWeakMesonDecayKl2<FImpl>::setup(void)
 {
-    envTmpLat(LatticeComplex, "c");
+    envTmpLat(ComplexField, "c");
    envTmpLat(PropagatorField, "prop_buf");
    envCreateLat(PropagatorField, getName());
-    envTmpLat(LatticeComplex, "buf");
+    envTmpLat(SpinMatrixField, "buf");
 }

 // execution ///////////////////////////////////////////////////////////////////
@@ -150,57 +149,33 @@ void TWeakMesonDecayKl2<FImpl>::execute(void)
 {
    LOG(Message) << "Computing QED Kl2 contractions '" << getName() << "' using"
                 << " quarks '" << par().q1 << "' and '" << par().q2 << "' and"
-		 << "lepton '"  << par().lepton << "'" << std::endl;
+		         << "lepton '"  << par().lepton << "'" << std::endl;

+    Gamma                   g5(Gamma::Algebra::Gamma5);
+    int                     nt = env().getDim(Tp);
+    std::vector<SpinMatrix> res_summed;
+    Result                  r;

-    auto &res = envGet(PropagatorField, getName()); res = zero;
-    Gamma                  g5(Gamma::Algebra::Gamma5);
-    int                    nt = env().getDim(Tp);
-
-    auto &q1 = envGet(PropagatorField, par().q1);
-    auto &q2 = envGet(PropagatorField, par().q2);
+    auto &res    = envGet(PropagatorField, getName()); res = Zero();
+    auto &q1     = envGet(PropagatorField, par().q1);
+    auto &q2     = envGet(PropagatorField, par().q2);
    auto &lepton = envGet(PropagatorField, par().lepton);
-    envGetTmp(LatticeComplex, buf);
-    std::vector<TComplex>  res_summed;
-    envGetTmp(LatticeComplex, c);
+    envGetTmp(SpinMatrixField, buf);
+    envGetTmp(ComplexField, c);
    envGetTmp(PropagatorField, prop_buf);  

-    std::vector<Result>    result;
-    Result r;
-
    for (unsigned int mu = 0; mu < 4; ++mu)
    {
-	c = zero;
-	//hadronic part: trace(q1*adj(q2)*g5*gL[mu]) 
-        c   = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
-    	prop_buf = 1.;
-	//multiply lepton part
-	res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
+        c = Zero();
+        //hadronic part: trace(q1*adj(q2)*g5*gL[mu]) 
+        c = trace(q1*adj(q2)*g5*GammaL(Gamma::gmu[mu]));
+        prop_buf = 1.;
+        //multiply lepton part
+        res += c * prop_buf * GammaL(Gamma::gmu[mu]) * lepton;
    }
-
-    //loop over spinor index of lepton part
-    unsigned int i = 0;
-    for (unsigned int s1 = 0; s1 < Ns ; ++s1)
-    for (unsigned int s2 = 0; s2 < Ns ; ++s2)
-    {
-	buf = peekColour(peekSpin(res,s1,s2),0,0);
-
-	sliceSum(buf, res_summed, Tp);
-
-	r.corr.clear();
-	for (unsigned int t = 0; t < nt; ++t)
-	{
-              r.corr.push_back(TensorRemove(res_summed[t]));
-	}
-
-	r.info.spinidx1 = s1;
-	r.info.spinidx2 = s2;
-	result.push_back(r);
-
-	i+=1;
-    }
-
-    saveResult(par().output, "weakdecay", result);
+    buf = peekColour(res, 0, 0);
+    sliceSum(buf, r.corr, Tp);
+    saveResult(par().output, "weakdecay", r);
 }

 END_MODULE_NAMESPACE