diff --git a/Grid/qcd/utils/A2Autils.h b/Grid/qcd/utils/A2Autils.h index ccdbc7b9..89b4d4bd 100644 --- a/Grid/qcd/utils/A2Autils.h +++ b/Grid/qcd/utils/A2Autils.h @@ -60,6 +60,14 @@ public: const FermionField *vj, int orthogdim); + template // output: rank 5 tensor, e.g. Eigen::Tensor + static void AslashField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + const std::vector &emB0, + const std::vector &emB1, + int orthogdim, double *t_kernel = nullptr, double *t_gsum = nullptr); + static void ContractWWVV(std::vector &WWVV, const Eigen::Tensor &WW_sd, const FermionField *vs, @@ -617,6 +625,189 @@ void A2Autils::PionFieldVV(Eigen::Tensor &mat, PionFieldXX(mat,vi,vj,orthogdim,nog5); } +// "A-slash" field w_i(x)^dag * i * A_mu * gamma_mu * v_j(x) +// +// With: +// +// B_0 = A_0 + i A_1 +// B_1 = A_2 + i A_3 +// +// then in spin space +// +// ( 0 0 -conj(B_1) -B_0 ) +// i * A_mu g_mu = ( 0 0 -conj(B_0) B_1 ) +// ( B_1 B_0 0 0 ) +// ( conj(B_0) -conj(B_1) 0 0 ) +template +template +void A2Autils::AslashField(TensorType &mat, + const FermionField *lhs_wi, + const FermionField *rhs_vj, + const std::vector &emB0, + const std::vector &emB1, + int orthogdim, double *t_kernel, double *t_gsum) +{ + typedef typename FermionField::vector_object vobj; + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + typedef iSpinMatrix SpinMatrix_v; + typedef iSpinMatrix SpinMatrix_s; + typedef iSinglet Singlet_v; + typedef iSinglet Singlet_s; + + int Lblock = mat.dimension(3); + int Rblock = mat.dimension(4); + + GridBase *grid = lhs_wi[0]._grid; + + const int Nd = grid->_ndimension; + const int Nsimd = grid->Nsimd(); + + int Nt = grid->GlobalDimensions()[orthogdim]; + int Nem = emB0.size(); + assert(emB1.size() == Nem); + + int fd=grid->_fdimensions[orthogdim]; + int ld=grid->_ldimensions[orthogdim]; + int rd=grid->_rdimensions[orthogdim]; + + // will locally sum vectors first + // sum across these down to scalars + // splitting the SIMD + int MFrvol = rd*Lblock*Rblock*Nem; + int MFlvol = ld*Lblock*Rblock*Nem; + + Vector lvSum(MFrvol); + parallel_for (int r = 0; r < MFrvol; r++) + { + lvSum[r] = zero; + } + + Vector lsSum(MFlvol); + parallel_for (int r = 0; r < MFlvol; r++) + { + lsSum[r] = scalar_type(0.0); + } + + int e1= grid->_slice_nblock[orthogdim]; + int e2= grid->_slice_block [orthogdim]; + int stride=grid->_slice_stride[orthogdim]; + + // Nested parallelism would be ok + // Wasting cores here. Test case r + if (t_kernel) *t_kernel = -usecond(); + parallel_for(int r=0;r_ostride[orthogdim]; // base offset for start of plane + + for(int n=0;n icoor(Nd); + std::vector extracted(Nsimd); + + for(int i=0;i(lvSum[ij_rdx],extracted); + for(int idx=0;idxiCoorFromIindex(icoor,idx); + + int ldx = rt+icoor[orthogdim]*rd; + int ij_ldx = m+Nem*i+Nem*Lblock*j+Nem*Lblock*Rblock*ldx; + + lsSum[ij_ldx]=lsSum[ij_ldx]+extracted[idx]; + } + } + } + if (t_kernel) *t_kernel += usecond(); + + // ld loop and local only?? + int pd = grid->_processors[orthogdim]; + int pc = grid->_processor_coor[orthogdim]; + parallel_for_nest2(int lt=0;ltGlobalSumVector(&mat(0,0,0,0,0),Nem*Nt*Lblock*Rblock); + if (t_gsum) *t_gsum += usecond(); +} //////////////////////////////////////////// // Schematic thoughts about more generalised four quark insertion diff --git a/Hadrons/Modules.hpp b/Hadrons/Modules.hpp index 67ade797..2859f0fc 100644 --- a/Hadrons/Modules.hpp +++ b/Hadrons/Modules.hpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/Hadrons/Modules/MContraction/A2AAslashField.cc b/Hadrons/Modules/MContraction/A2AAslashField.cc new file mode 100644 index 00000000..eb76932e --- /dev/null +++ b/Hadrons/Modules/MContraction/A2AAslashField.cc @@ -0,0 +1,7 @@ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MContraction; + +template class Grid::Hadrons::MContraction::TA2AAslashField; diff --git a/Hadrons/Modules/MContraction/A2AAslashField.hpp b/Hadrons/Modules/MContraction/A2AAslashField.hpp new file mode 100644 index 00000000..792ff6ee --- /dev/null +++ b/Hadrons/Modules/MContraction/A2AAslashField.hpp @@ -0,0 +1,223 @@ +#ifndef Hadrons_MContraction_A2AAslashField_hpp_ +#define Hadrons_MContraction_A2AAslashField_hpp_ + +#include +#include +#include +#include + +#ifndef ASF_IO_TYPE +#define ASF_IO_TYPE ComplexF +#endif + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * A2AAslashField * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MContraction) + +class A2AAslashFieldPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldPar, + int, cacheBlock, + int, block, + std::string, left, + std::string, right, + std::string, output, + std::vector, emField); +}; + +class A2AAslashFieldMetadata: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(A2AAslashFieldMetadata, + std::string, emFieldName); +}; + +template +class AslashFieldKernel: public A2AKernel +{ +public: + typedef typename FImpl::FermionField FermionField; +public: + AslashFieldKernel(const std::vector &emB0, + const std::vector &emB1, + GridBase *grid) + : emB0_(emB0), emB1_(emB1), grid_(grid) + { + vol_ = 1.; + for (auto &d: grid_->GlobalDimensions()) + { + vol_ *= d; + } + } + + virtual ~AslashFieldKernel(void) = default; + virtual void operator()(A2AMatrixSet &m, const FermionField *left, + const FermionField *right, + const unsigned int orthogDim, double &t) + { + A2Autils::AslashField(m, left, right, emB0_, emB1_, orthogDim, &t); + } + + virtual double flops(const unsigned int blockSizei, const unsigned int blockSizej) + { + return 0.; + } + + virtual double bytes(const unsigned int blockSizei, const unsigned int blockSizej) + { + return 0.; + } +private: + const std::vector &emB0_, &emB1_; + GridBase *grid_; + double vol_; +}; + +template +class TA2AAslashField: public Module +{ +public: + FERM_TYPE_ALIASES(FImpl,); + typedef typename PhotonImpl::GaugeField EmField; + typedef A2AMatrixBlockComputation Computation; + typedef AslashFieldKernel Kernel; +public: + // constructor + TA2AAslashField(const std::string name); + // destructor + virtual ~TA2AAslashField(void) {}; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_TMP(A2AAslashField, ARG(TA2AAslashField), MContraction); + +/****************************************************************************** + * TA2AAslashField implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TA2AAslashField::TA2AAslashField(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TA2AAslashField::getInput(void) +{ + std::vector in = par().emField; + + in.push_back(par().left); + in.push_back(par().right); + + return in; +} + +template +std::vector TA2AAslashField::getOutput(void) +{ + std::vector out = {}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TA2AAslashField::setup(void) +{ + envTmp(Computation, "computation", 1, envGetGrid(FermionField), + env().getNd() - 1, par().emField.size(), 1, par().block, + par().cacheBlock, this); + envTmp(std::vector, "B0", 1, + par().emField.size(), envGetGrid(ComplexField)); + envTmp(std::vector, "B1", 1, + par().emField.size(), envGetGrid(ComplexField)); + envTmpLat(ComplexField, "Amu"); +} + +// execution /////////////////////////////////////////////////////////////////// +template +void TA2AAslashField::execute(void) +{ + auto &left = envGet(std::vector, par().left); + auto &right = envGet(std::vector, par().right); + + int nt = env().getDim().back(); + int N_i = left.size(); + int N_j = right.size(); + int nem = par().emField.size(); + int block = par().block; + int cacheBlock = par().cacheBlock; + + LOG(Message) << "Computing all-to-all A-slash fields" << std::endl; + LOG(Message) << "Left: '" << par().left << "' Right: '" << par().right << "'" << std::endl; + LOG(Message) << "EM fields:" << std::endl; + for (auto &name: par().emField) + { + LOG(Message) << " " << name << std::endl; + } + LOG(Message) << "A-slash field size: " << nt << "*" << N_i << "*" << N_j + << " (filesize " << sizeString(nt*N_i*N_j*sizeof(ASF_IO_TYPE)) + << "/EM field)" << std::endl; + + // preparing "B" complexified fields + startTimer("Complexify EM fields"); + envGetTmp(std::vector, B0); + envGetTmp(std::vector, B1); + for (unsigned int i = 0; i < par().emField.size(); ++i) + { + auto &A = envGet(EmField, par().emField[i]); + envGetTmp(ComplexField, Amu); + + B0[i] = peekLorentz(A, 0); + B0[i] += timesI(peekLorentz(A, 1)); + B1[i] = peekLorentz(A, 2); + B1[i] += timesI(peekLorentz(A, 3)); + } + stopTimer("Complexify EM fields"); + + // I/O name & metadata lambdas + auto ionameFn = [this](const unsigned int em, const unsigned int dummy) + { + return par().emField[em]; + }; + + auto filenameFn = [this, &ionameFn](const unsigned int em, const unsigned int dummy) + { + return par().output + "." + std::to_string(vm().getTrajectory()) + + "/" + ionameFn(em, dummy) + ".h5"; + }; + + auto metadataFn = [this](const unsigned int em, const unsigned int dummy) + { + A2AAslashFieldMetadata md; + + md.emFieldName = par().emField[em]; + + return md; + }; + + // executing computation + Kernel kernel(B0, B1, envGetGrid(FermionField)); + + envGetTmp(Computation, computation); + computation.execute(left, right, kernel, ionameFn, filenameFn, metadataFn); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MContraction_A2AAslashField_hpp_ diff --git a/Hadrons/modules.inc b/Hadrons/modules.inc index 0e6ef893..866dfd91 100644 --- a/Hadrons/modules.inc +++ b/Hadrons/modules.inc @@ -4,6 +4,7 @@ modules_cc =\ Modules/MContraction/Meson.cc \ Modules/MContraction/WeakNeutral4ptDisc.cc \ Modules/MContraction/WeakHamiltonianNonEye.cc \ + Modules/MContraction/A2AAslashField.cc \ Modules/MContraction/WardIdentity.cc \ Modules/MContraction/A2AMesonField.cc \ Modules/MContraction/DiscLoop.cc \ @@ -63,6 +64,7 @@ modules_cc =\ modules_hpp =\ Modules/MContraction/Baryon.hpp \ + Modules/MContraction/A2AAslashField.hpp \ Modules/MContraction/A2AMesonField.hpp \ Modules/MContraction/Meson.hpp \ Modules/MContraction/WeakHamiltonian.hpp \