Moved the meson field inner product to its own header file
Parent 9deae8c962, commit 2c54a536f3

extras/Hadrons/AllToAllReduction.hpp | 146 (new file)
@@ -0,0 +1,146 @@
#ifndef A2A_Reduction_hpp_
#define A2A_Reduction_hpp_

#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Environment.hpp>
#include <Grid/Hadrons/Solver.hpp>

BEGIN_HADRONS_NAMESPACE

////////////////////////////////////////////
// A2A Meson Field Inner Product
////////////////////////////////////////////

template <class FermionField>
void sliceInnerProductMesonField(std::vector<std::vector<ComplexD>> &mat,
                                 const std::vector<Lattice<FermionField>> &lhs,
                                 const std::vector<Lattice<FermionField>> &rhs,
                                 int orthogdim)
{
    typedef typename FermionField::scalar_type scalar_type;
    typedef typename FermionField::vector_type vector_type;

    int Lblock = lhs.size();
    int Rblock = rhs.size();

    GridBase *grid = lhs[0]._grid;

    const int Nd = grid->_ndimension;
    const int Nsimd = grid->Nsimd();
    int Nt = grid->GlobalDimensions()[orthogdim];

    assert(mat.size() == Lblock * Rblock);
    for (int t = 0; t < mat.size(); t++)
    {
        assert(mat[t].size() == Nt);
    }

    int fd = grid->_fdimensions[orthogdim];
    int ld = grid->_ldimensions[orthogdim];
    int rd = grid->_rdimensions[orthogdim];

    // will locally sum vectors first
    // sum across these down to scalars
    // splitting the SIMD
    std::vector<vector_type, alignedAllocator<vector_type>> lvSum(rd * Lblock * Rblock);
    for(int r=0;r<rd * Lblock * Rblock;r++)
    {
        lvSum[r]=zero;
    }
    std::vector<scalar_type> lsSum(ld * Lblock * Rblock, scalar_type(0.0));

    int e1 = grid->_slice_nblock[orthogdim];
    int e2 = grid->_slice_block[orthogdim];
    int stride = grid->_slice_stride[orthogdim];

    // std::cout << GridLogMessage << " Entering first parallel loop " << std::endl;
    // Parallelise over t-direction doesn't expose as much parallelism as needed for KNL
    parallel_for(int r = 0; r < rd; r++)
    {
        int so = r * grid->_ostride[orthogdim]; // base offset for start of plane
        for (int n = 0; n < e1; n++)
        {
            for (int b = 0; b < e2; b++)
            {
                int ss = so + n * stride + b;
                for (int i = 0; i < Lblock; i++)
                {
                    auto left = conjugate(lhs[i]._odata[ss]);
                    for (int j = 0; j < Rblock; j++)
                    {
                        int idx = i + Lblock * j + Lblock * Rblock * r;
                        auto right = rhs[j]._odata[ss];
                        vector_type vv = left()(0)(0) * right()(0)(0)
                                       + left()(0)(1) * right()(0)(1)
                                       + left()(0)(2) * right()(0)(2)
                                       + left()(1)(0) * right()(1)(0)
                                       + left()(1)(1) * right()(1)(1)
                                       + left()(1)(2) * right()(1)(2)
                                       + left()(2)(0) * right()(2)(0)
                                       + left()(2)(1) * right()(2)(1)
                                       + left()(2)(2) * right()(2)(2)
                                       + left()(3)(0) * right()(3)(0)
                                       + left()(3)(1) * right()(3)(1)
                                       + left()(3)(2) * right()(3)(2);

                        lvSum[idx] = lvSum[idx] + vv;
                    }
                }
            }
        }
    }

    // std::cout << GridLogMessage << " Entering second parallel loop " << std::endl;
    // Sum across simd lanes in the plane, breaking out orthog dir.
    parallel_for(int rt = 0; rt < rd; rt++)
    {
        std::vector<int> icoor(Nd);
        for (int i = 0; i < Lblock; i++)
        {
            for (int j = 0; j < Rblock; j++)
            {
                iScalar<vector_type> temp;
                std::vector<iScalar<scalar_type>> extracted(Nsimd);
                temp._internal = lvSum[i + Lblock * j + Lblock * Rblock * rt];
                extract(temp, extracted);
                for (int idx = 0; idx < Nsimd; idx++)
                {
                    grid->iCoorFromIindex(icoor, idx);
                    int ldx = rt + icoor[orthogdim] * rd;
                    int ij_dx = i + Lblock * j + Lblock * Rblock * ldx;
                    lsSum[ij_dx] = lsSum[ij_dx] + extracted[idx]._internal;
                }
            }
        }
    }

    // std::cout << GridLogMessage << " Entering non parallel loop " << std::endl;
    for (int t = 0; t < fd; t++)
    {
        int pt = t/ld; // processor plane
        int lt = t%ld;
        for (int i = 0; i < Lblock; i++)
        {
            for (int j = 0; j < Rblock; j++)
            {
                if (pt == grid->_processor_coor[orthogdim])
                {
                    int ij_dx = i + Lblock * j + Lblock * Rblock * lt;
                    mat[i + j * Lblock][t] = lsSum[ij_dx];
                }
                else
                {
                    mat[i + j * Lblock][t] = scalar_type(0.0);
                }

            }
        }
    }
    // std::cout << GridLogMessage << " Done " << std::endl;
    // defer sum over nodes.
    return;
}

END_HADRONS_NAMESPACE

#endif // A2A_Reduction_hpp_
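
A minimal usage sketch (not part of the diff) for the new reduction: it sizes the output the way the asserts above expect (Lblock*Rblock outer entries, Nt inner entries, indexed as mat[i + j*Lblock][t]) and leaves the sum over nodes to the caller, as the "defer sum over nodes" comment notes. The helper name blockMesonField, and the assumption that it is compiled inside the Hadrons namespace, are illustrative only.

#include <Grid/Hadrons/AllToAllReduction.hpp>

// Hypothetical helper, assumed to live inside the Hadrons namespace.
template <class FermionField>
std::vector<std::vector<ComplexD>>
blockMesonField(const std::vector<Lattice<FermionField>> &w,
                const std::vector<Lattice<FermionField>> &v,
                int orthogdim)
{
    GridBase *grid = w[0]._grid;
    int nt = grid->GlobalDimensions()[orthogdim];

    // One time series per (i, j) mode pair, laid out as mat[i + j * w.size()][t].
    std::vector<std::vector<ComplexD>> mat(w.size() * v.size(),
                                           std::vector<ComplexD>(nt));

    // Node-local slice reduction; the global sum over nodes is deferred.
    sliceInnerProductMesonField(mat, w, v, orthogdim);

    return mat;
}
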
@@ -182,6 +182,7 @@ class A2AModesSchurDiagTwo
    assert(sol_o.checkerboard == Odd);

    action.DminusDag(tmp_wout, wout_5d);

    action.ExportPhysicalFermionSolution(wout_5d, wout_4d);
}

@@ -15,6 +15,7 @@ libHadrons_adir = $(pkgincludedir)/Hadrons
nobase_libHadrons_a_HEADERS = \
    $(modules_hpp) \
    AllToAllVectors.hpp \
    AllToAllReduction.hpp \
    Application.hpp \
    EigenPack.hpp \
    Environment.hpp \
@@ -174,8 +174,8 @@ void TA2AMeson<FImpl>::execute(void)
    {
        for (unsigned int j = 0; j < N; j++)
        {
            sliceInnerProductVector(MF_x, w1[i], v1[j], Tp);
            sliceInnerProductVector(MF_y, w1[j], v1[i], Tp);
            mySliceInnerProductVector(MF_x, w1[i], v1[j], Tp);
            mySliceInnerProductVector(MF_y, w1[j], v1[i], Tp);
            for (unsigned int t = 0; t < nt; ++t)
            {
                for (unsigned int tx = 0; tx < nt; tx++)
@@ -5,6 +5,9 @@
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
#include <Grid/Hadrons/AllToAllVectors.hpp>
#include <Grid/Hadrons/AllToAllReduction.hpp>
#include <Grid/Grid_Eigen_Dense.h>
#include <fstream>

BEGIN_HADRONS_NAMESPACE

@@ -19,6 +22,7 @@ class MesonFieldPar : Serializable
    GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFieldPar,
                                    int, Nl,
                                    int, N,
                                    int, Nblock,
                                    std::string, A2A1,
                                    std::string, A2A2,
                                    std::string, gammas,
@@ -40,6 +44,7 @@ class TMesonFieldGamma : public Module<MesonFieldPar>
    GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
                                    Gamma::Algebra, gamma,
                                    std::vector<std::vector<std::vector<ComplexD>>>, MesonField,
                                    std::vector<std::vector<ComplexD>>, MesonFiield,
                                    ComplexD, last);
    };

@@ -52,6 +57,9 @@ class TMesonFieldGamma : public Module<MesonFieldPar>
    virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
    virtual void parseGammaString(std::vector<Gamma::Algebra> &gammaList);
    virtual void vectorOfWs(std::vector<FermionField> &w, int i, int Nblock, FermionField &tmpw_5d, std::vector<FermionField> &vec_w);
    virtual void vectorOfVs(std::vector<FermionField> &v, int j, int Nblock, FermionField &tmpv_5d, std::vector<FermionField> &vec_v);
    virtual void gammaMult(std::vector<FermionField> &v, Gamma gamma);
    // setup
    virtual void setup(void);
    // execution
@@ -107,19 +115,54 @@ void TMesonFieldGamma<FImpl>::parseGammaString(std::vector<Gamma::Algebra> &gamm
    }
}

template <typename FImpl>
void TMesonFieldGamma<FImpl>::vectorOfWs(std::vector<FermionField> &w, int i, int Nblock, FermionField &tmpw_5d, std::vector<FermionField> &vec_w)
{
    for (unsigned int ni = 0; ni < Nblock; ni++)
    {
        vec_w[ni] = w[i + ni];
    }
}

template <typename FImpl>
void TMesonFieldGamma<FImpl>::vectorOfVs(std::vector<FermionField> &v, int j, int Nblock, FermionField &tmpv_5d, std::vector<FermionField> &vec_v)
{
    for (unsigned int nj = 0; nj < Nblock; nj++)
    {
        vec_v[nj] = v[j+nj];
    }
}

template <typename FImpl>
void TMesonFieldGamma<FImpl>::gammaMult(std::vector<FermionField> &v, Gamma gamma)
{
    int Nblock = v.size();
    for (unsigned int nj = 0; nj < Nblock; nj++)
    {
        v[nj] = gamma * v[nj];
    }
}

// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TMesonFieldGamma<FImpl>::setup(void)
{
    int nt = env().getDim(Tp);
    int N = par().N;
    int Nblock = par().Nblock;

    int Ls_ = env().getObjectLs(par().A2A1 + "_class");

    envTmpLat(FermionField, "w", Ls_);
    envTmpLat(FermionField, "v", Ls_);
    envTmpLat(FermionField, "tmpv_5d", Ls_);
    envTmpLat(FermionField, "tmpw_5d", Ls_);

    envTmp(std::vector<FermionField>, "w", 1, N, FermionField(env().getGrid(1)));
    envTmp(std::vector<FermionField>, "v", 1, N, FermionField(env().getGrid(1)));

    envTmp(Eigen::MatrixXcd, "MF", 1, Eigen::MatrixXcd::Zero(nt, N * N));

    envTmp(std::vector<FermionField>, "w_block", 1, Nblock, FermionField(env().getGrid(1)));
    envTmp(std::vector<FermionField>, "v_block", 1, Nblock, FermionField(env().getGrid(1)));
}

// execution ///////////////////////////////////////////////////////////////////
@@ -130,6 +173,7 @@ void TMesonFieldGamma<FImpl>::execute(void)

    int N = par().N;
    int nt = env().getDim(Tp);
    int Nblock = par().Nblock;

    std::vector<Result> result;
    std::vector<Gamma::Algebra> gammaResultList;
@@ -145,33 +189,54 @@ void TMesonFieldGamma<FImpl>::execute(void)
    {
        result[i].gamma = gammaResultList[i];
        result[i].MesonField.resize(N, std::vector<std::vector<ComplexD>>(N, std::vector<ComplexD>(nt)));
        result[i].MesonFiield.resize(N, std::vector<ComplexD>(nt));

        Gamma gamma(gammaResultList[i]);
        gammaList[i] = gamma;
    }

    std::vector<ComplexD> MesonField_ij;
    MesonField_ij.resize(nt);

    auto &a2a1 = envGet(A2ABase, par().A2A1 + "_class");
    auto &a2a2 = envGet(A2ABase, par().A2A2 + "_class");

    envGetTmp(FermionField, w);
    envGetTmp(FermionField, v);
    envGetTmp(FermionField, tmpv_5d);
    envGetTmp(FermionField, tmpw_5d);

    for (unsigned int i = 0; i < N; i++)
    envGetTmp(std::vector<FermionField>, v);
    envGetTmp(std::vector<FermionField>, w);
    LOG(Message) << "Finding v and w vectors for N = " << N << std::endl;
    for (int i = 0; i < N; i++)
    {
        a2a1.return_w(i, tmpw_5d, w);
        for (unsigned int j = 0; j < N; j++)
        a2a2.return_v(i, tmpv_5d, v[i]);
        a2a1.return_w(i, tmpw_5d, w[i]);
    }
    LOG(Message) << "Found v and w vectors for N = " << N << std::endl;

    std::vector<std::vector<ComplexD>> MesonField_ij;
    LOG(Message) << "Before blocked MFs, Nblock = " << Nblock << std::endl;
    envGetTmp(std::vector<FermionField>, v_block);
    envGetTmp(std::vector<FermionField>, w_block);
    MesonField_ij.resize(Nblock * Nblock, std::vector<ComplexD>(nt));

    envGetTmp(Eigen::MatrixXcd, MF);

    LOG(Message) << "Before blocked MFs, Nblock = " << Nblock << std::endl;
    for (unsigned int i = 0; i < N; i += Nblock)
    {
        vectorOfWs(w, i, Nblock, tmpw_5d, w_block);
        for (unsigned int j = 0; j < N; j += Nblock)
        {
            a2a2.return_v(j, tmpv_5d, v);
            vectorOfVs(v, j, Nblock, tmpv_5d, v_block);
            for (unsigned int k = 0; k < result.size(); k++)
            {
                v = gammaList[k]*v;
                sliceInnerProductVector(MesonField_ij, w, v, Tp);
                result[k].MesonField[i][j] = MesonField_ij;
                gammaMult(v_block, gammaList[k]);
                sliceInnerProductMesonField(MesonField_ij, w_block, v_block, Tp);
                for (unsigned int nj = 0; nj < Nblock; nj++)
                {
                    for (unsigned int ni = 0; ni < Nblock; ni++)
                    {
                        MF.col((i + ni) + (j + nj) * N) = Eigen::VectorXcd::Map(&MesonField_ij[nj * Nblock + ni][0], MesonField_ij[nj * Nblock + ni].size());
                    }
                }
            }
        }
        if (i % 10 == 0)
@@ -179,7 +244,22 @@ void TMesonFieldGamma<FImpl>::execute(void)
            LOG(Message) << "MF for i = " << i << " of " << N << std::endl;
        }
    }
    result[0].last = MesonField_ij[7];
    LOG(Message) << "Before Global sum, Nblock = " << Nblock << std::endl;
    v_block[0]._grid->GlobalSumVector(MF.data(), MF.size());
    LOG(Message) << "After Global sum, Nblock = " << Nblock << std::endl;
    for (unsigned int i = 0; i < N; i++)
    {
        for (unsigned int j = 0; j < N; j++)
        {
            for (unsigned int k = 0; k < result.size(); k++)
            {
                for (unsigned int t = 0; t < nt; t++)
                {
                    result[k].MesonField[i][j][t] = MF.col(i + N * j)[t];
                }
            }
        }
    }
    saveResult(par().output, "meson", result);
}

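The blocked execute() above packs each (i + ni, j + nj) time series into column (i + ni) + (j + nj) * N of the nt x N*N matrix MF, and the readback loop fetches column i + N * j for the pair (i, j); the two expressions agree for every pair. A small standalone check of that index bookkeeping (not part of the commit; N and Nblock are illustrative values):

#include <cassert>

int main()
{
    const int N = 8, Nblock = 4;  // illustrative sizes, N divisible by Nblock
    for (int i = 0; i < N; i += Nblock)
        for (int j = 0; j < N; j += Nblock)
            for (int nj = 0; nj < Nblock; nj++)
                for (int ni = 0; ni < Nblock; ni++)
                {
                    int col_fill = (i + ni) + (j + nj) * N;  // column used when filling MF
                    int col_read = (i + ni) + N * (j + nj);  // column used when reading back
                    assert(col_fill == col_read);            // same column-major pair index
                    assert(col_fill < N * N);                // stays inside the nt x N*N matrix
                }
    return 0;
}
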
@@ -41,7 +41,6 @@ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
    std::cout << GridLogMessage << "Start alloc innerProduct" << std::endl;
    typedef typename vobj::scalar_type scalar_type;
    typedef typename vobj::vector_typeD vector_type;
    GridBase *grid = left._grid;
@@ -50,8 +49,6 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
    ComplexD inner;
    Vector<ComplexD> sumarray(grid->SumArraySize()*pad);

    std::cout << GridLogMessage << "End alloc innerProduct" << std::endl;
    std::cout << GridLogMessage << "Start parallel for innerProduct" << std::endl;
    parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
        int nwork, mywork, myoff;
        GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
@@ -65,18 +62,12 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
        ComplexD tmp = Reduce(TensorRemove(vinner)) ;
        vstream(sumarray[thr*pad],tmp);
    }
    std::cout << GridLogMessage << "End parallel for innerProduct" << std::endl;

    std::cout << GridLogMessage << "Start inner sum innerProduct" << std::endl;
    inner=0.0;
    for(int i=0;i<grid->SumArraySize();i++){
        inner = inner+sumarray[i*pad];
    }
    right._grid->GlobalSum(inner);
    return inner;
    std::cout << GridLogMessage << "End inner sum innerProduct" << std::endl;

    std::cout << GridLogMessage << "End innerProduct" << std::endl;
}

/////////////////////////
@@ -285,7 +276,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
template<class vobj>
static void mySliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
    std::cout << GridLogMessage << "Start mySsliceInnerProductVector" << std::endl;
    std::cout << GridLogMessage << "Start mySliceInnerProductVector" << std::endl;

    typedef typename vobj::scalar_type scalar_type;
    std::vector<scalar_type> lsSum;
@@ -313,12 +304,12 @@ static void localSliceInnerProductVector(std::vector<ComplexD> &result, const La
    int fd=grid->_fdimensions[orthogdim];
    int ld=grid->_ldimensions[orthogdim];
    int rd=grid->_rdimensions[orthogdim];
    std::cout << GridLogMessage << "Start alloc" << std::endl;
    // std::cout << GridLogMessage << "Start alloc" << std::endl;

    std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
    lsSum.resize(ld,scalar_type(0.0)); // sum across these down to scalars
    std::vector<iScalar<scalar_type>> extracted(Nsimd); // splitting the SIMD
    std::cout << GridLogMessage << "End alloc" << std::endl;
    // std::cout << GridLogMessage << "End alloc" << std::endl;

    result.resize(fd); // And then global sum to return the same vector to every node for IO to file
    for(int r=0;r<rd;r++){
@@ -328,8 +319,8 @@ static void localSliceInnerProductVector(std::vector<ComplexD> &result, const La
    int e1= grid->_slice_nblock[orthogdim];
    int e2= grid->_slice_block [orthogdim];
    int stride=grid->_slice_stride[orthogdim];
    std::cout << GridLogMessage << "End prep" << std::endl;
    std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
    // std::cout << GridLogMessage << "End prep" << std::endl;
    // std::cout << GridLogMessage << "Start parallel inner product, _rd = " << rd << std::endl;
    vector_type vv;
    parallel_for(int r=0;r<rd;r++)
    {
@@ -339,12 +330,12 @@ static void localSliceInnerProductVector(std::vector<ComplexD> &result, const La
        for(int n=0;n<e1;n++){
            for(int b=0;b<e2;b++){
                int ss = so + n * stride + b;
                vv = TensorRemove(innerProduct(lhs._odata[ss]._internal, rhs._odata[ss]._internal));
                vv = TensorRemove(innerProduct(lhs._odata[ss], rhs._odata[ss]));
                lvSum[r] = lvSum[r] + vv;
            }
        }
    }
    std::cout << GridLogMessage << "End parallel inner product" << std::endl;
    // std::cout << GridLogMessage << "End parallel inner product" << std::endl;

    // Sum across simd lanes in the plane, breaking out orthog dir.
    std::vector<int> icoor(Nd);
@@ -364,7 +355,7 @@ static void localSliceInnerProductVector(std::vector<ComplexD> &result, const La

        }
    }
    std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
    // std::cout << GridLogMessage << "End sum over simd lanes" << std::endl;
}
template <class vobj>
static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const Lattice<vobj> &lhs, std::vector<typename vobj::scalar_type> &lsSum, int orthogdim)
@@ -376,7 +367,7 @@ static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const L
    // sum over nodes.
    std::vector<scalar_type> gsum;
    gsum.resize(fd, scalar_type(0.0));
    std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
    // std::cout << GridLogMessage << "Start of gsum[t] creation:" << std::endl;
    for(int t=0;t<fd;t++){
        int pt = t/ld; // processor plane
        int lt = t%ld;
@@ -384,10 +375,10 @@ static void globalSliceInnerProductVector(std::vector<ComplexD> &result, const L
            gsum[t]=lsSum[lt];
        }
    }
    std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
    std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
    // std::cout << GridLogMessage << "End of gsum[t] creation:" << std::endl;
    // std::cout << GridLogMessage << "Start of GlobalSumVector:" << std::endl;
    grid->GlobalSumVector(&gsum[0], fd);
    std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;
    // std::cout << GridLogMessage << "End of GlobalSumVector:" << std::endl;

    result = gsum;
}

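Both sliceInnerProductMesonField ("defer sum over nodes") and the local/global split above leave the MPI reduction to the caller; execute() does it in one call with GlobalSumVector on the packed Eigen matrix. A sketch (not part of the diff) of the same deferred reduction applied directly to the nested output of sliceInnerProductMesonField; the helper name sumMesonFieldOverNodes is illustrative:

// Flatten the per-node partial results, do one global sum, and unpack.
void sumMesonFieldOverNodes(std::vector<std::vector<ComplexD>> &mat, GridBase *grid)
{
    if (mat.empty()) return;
    const int nt = (int)mat[0].size();
    std::vector<ComplexD> flat(mat.size() * nt);
    for (size_t ij = 0; ij < mat.size(); ij++)        // pack (i,j,t) into one buffer
        for (int t = 0; t < nt; t++)
            flat[ij * nt + t] = mat[ij][t];
    grid->GlobalSumVector(&flat[0], (int)flat.size()); // single reduction over nodes
    for (size_t ij = 0; ij < mat.size(); ij++)        // unpack back into mat
        for (int t = 0; t < nt; t++)
            mat[ij][t] = flat[ij * nt + t];
}
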
@@ -106,7 +106,6 @@ inline vRealD innerProductD(const vRealF &l,const vRealF &r){
    typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
    iScalar<ret_t> ret;
    ret=zero;
    // std::cout << GridLogMessage << "innerProduct iVector" << std::endl;
    for(int c1=0;c1<N;c1++){
        ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
    }
@@ -130,34 +129,9 @@ inline vRealD innerProductD(const vRealF &l,const vRealF &r){
{
    typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
    iScalar<ret_t> ret;
    // std::cout << GridLogMessage << "innerProduct iScalar" << std::endl;

    ret._internal = innerProduct(lhs._internal,rhs._internal);
    return ret;
}
template<class l,class r,int N> inline
auto myInnerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
{
    typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
    iScalar<ret_t> ret;
    ret=zero;
    std::cout << GridLogMessage << "myInnerProduct iVector, N = " << N << std::endl;
    for(int c1=0;c1<N;c1++){
        ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
    }
    return ret;
}

template<class l,class r> inline
auto myInnerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
{
    typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
    iScalar<ret_t> ret;
    std::cout << GridLogMessage << "myInnerProduct iScalar" << std::endl;

    ret._internal = myInnerProduct(lhs._internal,rhs._internal);
    return ret;
}

}
#endif