mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Merge branch 'feature/scalar_adjointFT' into feature/hadrons
This commit is contained in:
commit
6718fa8c4f
@ -59,6 +59,7 @@ public:
|
|||||||
|
|
||||||
virtual ~GridBase() = default;
|
virtual ~GridBase() = default;
|
||||||
|
|
||||||
|
|
||||||
// Physics Grid information.
|
// Physics Grid information.
|
||||||
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
|
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
|
||||||
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
|
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
|
||||||
|
@ -122,6 +122,7 @@ public:
|
|||||||
|
|
||||||
// Use a reduced simd grid
|
// Use a reduced simd grid
|
||||||
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
_ldimensions[d] = _gdimensions[d] / _processors[d]; //local dimensions
|
||||||
|
//std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
|
||||||
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
|
||||||
|
|
||||||
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
_rdimensions[d] = _ldimensions[d] / _simd_layout[d]; //overdecomposition
|
||||||
@ -166,6 +167,7 @@ public:
|
|||||||
block = block * _rdimensions[d];
|
block = block * _rdimensions[d];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -166,6 +166,7 @@ class CartesianCommunicator {
|
|||||||
void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base);
|
void InitFromMPICommunicator(const std::vector<int> &processors, MPI_Comm communicator_base);
|
||||||
#endif
|
#endif
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||||
|
@ -830,6 +830,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
CartesianCommunicator::~CartesianCommunicator() = default;
|
||||||
|
|
||||||
|
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||||
assert(ierr==0);
|
assert(ierr==0);
|
||||||
|
@ -105,6 +105,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
|||||||
assert(Size==_Nprocessors);
|
assert(Size==_Nprocessors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CartesianCommunicator::~CartesianCommunicator() = default;
|
||||||
|
|
||||||
|
|
||||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||||
static long long source ;
|
static long long source ;
|
||||||
static long long dest ;
|
static long long dest ;
|
||||||
|
@ -16,12 +16,12 @@ class ScalarImplTypes {
|
|||||||
typedef iImplField<Simd> SiteField;
|
typedef iImplField<Simd> SiteField;
|
||||||
typedef SiteField SitePropagator;
|
typedef SiteField SitePropagator;
|
||||||
typedef SiteField SiteComplex;
|
typedef SiteField SiteComplex;
|
||||||
|
|
||||||
typedef Lattice<SiteField> Field;
|
typedef Lattice<SiteField> Field;
|
||||||
typedef Field ComplexField;
|
typedef Field ComplexField;
|
||||||
typedef Field FermionField;
|
typedef Field FermionField;
|
||||||
typedef Field PropagatorField;
|
typedef Field PropagatorField;
|
||||||
|
|
||||||
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
|
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){
|
||||||
gaussian(pRNG, P);
|
gaussian(pRNG, P);
|
||||||
}
|
}
|
||||||
@ -47,54 +47,60 @@ class ScalarImplTypes {
|
|||||||
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
|
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||||
U = 1.0;
|
U = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void MomentumSpacePropagator(Field &out, RealD m)
|
static void MomentumSpacePropagator(Field &out, RealD m)
|
||||||
{
|
{
|
||||||
GridBase *grid = out._grid;
|
GridBase *grid = out._grid;
|
||||||
Field kmu(grid), one(grid);
|
Field kmu(grid), one(grid);
|
||||||
const unsigned int nd = grid->_ndimension;
|
const unsigned int nd = grid->_ndimension;
|
||||||
std::vector<int> &l = grid->_fdimensions;
|
std::vector<int> &l = grid->_fdimensions;
|
||||||
|
|
||||||
one = Complex(1.0,0.0);
|
one = Complex(1.0,0.0);
|
||||||
out = m*m;
|
out = m*m;
|
||||||
for(int mu = 0; mu < nd; mu++)
|
for(int mu = 0; mu < nd; mu++)
|
||||||
{
|
{
|
||||||
Real twoPiL = M_PI*2./l[mu];
|
Real twoPiL = M_PI*2./l[mu];
|
||||||
|
|
||||||
LatticeCoordinate(kmu,mu);
|
LatticeCoordinate(kmu,mu);
|
||||||
kmu = 2.*sin(.5*twoPiL*kmu);
|
kmu = 2.*sin(.5*twoPiL*kmu);
|
||||||
out = out + kmu*kmu;
|
out = out + kmu*kmu;
|
||||||
}
|
}
|
||||||
out = one/out;
|
out = one/out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FreePropagator(const Field &in, Field &out,
|
static void FreePropagator(const Field &in, Field &out,
|
||||||
const Field &momKernel)
|
const Field &momKernel)
|
||||||
{
|
{
|
||||||
FFT fft((GridCartesian *)in._grid);
|
FFT fft((GridCartesian *)in._grid);
|
||||||
Field inFT(in._grid);
|
Field inFT(in._grid);
|
||||||
|
|
||||||
fft.FFT_all_dim(inFT, in, FFT::forward);
|
fft.FFT_all_dim(inFT, in, FFT::forward);
|
||||||
inFT = inFT*momKernel;
|
inFT = inFT*momKernel;
|
||||||
fft.FFT_all_dim(out, inFT, FFT::backward);
|
fft.FFT_all_dim(out, inFT, FFT::backward);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FreePropagator(const Field &in, Field &out, RealD m)
|
static void FreePropagator(const Field &in, Field &out, RealD m)
|
||||||
{
|
{
|
||||||
Field momKernel(in._grid);
|
Field momKernel(in._grid);
|
||||||
|
|
||||||
MomentumSpacePropagator(momKernel, m);
|
MomentumSpacePropagator(momKernel, m);
|
||||||
FreePropagator(in, out, momKernel);
|
FreePropagator(in, out, momKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef USE_FFT_ACCELERATION
|
||||||
|
#ifndef FFT_MASS
|
||||||
|
#error "USE_FFT_ACCELERATION is defined but not FFT_MASS"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
template <class S, unsigned int N>
|
template <class S, unsigned int N>
|
||||||
class ScalarAdjMatrixImplTypes {
|
class ScalarAdjMatrixImplTypes {
|
||||||
public:
|
public:
|
||||||
typedef S Simd;
|
typedef S Simd;
|
||||||
typedef QCD::SU<N> Group;
|
typedef QCD::SU<N> Group;
|
||||||
|
|
||||||
template <typename vtype>
|
template <typename vtype>
|
||||||
using iImplField = iScalar<iScalar<iMatrix<vtype, N>>>;
|
using iImplField = iScalar<iScalar<iMatrix<vtype, N>>>;
|
||||||
template <typename vtype>
|
template <typename vtype>
|
||||||
@ -103,24 +109,119 @@ class ScalarImplTypes {
|
|||||||
typedef iImplField<Simd> SiteField;
|
typedef iImplField<Simd> SiteField;
|
||||||
typedef SiteField SitePropagator;
|
typedef SiteField SitePropagator;
|
||||||
typedef iImplComplex<Simd> SiteComplex;
|
typedef iImplComplex<Simd> SiteComplex;
|
||||||
|
|
||||||
typedef Lattice<SiteField> Field;
|
typedef Lattice<SiteField> Field;
|
||||||
typedef Lattice<SiteComplex> ComplexField;
|
typedef Lattice<SiteComplex> ComplexField;
|
||||||
typedef Field FermionField;
|
typedef Field FermionField;
|
||||||
typedef Field PropagatorField;
|
typedef Field PropagatorField;
|
||||||
|
|
||||||
static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) {
|
static void MomentaSquare(ComplexField &out)
|
||||||
|
{
|
||||||
|
GridBase *grid = out._grid;
|
||||||
|
const std::vector<int> &l = grid->FullDimensions();
|
||||||
|
ComplexField kmu(grid);
|
||||||
|
|
||||||
|
for (int mu = 0; mu < grid->Nd(); mu++)
|
||||||
|
{
|
||||||
|
Real twoPiL = M_PI * 2.0 / l[mu];
|
||||||
|
LatticeCoordinate(kmu, mu);
|
||||||
|
kmu = 2.0 * sin(0.5 * twoPiL * kmu);
|
||||||
|
out += kmu * kmu;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void MomentumSpacePropagator(ComplexField &out, RealD m)
|
||||||
|
{
|
||||||
|
GridBase *grid = out._grid;
|
||||||
|
ComplexField one(grid);
|
||||||
|
one = Complex(1.0, 0.0);
|
||||||
|
out = m * m;
|
||||||
|
MomentaSquare(out);
|
||||||
|
out = one / out;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
|
||||||
|
{
|
||||||
|
#ifndef USE_FFT_ACCELERATION
|
||||||
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
|
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
|
||||||
|
#else
|
||||||
|
|
||||||
|
Field Pgaussian(P._grid), Pp(P._grid);
|
||||||
|
ComplexField p2(P._grid); p2 = zero;
|
||||||
|
RealD M = FFT_MASS;
|
||||||
|
|
||||||
|
Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
|
||||||
|
|
||||||
|
FFT theFFT((GridCartesian*)P._grid);
|
||||||
|
theFFT.FFT_all_dim(Pp, Pgaussian, FFT::forward);
|
||||||
|
MomentaSquare(p2);
|
||||||
|
p2 += M * M;
|
||||||
|
p2 = sqrt(p2);
|
||||||
|
Pp *= p2;
|
||||||
|
theFFT.FFT_all_dim(P, Pp, FFT::backward);
|
||||||
|
|
||||||
|
#endif //USE_FFT_ACCELERATION
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline Field projectForce(Field& P) {return P;}
|
static inline Field projectForce(Field& P) {return P;}
|
||||||
|
|
||||||
static inline void update_field(Field& P, Field& U, double ep) {
|
static inline void update_field(Field &P, Field &U, double ep)
|
||||||
U += P*ep;
|
{
|
||||||
|
#ifndef USE_FFT_ACCELERATION
|
||||||
|
double t0=usecond();
|
||||||
|
U += P * ep;
|
||||||
|
double t1=usecond();
|
||||||
|
double total_time = (t1-t0)/1e6;
|
||||||
|
std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl;
|
||||||
|
#else
|
||||||
|
// FFT transform P(x) -> P(p)
|
||||||
|
// divide by (M^2+p^2) M external parameter (how to pass?)
|
||||||
|
// P'(p) = P(p)/(M^2+p^2)
|
||||||
|
// Transform back -> P'(x)
|
||||||
|
// U += P'(x)*ep
|
||||||
|
|
||||||
|
Field Pp(U._grid), P_FFT(U._grid);
|
||||||
|
static ComplexField p2(U._grid);
|
||||||
|
RealD M = FFT_MASS;
|
||||||
|
|
||||||
|
FFT theFFT((GridCartesian*)U._grid);
|
||||||
|
theFFT.FFT_all_dim(Pp, P, FFT::forward);
|
||||||
|
|
||||||
|
static bool first_call = true;
|
||||||
|
if (first_call)
|
||||||
|
{
|
||||||
|
// avoid recomputing
|
||||||
|
MomentumSpacePropagator(p2, M);
|
||||||
|
first_call = false;
|
||||||
|
}
|
||||||
|
Pp *= p2;
|
||||||
|
theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward);
|
||||||
|
U += P_FFT * ep;
|
||||||
|
|
||||||
|
#endif //USE_FFT_ACCELERATION
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline RealD FieldSquareNorm(Field& U) {
|
static inline RealD FieldSquareNorm(Field &U)
|
||||||
return (TensorRemove(sum(trace(U*U))).real());
|
{
|
||||||
|
#ifndef USE_FFT_ACCELERATION
|
||||||
|
return (TensorRemove(sum(trace(U * U))).real());
|
||||||
|
#else
|
||||||
|
// In case of Fourier acceleration we have to:
|
||||||
|
// compute U(p)*U(p)/(M^2+p^2)) Parseval theorem
|
||||||
|
// 1 FFT needed U(x) -> U(p)
|
||||||
|
// M to be passed
|
||||||
|
|
||||||
|
FFT theFFT((GridCartesian*)U._grid);
|
||||||
|
Field Up(U._grid);
|
||||||
|
|
||||||
|
theFFT.FFT_all_dim(Up, U, FFT::forward);
|
||||||
|
RealD M = FFT_MASS;
|
||||||
|
ComplexField p2(U._grid);
|
||||||
|
MomentumSpacePropagator(p2, M);
|
||||||
|
Field Up2 = Up * p2;
|
||||||
|
// from the definition of the DFT we need to divide by the volume
|
||||||
|
return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / U._grid->gSites());
|
||||||
|
#endif //USE_FFT_ACCELERATION
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
|
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
|
||||||
@ -146,7 +247,7 @@ class ScalarImplTypes {
|
|||||||
typedef ScalarImplTypes<vComplex> ScalarImplCR;
|
typedef ScalarImplTypes<vComplex> ScalarImplCR;
|
||||||
typedef ScalarImplTypes<vComplexF> ScalarImplCF;
|
typedef ScalarImplTypes<vComplexF> ScalarImplCF;
|
||||||
typedef ScalarImplTypes<vComplexD> ScalarImplCD;
|
typedef ScalarImplTypes<vComplexD> ScalarImplCD;
|
||||||
|
|
||||||
// Hardcoding here the size of the matrices
|
// Hardcoding here the size of the matrices
|
||||||
typedef ScalarAdjMatrixImplTypes<vComplex, QCD::Nc> ScalarAdjImplR;
|
typedef ScalarAdjMatrixImplTypes<vComplex, QCD::Nc> ScalarAdjImplR;
|
||||||
typedef ScalarAdjMatrixImplTypes<vComplexF, QCD::Nc> ScalarAdjImplF;
|
typedef ScalarAdjMatrixImplTypes<vComplexF, QCD::Nc> ScalarAdjImplF;
|
||||||
@ -155,7 +256,7 @@ class ScalarImplTypes {
|
|||||||
template <int Colours > using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes<vComplex, Colours >;
|
template <int Colours > using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes<vComplex, Colours >;
|
||||||
template <int Colours > using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes<vComplexF, Colours >;
|
template <int Colours > using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes<vComplexF, Colours >;
|
||||||
template <int Colours > using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes<vComplexD, Colours >;
|
template <int Colours > using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes<vComplexD, Colours >;
|
||||||
|
|
||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,119 +30,179 @@ directory
|
|||||||
#ifndef SCALAR_INT_ACTION_H
|
#ifndef SCALAR_INT_ACTION_H
|
||||||
#define SCALAR_INT_ACTION_H
|
#define SCALAR_INT_ACTION_H
|
||||||
|
|
||||||
|
|
||||||
// Note: this action can completely absorb the ScalarAction for real float fields
|
// Note: this action can completely absorb the ScalarAction for real float fields
|
||||||
// use the scalarObjs to generalise the structure
|
// use the scalarObjs to generalise the structure
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid
|
||||||
// FIXME drop the QCD namespace everywhere here
|
{
|
||||||
|
// FIXME drop the QCD namespace everywhere here
|
||||||
|
|
||||||
template <class Impl, int Ndim >
|
template <class Impl, int Ndim>
|
||||||
class ScalarInteractionAction : public QCD::Action<typename Impl::Field> {
|
class ScalarInteractionAction : public QCD::Action<typename Impl::Field>
|
||||||
public:
|
{
|
||||||
INHERIT_FIELD_TYPES(Impl);
|
public:
|
||||||
private:
|
INHERIT_FIELD_TYPES(Impl);
|
||||||
RealD mass_square;
|
|
||||||
RealD lambda;
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
RealD mass_square;
|
||||||
|
RealD lambda;
|
||||||
|
RealD g;
|
||||||
|
const unsigned int N = Impl::Group::Dimension;
|
||||||
|
|
||||||
typedef typename Field::vector_object vobj;
|
typedef typename Field::vector_object vobj;
|
||||||
typedef CartesianStencil<vobj,vobj> Stencil;
|
typedef CartesianStencil<vobj, vobj> Stencil;
|
||||||
|
|
||||||
SimpleCompressor<vobj> compressor;
|
SimpleCompressor<vobj> compressor;
|
||||||
int npoint = 2*Ndim;
|
int npoint = 2 * Ndim;
|
||||||
std::vector<int> directions;// = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions
|
std::vector<int> directions; //
|
||||||
std::vector<int> displacements;// = {1,1,1,1, -1,-1,-1,-1};
|
std::vector<int> displacements; //
|
||||||
|
|
||||||
|
public:
|
||||||
public:
|
ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0)
|
||||||
|
{
|
||||||
ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){
|
for (int mu = 0; mu < Ndim; mu++)
|
||||||
for (int mu = 0 ; mu < Ndim; mu++){
|
{
|
||||||
directions[mu] = mu; directions[mu+Ndim] = mu;
|
directions[mu] = mu;
|
||||||
displacements[mu] = 1; displacements[mu+Ndim] = -1;
|
directions[mu + Ndim] = mu;
|
||||||
}
|
displacements[mu] = 1;
|
||||||
|
displacements[mu + Ndim] = -1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
virtual std::string LogParameters() {
|
virtual std::string LogParameters()
|
||||||
std::stringstream sstream;
|
{
|
||||||
sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl;
|
std::stringstream sstream;
|
||||||
sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
|
sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl;
|
||||||
return sstream.str();
|
sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
|
||||||
}
|
sstream << GridLogMessage << "[ScalarAction] g : " << g << std::endl;
|
||||||
|
return sstream.str();
|
||||||
|
}
|
||||||
|
|
||||||
virtual std::string action_name() {return "ScalarAction";}
|
virtual std::string action_name() { return "ScalarAction"; }
|
||||||
|
|
||||||
virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}
|
virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}
|
||||||
|
|
||||||
virtual RealD S(const Field &p) {
|
virtual RealD S(const Field &p)
|
||||||
assert(p._grid->Nd() == Ndim);
|
{
|
||||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
assert(p._grid->Nd() == Ndim);
|
||||||
phiStencil.HaloExchange(p, compressor);
|
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||||
Field action(p._grid), pshift(p._grid), phisquared(p._grid);
|
phiStencil.HaloExchange(p, compressor);
|
||||||
phisquared = p*p;
|
Field action(p._grid), pshift(p._grid), phisquared(p._grid);
|
||||||
action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared;
|
phisquared = p * p;
|
||||||
for (int mu = 0; mu < Ndim; mu++) {
|
action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
|
||||||
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
|
for (int mu = 0; mu < Ndim; mu++)
|
||||||
parallel_for (int i = 0; i < p._grid->oSites(); i++) {
|
{
|
||||||
int permute_type;
|
// pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
|
||||||
StencilEntry *SE;
|
parallel_for(int i = 0; i < p._grid->oSites(); i++)
|
||||||
vobj temp2;
|
{
|
||||||
const vobj *temp, *t_p;
|
int permute_type;
|
||||||
|
StencilEntry *SE;
|
||||||
SE = phiStencil.GetEntry(permute_type, mu, i);
|
vobj temp2;
|
||||||
t_p = &p._odata[i];
|
const vobj *temp, *t_p;
|
||||||
if ( SE->_is_local ) {
|
|
||||||
temp = &p._odata[SE->_offset];
|
SE = phiStencil.GetEntry(permute_type, mu, i);
|
||||||
if ( SE->_permute ) {
|
t_p = &p._odata[i];
|
||||||
permute(temp2, *temp, permute_type);
|
if (SE->_is_local)
|
||||||
action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2;
|
{
|
||||||
} else {
|
temp = &p._odata[SE->_offset];
|
||||||
action._odata[i] -= (*temp)*(*t_p) + (*t_p)*(*temp);
|
if (SE->_permute)
|
||||||
}
|
{
|
||||||
} else {
|
permute(temp2, *temp, permute_type);
|
||||||
action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset];
|
action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
// action -= pshift*p + p*pshift;
|
{
|
||||||
}
|
action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
|
||||||
// NB the trace in the algebra is normalised to 1/2
|
}
|
||||||
// minus sign coming from the antihermitian fields
|
}
|
||||||
return -(TensorRemove(sum(trace(action)))).real();
|
else
|
||||||
};
|
{
|
||||||
|
action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
|
||||||
virtual void deriv(const Field &p, Field &force) {
|
}
|
||||||
assert(p._grid->Nd() == Ndim);
|
|
||||||
force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p;
|
|
||||||
// move this outside
|
|
||||||
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
|
||||||
phiStencil.HaloExchange(p, compressor);
|
|
||||||
|
|
||||||
//for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
|
|
||||||
for (int point = 0; point < npoint; point++) {
|
|
||||||
parallel_for (int i = 0; i < p._grid->oSites(); i++) {
|
|
||||||
const vobj *temp;
|
|
||||||
vobj temp2;
|
|
||||||
int permute_type;
|
|
||||||
StencilEntry *SE;
|
|
||||||
SE = phiStencil.GetEntry(permute_type, point, i);
|
|
||||||
|
|
||||||
if ( SE->_is_local ) {
|
|
||||||
temp = &p._odata[SE->_offset];
|
|
||||||
if ( SE->_permute ) {
|
|
||||||
permute(temp2, *temp, permute_type);
|
|
||||||
force._odata[i] -= temp2;
|
|
||||||
} else {
|
|
||||||
force._odata[i] -= *temp;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// action -= pshift*p + p*pshift;
|
||||||
}
|
}
|
||||||
|
// NB the trace in the algebra is normalised to 1/2
|
||||||
|
// minus sign coming from the antihermitian fields
|
||||||
|
return -(TensorRemove(sum(trace(action)))).real() * N / g;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Grid
|
|
||||||
|
|
||||||
#endif // SCALAR_INT_ACTION_H
|
virtual void deriv(const Field &p, Field &force)
|
||||||
|
{
|
||||||
|
double t0 = usecond();
|
||||||
|
assert(p._grid->Nd() == Ndim);
|
||||||
|
force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
|
||||||
|
double interm_t = usecond();
|
||||||
|
|
||||||
|
// move this outside
|
||||||
|
static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
|
||||||
|
|
||||||
|
phiStencil.HaloExchange(p, compressor);
|
||||||
|
double halo_t = usecond();
|
||||||
|
int chunk = 128;
|
||||||
|
//for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
|
||||||
|
|
||||||
|
// inverting the order of the loops slows down the code(! g++ 7)
|
||||||
|
// cannot try to reduce the number of force writes by factor npoint...
|
||||||
|
// use cache blocking
|
||||||
|
for (int point = 0; point < npoint; point++)
|
||||||
|
{
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
int permute_type;
|
||||||
|
StencilEntry *SE;
|
||||||
|
const vobj *temp;
|
||||||
|
|
||||||
|
#pragma omp for schedule(static, chunk)
|
||||||
|
for (int i = 0; i < p._grid->oSites(); i++)
|
||||||
|
{
|
||||||
|
SE = phiStencil.GetEntry(permute_type, point, i);
|
||||||
|
// prefetch next p?
|
||||||
|
|
||||||
|
if (SE->_is_local)
|
||||||
|
{
|
||||||
|
temp = &p._odata[SE->_offset];
|
||||||
|
|
||||||
|
if (SE->_permute)
|
||||||
|
{
|
||||||
|
vobj temp2;
|
||||||
|
permute(temp2, *temp, permute_type);
|
||||||
|
force._odata[i] -= temp2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
force *= N / g;
|
||||||
|
|
||||||
|
double t1 = usecond();
|
||||||
|
double total_time = (t1 - t0) / 1e6;
|
||||||
|
double interm_time = (interm_t - t0) / 1e6;
|
||||||
|
double halo_time = (halo_t - interm_t) / 1e6;
|
||||||
|
double stencil_time = (t1 - halo_t) / 1e6;
|
||||||
|
std::cout << GridLogIntegrator << "Total time for force computation (s) : " << total_time << std::endl;
|
||||||
|
std::cout << GridLogIntegrator << "Intermediate time for force computation (s): " << interm_time << std::endl;
|
||||||
|
std::cout << GridLogIntegrator << "Halo time in force computation (s) : " << halo_time << std::endl;
|
||||||
|
std::cout << GridLogIntegrator << "Stencil time in force computation (s) : " << stencil_time << std::endl;
|
||||||
|
double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
|
||||||
|
double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
|
||||||
|
double Gflops = flops / (total_time * 1e9);
|
||||||
|
double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
|
||||||
|
std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
|
||||||
|
std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Grid
|
||||||
|
|
||||||
|
#endif // SCALAR_INT_ACTION_H
|
||||||
|
@ -211,7 +211,7 @@ typedef HMCWrapperTemplate<ScalarAdjImplR, MinimumNorm2, ScalarMatrixFields>
|
|||||||
ScalarAdjGenericHMCRunner;
|
ScalarAdjGenericHMCRunner;
|
||||||
|
|
||||||
template <int Colours>
|
template <int Colours>
|
||||||
using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, MinimumNorm2, ScalarNxNMatrixFields<Colours> >;
|
using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, ForceGradient, ScalarNxNMatrixFields<Colours> >;
|
||||||
|
|
||||||
} // namespace QCD
|
} // namespace QCD
|
||||||
} // namespace Grid
|
} // namespace Grid
|
||||||
|
@ -31,7 +31,8 @@ class ScalarActionParameters : Serializable {
|
|||||||
public:
|
public:
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters,
|
||||||
double, mass_squared,
|
double, mass_squared,
|
||||||
double, lambda);
|
double, lambda,
|
||||||
|
double, g);
|
||||||
|
|
||||||
template <class ReaderClass >
|
template <class ReaderClass >
|
||||||
ScalarActionParameters(Reader<ReaderClass>& Reader){
|
ScalarActionParameters(Reader<ReaderClass>& Reader){
|
||||||
@ -140,7 +141,7 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
// Scalar action in adjoint representation
|
// Scalar action in adjoint representation
|
||||||
ScalarActionParameters SPar(Reader);
|
ScalarActionParameters SPar(Reader);
|
||||||
ScalarAction Saction(SPar.mass_squared, SPar.lambda);
|
ScalarAction Saction(SPar.mass_squared, SPar.lambda, SPar.g);
|
||||||
|
|
||||||
// Collect actions
|
// Collect actions
|
||||||
ActionLevel<ScalarAction::Field, ScalarNxNMatrixFields<Ncolours>> Level1(1);
|
ActionLevel<ScalarAction::Field, ScalarNxNMatrixFields<Ncolours>> Level1(1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user