Mirror of https://github.com/paboyle/Grid.git

Merge branch 'feature/scalar_adjointFT' into feature/hadrons

Antonin Portelli 2017-12-26 12:59:33 +01:00
commit 6718fa8c4f
9 changed files with 293 additions and 121 deletions

View File

@@ -59,6 +59,7 @@ public:
    virtual ~GridBase() = default;

    // Physics Grid information.
    std::vector<int> _simd_layout;   // Which dimensions get relayed out over simd lanes.
    std::vector<int> _fdimensions;   // (full) Global dimensions of array prior to cb removal

View File

@@ -122,6 +122,7 @@ public:
    // Use a reduced simd grid
    _ldimensions[d] = _gdimensions[d] / _processors[d]; // local dimensions
+   //std::cout << _ldimensions[d] << " " << _gdimensions[d] << " " << _processors[d] << std::endl;
    assert(_ldimensions[d] * _processors[d] == _gdimensions[d]);
    _rdimensions[d] = _ldimensions[d] / _simd_layout[d]; // overdecomposition
@@ -166,6 +167,7 @@ public:
      block = block * _rdimensions[d];
    }
  };
};
}
#endif
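Note: for concreteness, a minimal standalone sketch (not Grid code) of the decomposition checked in the hunk above, with hypothetical sizes — a 16^4 global lattice on a 2^4 processor grid with SIMD layout {1,1,2,2}:

// Standalone illustration of local/reduced dimension bookkeeping (hypothetical sizes).
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> gdims = {16, 16, 16, 16}; // global lattice
  std::vector<int> procs = {2, 2, 2, 2};     // MPI processor grid
  std::vector<int> simd  = {1, 1, 2, 2};     // SIMD lanes per dimension
  for (std::size_t d = 0; d < gdims.size(); d++) {
    int ldim = gdims[d] / procs[d];          // local dimension: 8
    assert(ldim * procs[d] == gdims[d]);     // same check as in the hunk above
    int rdim = ldim / simd[d];               // reduced (overdecomposed): 8 or 4
    assert(rdim * simd[d] == ldim);
    std::printf("dim %zu: global %d local %d reduced %d\n", d, gdims[d], ldim, rdim);
  }
}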

View File

@@ -167,6 +167,7 @@ class CartesianCommunicator {
#endif
public:
  ////////////////////////////////////////////////////////////////////////////////////////
  // Wraps MPI_Cart routines, or implements equivalent on other impls
  ////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -830,6 +830,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
  MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
};
+
+CartesianCommunicator::~CartesianCommunicator() = default;
+
void CartesianCommunicator::GlobalSum(uint32_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
  assert(ierr==0);
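Note: the destructor is now explicitly defaulted; the surrounding GlobalSum uses the standard in-place all-reduce idiom. A minimal self-contained illustration of that MPI pattern (plain MPI, not Grid code):

// In-place MPI_Allreduce sum, mirroring GlobalSum(uint32_t&) above.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <mpi.h>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  uint32_t u = (uint32_t)rank + 1;   // each rank contributes rank+1
  int ierr = MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT32_T, MPI_SUM, MPI_COMM_WORLD);
  assert(ierr == MPI_SUCCESS);       // Grid asserts ierr==0, i.e. MPI_SUCCESS
  if (rank == 0) std::printf("global sum = %u\n", u);
  MPI_Finalize();
}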

View File

@@ -105,6 +105,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
  assert(Size==_Nprocessors);
}
+
+CartesianCommunicator::~CartesianCommunicator() = default;
+
void CartesianCommunicator::GlobalSum(uint32_t &u){
  static long long source ;
  static long long dest ;

View File

@@ -89,6 +89,12 @@ class ScalarImplTypes {
  };

+#ifdef USE_FFT_ACCELERATION
+#ifndef FFT_MASS
+#error "USE_FFT_ACCELERATION is defined but not FFT_MASS"
+#endif
+#endif
+
template <class S, unsigned int N>
class ScalarAdjMatrixImplTypes {
 public:
@@ -109,18 +115,113 @@ class ScalarImplTypes {
  typedef Field FermionField;
  typedef Field PropagatorField;

-  static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) {
+  static void MomentaSquare(ComplexField &out)
+  {
+    GridBase *grid = out._grid;
+    const std::vector<int> &l = grid->FullDimensions();
+    ComplexField kmu(grid);
+
+    for (int mu = 0; mu < grid->Nd(); mu++)
+    {
+      Real twoPiL = M_PI * 2.0 / l[mu];
+      LatticeCoordinate(kmu, mu);
+      kmu = 2.0 * sin(0.5 * twoPiL * kmu);
+      out += kmu * kmu;
+    }
+  }
+
+  static void MomentumSpacePropagator(ComplexField &out, RealD m)
+  {
+    GridBase *grid = out._grid;
+    ComplexField one(grid);
+    one = Complex(1.0, 0.0);
+    out = m * m;
+    MomentaSquare(out);
+    out = one / out;
+  }
+
+  static inline void generate_momenta(Field &P, GridParallelRNG &pRNG)
+  {
+#ifndef USE_FFT_ACCELERATION
    Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P);
+#else
+    Field Pgaussian(P._grid), Pp(P._grid);
+    ComplexField p2(P._grid); p2 = zero;
+    RealD M = FFT_MASS;
+
+    Group::GaussianFundamentalLieAlgebraMatrix(pRNG, Pgaussian);
+
+    FFT theFFT((GridCartesian*)P._grid);
+    theFFT.FFT_all_dim(Pp, Pgaussian, FFT::forward);
+    MomentaSquare(p2);
+    p2 += M * M;
+    p2 = sqrt(p2);
+    Pp *= p2;
+    theFFT.FFT_all_dim(P, Pp, FFT::backward);
+#endif //USE_FFT_ACCELERATION
  }

  static inline Field projectForce(Field& P) {return P;}

-  static inline void update_field(Field& P, Field& U, double ep) {
+  static inline void update_field(Field &P, Field &U, double ep)
+  {
+#ifndef USE_FFT_ACCELERATION
+    double t0 = usecond();
    U += P * ep;
+    double t1 = usecond();
+    double total_time = (t1 - t0) / 1e6;
+    std::cout << GridLogIntegrator << "Total time for updating field (s) : " << total_time << std::endl;
+#else
+    // FFT transform P(x) -> P(p)
+    // divide by (M^2 + p^2), M external parameter (how to pass?)
+    // P'(p) = P(p)/(M^2 + p^2)
+    // transform back -> P'(x)
+    // U += P'(x)*ep
+
+    Field Pp(U._grid), P_FFT(U._grid);
+    static ComplexField p2(U._grid);
+    RealD M = FFT_MASS;
+
+    FFT theFFT((GridCartesian*)U._grid);
+    theFFT.FFT_all_dim(Pp, P, FFT::forward);
+
+    static bool first_call = true;
+    if (first_call)
+    {
+      // avoid recomputing
+      MomentumSpacePropagator(p2, M);
+      first_call = false;
+    }
+    Pp *= p2;
+    theFFT.FFT_all_dim(P_FFT, Pp, FFT::backward);
+    U += P_FFT * ep;
+#endif //USE_FFT_ACCELERATION
  }

-  static inline RealD FieldSquareNorm(Field& U) {
+  static inline RealD FieldSquareNorm(Field &U)
+  {
+#ifndef USE_FFT_ACCELERATION
    return (TensorRemove(sum(trace(U * U))).real());
+#else
+    // In case of Fourier acceleration we have to:
+    //   compute U(p)*U(p)/(M^2 + p^2)   (Parseval theorem)
+    //   1 FFT needed: U(x) -> U(p)
+    //   M to be passed
+
+    FFT theFFT((GridCartesian*)U._grid);
+    Field Up(U._grid);
+
+    theFFT.FFT_all_dim(Up, U, FFT::forward);
+    RealD M = FFT_MASS;
+    ComplexField p2(U._grid);
+    MomentumSpacePropagator(p2, M);
+    Field Up2 = Up * p2;
+    // from the definition of the DFT we need to divide by the volume
+    return (-TensorRemove(sum(trace(adj(Up) * Up2))).real() / U._grid->gSites());
+#endif //USE_FFT_ACCELERATION
  }

  static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
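Note: the gist of this file's change is that Gaussian momenta are reweighted mode by mode, so the integrator effectively takes longer steps on slow, low-momentum modes. The kernel is the lattice momentum p_hat_mu = 2 sin(pi k_mu / L_mu) and the propagator 1/(M^2 + p_hat^2). A toy 1-D sketch of just that arithmetic (plain C++, no Grid types; L and M are hypothetical stand-ins for the grid extent and FFT_MASS):

// Toy illustration of the Fourier-acceleration kernel used above.
#include <cmath>
#include <cstdio>

int main() {
  const int L = 8;        // hypothetical lattice extent
  const double M = 0.5;   // stands in for FFT_MASS
  for (int k = 0; k < L; k++) {
    double phat = 2.0 * std::sin(M_PI * k / L); // = 2 sin(0.5 * (2*pi/L) * k), as in MomentaSquare
    double p2 = phat * phat;
    double prop = 1.0 / (M * M + p2);           // MomentumSpacePropagator analogue
    std::printf("k=%d  p_hat^2=%.6f  1/(M^2+p_hat^2)=%.6f\n", k, p2, prop);
  }
  // In the diff: update_field multiplies each Fourier mode of P by prop,
  // while generate_momenta multiplies Gaussian noise by sqrt(M^2 + p_hat^2),
  // so FieldSquareNorm's propagator-weighted kinetic term stays correctly normalised.
}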

View File

@@ -30,44 +30,51 @@ directory
#ifndef SCALAR_INT_ACTION_H
#define SCALAR_INT_ACTION_H

// Note: this action can completely absorb the ScalarAction for real float fields
// use the scalarObjs to generalise the structure

-namespace Grid {
+namespace Grid
+{
  // FIXME drop the QCD namespace everywhere here

  template <class Impl, int Ndim>
-  class ScalarInteractionAction : public QCD::Action<typename Impl::Field> {
+  class ScalarInteractionAction : public QCD::Action<typename Impl::Field>
+  {
  public:
    INHERIT_FIELD_TYPES(Impl);

  private:
    RealD mass_square;
    RealD lambda;
+    RealD g;
+    const unsigned int N = Impl::Group::Dimension;

    typedef typename Field::vector_object vobj;
    typedef CartesianStencil<vobj, vobj> Stencil;

    SimpleCompressor<vobj> compressor;
    int npoint = 2 * Ndim;
-    std::vector<int> directions;// = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions
-    std::vector<int> displacements;// = {1,1,1,1, -1,-1,-1,-1};
+    std::vector<int> directions;    //
+    std::vector<int> displacements; //

  public:
-    ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){
-      for (int mu = 0 ; mu < Ndim; mu++){
-        directions[mu] = mu; directions[mu+Ndim] = mu;
-        displacements[mu] = 1; displacements[mu+Ndim] = -1;
+    ScalarInteractionAction(RealD ms, RealD l, RealD gval) : mass_square(ms), lambda(l), g(gval), displacements(2 * Ndim, 0), directions(2 * Ndim, 0)
+    {
+      for (int mu = 0; mu < Ndim; mu++)
+      {
+        directions[mu] = mu;
+        directions[mu + Ndim] = mu;
+        displacements[mu] = 1;
+        displacements[mu + Ndim] = -1;
      }
    }

-    virtual std::string LogParameters() {
+    virtual std::string LogParameters()
+    {
      std::stringstream sstream;
      sstream << GridLogMessage << "[ScalarAction] lambda      : " << lambda << std::endl;
      sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl;
+      sstream << GridLogMessage << "[ScalarAction] g           : " << g << std::endl;
      return sstream.str();
    }
@@ -75,16 +82,19 @@ namespace Grid {
    virtual void refresh(const Field &U, GridParallelRNG &pRNG) {}

-    virtual RealD S(const Field &p) {
+    virtual RealD S(const Field &p)
+    {
      assert(p._grid->Nd() == Ndim);
      static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);
      phiStencil.HaloExchange(p, compressor);
      Field action(p._grid), pshift(p._grid), phisquared(p._grid);
      phisquared = p * p;
-      action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared;
-      for (int mu = 0; mu < Ndim; mu++) {
+      action = (2.0 * Ndim + mass_square) * phisquared - lambda * phisquared * phisquared;
+      for (int mu = 0; mu < Ndim; mu++)
+      {
        // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils
-        parallel_for (int i = 0; i < p._grid->oSites(); i++) {
+        parallel_for(int i = 0; i < p._grid->oSites(); i++)
+        {
          int permute_type;
          StencilEntry *SE;
          vobj temp2;
@@ -92,15 +102,21 @@ namespace Grid {
          SE = phiStencil.GetEntry(permute_type, mu, i);
          t_p = &p._odata[i];
-          if ( SE->_is_local ) {
+          if (SE->_is_local)
+          {
            temp = &p._odata[SE->_offset];
-            if ( SE->_permute ) {
+            if (SE->_permute)
+            {
              permute(temp2, *temp, permute_type);
              action._odata[i] -= temp2 * (*t_p) + (*t_p) * temp2;
-            } else {
+            }
+            else
+            {
              action._odata[i] -= (*temp) * (*t_p) + (*t_p) * (*temp);
            }
-          } else {
+          }
+          else
+          {
            action._odata[i] -= phiStencil.CommBuf()[SE->_offset] * (*t_p) + (*t_p) * phiStencil.CommBuf()[SE->_offset];
          }
        }
@@ -108,39 +124,83 @@ namespace Grid {
      }
      // NB the trace in the algebra is normalised to 1/2
      // minus sign coming from the antihermitian fields
-      return -(TensorRemove(sum(trace(action)))).real();
+      return -(TensorRemove(sum(trace(action)))).real() * N / g;
    };

-    virtual void deriv(const Field &p, Field &force) {
+    virtual void deriv(const Field &p, Field &force)
+    {
+      double t0 = usecond();
      assert(p._grid->Nd() == Ndim);
-      force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p;
+      force = (2. * Ndim + mass_square) * p - 2. * lambda * p * p * p;
+      double interm_t = usecond();

      // move this outside
      static Stencil phiStencil(p._grid, npoint, 0, directions, displacements);

      phiStencil.HaloExchange(p, compressor);
+      double halo_t = usecond();
+      int chunk = 128;
      //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1);
+
+      // inverting the order of the loops slows down the code (! g++ 7)
+      // cannot try to reduce the number of force writes by a factor npoint...
+      // use cache blocking
-      for (int point = 0; point < npoint; point++) {
-        parallel_for (int i = 0; i < p._grid->oSites(); i++) {
-          const vobj *temp;
-          vobj temp2;
+      for (int point = 0; point < npoint; point++)
+      {
+#pragma omp parallel
+        {
          int permute_type;
          StencilEntry *SE;
-          SE = phiStencil.GetEntry(permute_type, point, i);
-          if ( SE->_is_local ) {
+          const vobj *temp;
+#pragma omp for schedule(static, chunk)
+          for (int i = 0; i < p._grid->oSites(); i++)
+          {
+            SE = phiStencil.GetEntry(permute_type, point, i);
+            // prefetch next p?
+            if (SE->_is_local)
+            {
              temp = &p._odata[SE->_offset];
-            if ( SE->_permute ) {
+              if (SE->_permute)
+              {
+                vobj temp2;
                permute(temp2, *temp, permute_type);
                force._odata[i] -= temp2;
-            } else {
-              force._odata[i] -= *temp;
              }
-          } else {
+              else
+              {
+                force._odata[i] -= *temp; // slow part. Dominated by this read/write (BW)
+              }
+            }
+            else
+            {
              force._odata[i] -= phiStencil.CommBuf()[SE->_offset];
            }
          }
        }
      }
+      force *= N / g;
+      double t1 = usecond();
+      double total_time = (t1 - t0) / 1e6;
+      double interm_time = (interm_t - t0) / 1e6;
+      double halo_time = (halo_t - interm_t) / 1e6;
+      double stencil_time = (t1 - halo_t) / 1e6;
+      std::cout << GridLogIntegrator << "Total time for force computation (s)        : " << total_time << std::endl;
+      std::cout << GridLogIntegrator << "Intermediate time for force computation (s) : " << interm_time << std::endl;
+      std::cout << GridLogIntegrator << "Halo time in force computation (s)          : " << halo_time << std::endl;
+      std::cout << GridLogIntegrator << "Stencil time in force computation (s)       : " << stencil_time << std::endl;
+      double flops = p._grid->gSites() * (14 * N * N * N + 18 * N * N + 2);
+      double flops_no_stencil = p._grid->gSites() * (14 * N * N * N + 6 * N * N + 2);
+      double Gflops = flops / (total_time * 1e9);
+      double Gflops_no_stencil = flops_no_stencil / (interm_time * 1e9);
+      std::cout << GridLogIntegrator << "Flops: " << flops << " - Gflop/s : " << Gflops << std::endl;
+      std::cout << GridLogIntegrator << "Flops NS: " << flops_no_stencil << " - Gflop/s NS: " << Gflops_no_stencil << std::endl;
+    }
  };
} // namespace Grid
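Note: the rewritten deriv() trades Grid's parallel_for for an explicit OpenMP region so that per-thread scratch (permute_type, SE, temp) is declared once per thread rather than per iteration, with static chunks of 128 sites. A self-contained sketch of just that loop structure (illustrative body, not the real stencil):

// Parallel-region + chunked worksharing pattern adopted in deriv() above.
#include <cstdio>
#include <vector>

int main() {
  const int nsites = 1 << 16;
  const int chunk = 128;     // same blocking factor as in deriv()
  std::vector<double> force(nsites, 0.0), p(nsites, 1.0);
#pragma omp parallel
  {
    double scratch;          // per-thread, like permute_type / SE / temp
#pragma omp for schedule(static, chunk)
    for (int i = 0; i < nsites; i++) {
      scratch = p[i];        // stand-in for the stencil fetch
      force[i] -= scratch;   // dominated by this read/write (memory bandwidth)
    }
  }
  std::printf("force[0] = %f\n", force[0]);
}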

View File

@@ -211,7 +211,7 @@ typedef HMCWrapperTemplate<ScalarAdjImplR, MinimumNorm2, ScalarMatrixFields>
    ScalarAdjGenericHMCRunner;

template <int Colours>
-using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, MinimumNorm2, ScalarNxNMatrixFields<Colours> >;
+using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR<Colours>, ForceGradient, ScalarNxNMatrixFields<Colours> >;
} // namespace QCD
} // namespace Grid

View File

@@ -31,7 +31,8 @@ class ScalarActionParameters : Serializable {
 public:
  GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters,
    double, mass_squared,
-    double, lambda);
+    double, lambda,
+    double, g);

  template <class ReaderClass >
  ScalarActionParameters(Reader<ReaderClass>& Reader){
@@ -140,7 +141,7 @@ int main(int argc, char **argv) {
  // Scalar action in adjoint representation
  ScalarActionParameters SPar(Reader);
-  ScalarAction Saction(SPar.mass_squared, SPar.lambda);
+  ScalarAction Saction(SPar.mass_squared, SPar.lambda, SPar.g);

  // Collect actions
  ActionLevel<ScalarAction::Field, ScalarNxNMatrixFields<Ncolours>> Level1(1);
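Note: with g added to GRID_SERIALIZABLE_CLASS_MEMBERS, the reader now expects a third entry in the ScalarAction parameter block. A plausible input stanza, assuming XML input with tag names following the member names as usual for Grid serializables (the section name and the values here are hypothetical):

<ScalarAction>
  <mass_squared>1.0</mass_squared>
  <lambda>0.1</lambda>
  <g>1.0</g>
</ScalarAction>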