1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-14 22:07:05 +01:00

Merge branch 'develop' into feature/hmc_generalise

This commit is contained in:
Guido Cossu
2017-04-05 14:41:04 +01:00
205 changed files with 27899 additions and 3601 deletions

View File

@ -30,11 +30,19 @@
#define GRID_LATTICE_RNG_H
#include <random>
#ifdef RNG_SITMO
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
#endif
#if defined(RNG_SITMO)
#define RNG_FAST_DISCARD
#else
#undef RNG_FAST_DISCARD
#endif
namespace Grid {
//////////////////////////////////////////////////////////////
// Allow the RNG state to be less dense than the fine grid
//////////////////////////////////////////////////////////////
@ -64,16 +72,19 @@ namespace Grid {
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
}
return multiplicity;
}
// merge of April 11 2017
//<<<<<<< HEAD
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
// this function is necessary for the LS vectorised field
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
// assumes that the higher dimensions are not using more processors
@ -92,6 +103,7 @@ namespace Grid {
return fine->lSites() / coarse->lSites();
}
/*
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
@ -108,89 +120,140 @@ namespace Grid {
};
=======
>>>>>>> develop
*/
// real scalars are one component
template<class scalar,class distribution,class generator> void fillScalar(scalar &s,distribution &dist,generator & gen)
template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen)
{
s=dist(gen);
}
template<class distribution,class generator> void fillScalar(ComplexF &s,distribution &dist, generator &gen)
template<class distribution,class generator>
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
{
s=ComplexF(dist(gen),dist(gen));
}
template<class distribution,class generator> void fillScalar(ComplexD &s,distribution &dist,generator &gen)
template<class distribution,class generator>
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
{
s=ComplexD(dist(gen),dist(gen));
}
class GridRNGbase {
public:
int _seeded;
// One generator per site.
// Uniform and Gaussian distributions from these generators.
#ifdef RNG_RANLUX
typedef uint64_t RngStateType;
typedef std::ranlux48 RngEngine;
typedef uint64_t RngStateType;
static const int RngStateCount = 15;
#elif RNG_MT19937
#endif
#ifdef RNG_MT19937
typedef std::mt19937 RngEngine;
typedef uint32_t RngStateType;
static const int RngStateCount = std::mt19937::state_size;
#elif RNG_SITMO
#endif
#ifdef RNG_SITMO
typedef sitmo::prng_engine RngEngine;
typedef uint64_t RngStateType;
static const int RngStateCount = 4;
#endif
std::vector<RngEngine> _generators;
std::vector<std::uniform_real_distribution<RealD>> _uniform;
std::vector<std::normal_distribution<RealD>> _gaussian;
std::vector<std::discrete_distribution<int32_t>> _bernoulli;
void GetState(std::vector<RngStateType> & saved,int gen) {
std::vector<RngEngine> _generators;
std::vector<std::uniform_real_distribution<RealD> > _uniform;
std::vector<std::normal_distribution<RealD> > _gaussian;
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
///////////////////////
// support for parallel init
///////////////////////
#ifdef RNG_FAST_DISCARD
static void Skip(RngEngine &eng)
{
/////////////////////////////////////////////////////////////////////////////////////
// Skip by 2^40 elements between successive lattice sites
// This goes by 10^12.
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
// skip ahead.
// For HMC unlikely to go at faster than a solve per second, and
// tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
/////////////////////////////////////////////////////////////////////////////////////
uint64_t skip = 0x1; skip = skip<<40;
eng.discard(skip);
}
#endif
static RngEngine Reseed(RngEngine &eng)
{
std::vector<uint32_t> newseed;
std::uniform_int_distribution<uint32_t> uid;
return Reseed(eng,newseed,uid);
}
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
std::uniform_int_distribution<uint32_t> &uid)
{
const int reseeds=4;
newseed.resize(reseeds);
for(int i=0;i<reseeds;i++){
newseed[i] = uid(eng);
}
std::seed_seq sseq(newseed.begin(),newseed.end());
return RngEngine(sseq);
}
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
saved.resize(RngStateCount);
std::stringstream ss;
ss<<_generators[gen];
ss<<eng;
ss.seekg(0,ss.beg);
for(int i=0;i<RngStateCount;i++){
ss>>saved[i];
}
}
void SetState(std::vector<RngStateType> & saved,int gen){
void GetState(std::vector<RngStateType> & saved,int gen) {
GetState(saved,_generators[gen]);
}
void SetState(std::vector<RngStateType> & saved,RngEngine &eng){
assert(saved.size()==RngStateCount);
std::stringstream ss;
for(int i=0;i<RngStateCount;i++){
ss<< saved[i]<<" ";
}
ss.seekg(0,ss.beg);
ss>>_generators[gen];
ss>>eng;
}
void SetState(std::vector<RngStateType> & saved,int gen){
SetState(saved,_generators[gen]);
}
void SetEngine(RngEngine &Eng, int gen){
_generators[gen]=Eng;
}
void GetEngine(RngEngine &Eng, int gen){
Eng=_generators[gen];
}
template<class source> void Seed(source &src, int gen)
{
_generators[gen] = RngEngine(src);
}
};
class GridSerialRNG : public GridRNGbase {
public:
// FIXME ... do we require lockstep draws of randoms
// from all nodes keeping seeds consistent.
// place a barrier/broadcast in the fill routine
template<class source> void Seed(source &src)
{
typename source::result_type init = src();
CartesianCommunicator::BroadcastWorld(0,(void *)&init,sizeof(init));
_generators[0] = RngEngine(init);
_seeded=1;
}
GridSerialRNG() : GridRNGbase() {
_generators.resize(1);
_uniform.resize(1,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(1,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(1,std::discrete_distribution<int32_t>{1,1});
_seeded=0;
_uid.resize(1,std::uniform_int_distribution<uint32_t>() );
}
template <class sobj,class distribution> inline void fill(sobj &l,std::vector<distribution> &dist){
typedef typename sobj::scalar_type scalar_type;
@ -203,7 +266,7 @@ namespace Grid {
for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
};
@ -257,21 +320,16 @@ namespace Grid {
RealD *pointer=(RealD *)&l;
dist[0].reset();
for(int i=0;i<vRealD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]);
fillScalar(pointer[i],dist[0],_generators[0]);
}
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
}
void SeedRandomDevice(void){
std::random_device rd;
Seed(rd);
}
void SeedFixedIntegers(const std::vector<int> &seeds){
fixedSeed src(seeds);
Seed(src);
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq src(seeds.begin(),seeds.end());
Seed(src,0);
}
};
class GridParallelRNG : public GridRNGbase {
@ -279,7 +337,6 @@ namespace Grid {
double _time_counter;
public:
GridBase *_grid;
unsigned int _vol;
@ -295,61 +352,11 @@ namespace Grid {
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_seeded = 0;
_time_counter = 0.0;
_uid.resize(_vol,std::uniform_int_distribution<uint32_t>() );
}
template <class vobj,class distribution> inline void fill(Lattice<vobj> &l,std::vector<distribution> &dist){
// This loop could be made faster to avoid the Ahmdahl by
// i) seed generators on each timeslice, for x=y=z=0;
// ii) seed generators on each z for x=y=0
// iii)seed generators on each y,z for x=0
// iv) seed generators on each y,z,x
// made possible by physical indexing.
template<class source> void Seed(source &src)
{
std::vector<int> gcoor;
int gsites = _grid->_gsites;
typename source::result_type init = src();
RngEngine pseeder(init);
std::uniform_int_distribution<uint64_t> ui;
for(int gidx=0;gidx<gsites;gidx++){
int rank,o_idx,i_idx;
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
int l_idx=generator_idx(o_idx,i_idx);
const int num_rand_seed=16;
std::vector<int> site_seeds(num_rand_seed);
for(int i=0;i<site_seeds.size();i++){
site_seeds[i]= ui(pseeder);
}
_grid->Broadcast(0,(void *)&site_seeds[0],sizeof(int)*site_seeds.size());
if( rank == _grid->ThisRank() ){
fixedSeed ssrc(site_seeds);
typename source::result_type sinit = ssrc();
_generators[l_idx] = RngEngine(sinit);
}
}
_seeded=1;
}
//FIXME implement generic IO and create state save/restore
//void SaveState(const std::string<char> &file);
//void LoadState(const std::string<char> &file);
template <class vobj, class distribution>
inline void fill(Lattice<vobj> &l, std::vector<distribution> &dist) {
typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
@ -357,14 +364,11 @@ namespace Grid {
double inner_time_counter = usecond();
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
int Nsimd = _grid->Nsimd();// guaranteed to be the same for l._grid too
int osites = _grid->oSites();// guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
PARALLEL_FOR_LOOP
for (int ss = 0; ss < osites; ss++) {
parallel_for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
@ -386,13 +390,79 @@ namespace Grid {
_time_counter += usecond()- inner_time_counter;
};
void SeedRandomDevice(void) {
std::random_device rd;
Seed(rd);
}
void SeedFixedIntegers(const std::vector<int> &seeds) {
fixedSeed src(seeds);
Seed(src);
void SeedFixedIntegers(const std::vector<int> &seeds){
// Everyone generates the same seed_seq based on input seeds
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq source(seeds.begin(),seeds.end());
RngEngine master_engine(source);
#ifdef RNG_FAST_DISCARD
////////////////////////////////////////////////
// Skip ahead through a single stream.
// Applicable to SITMO and other has based/crypto RNGs
// Should be applicable to Mersenne Twister, but the C++11
// MT implementation does not implement fast discard even though
// in principle this is possible
////////////////////////////////////////////////
std::vector<int> gcoor;
int rank,o_idx,i_idx;
// Everybody loops over global volume.
for(int gidx=0;gidx<_grid->_gsites;gidx++){
Skip(master_engine); // Skip to next RNG sequence
// Where is it?
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// If this is one of mine we take it
if( rank == _grid->ThisRank() ){
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
}
}
#else
////////////////////////////////////////////////////////////////
// Machine and thread decomposition dependent seeding is efficient
// and maximally parallel; but NOT reproducible from machine to machine.
// Not ideal, but fastest way to reseed all nodes.
////////////////////////////////////////////////////////////////
{
// Obtain one Reseed per processor
int Nproc = _grid->ProcessorCount();
std::vector<RngEngine> seeders(Nproc);
int me= _grid->ThisRank();
for(int p=0;p<Nproc;p++){
seeders[p] = Reseed(master_engine);
}
master_engine = seeders[me];
}
{
// Obtain one reseeded generator per thread
int Nthread = GridThread::GetThreads();
std::vector<RngEngine> seeders(Nthread);
for(int t=0;t<Nthread;t++){
seeders[t] = Reseed(master_engine);
}
parallel_for(int t=0;t<Nthread;t++) {
// set up one per local site in threaded fashion
std::vector<uint32_t> newseeds;
std::uniform_int_distribution<uint32_t> uid;
for(int l=0;l<_grid->lSites();l++) {
if ( (l%Nthread)==t ) {
_generators[l] = Reseed(seeders[t],newseeds,uid);
}
}
}
}
#endif
}
void Report(){
@ -400,31 +470,39 @@ namespace Grid {
}
////////////////////////////////////////////////////////////////////////
// Support for rigorous test of RNG's
// Return uniform random uint32_t from requested site generator
////////////////////////////////////////////////////////////////////////
uint32_t GlobalU01(int gsite){
uint32_t the_number;
// who
std::vector<int> gcoor;
int rank,o_idx,i_idx;
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// draw
int l_idx=generator_idx(o_idx,i_idx);
if( rank == _grid->ThisRank() ){
the_number = _uid[l_idx](_generators[l_idx]);
}
// share & return
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
return the_number;
}
};
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._uniform);
}
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._gaussian);
}
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._bernoulli);
}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._uniform);
}
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._gaussian);
}
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._bernoulli);
}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
}
#endif