1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Big/Small crush test, and fast SITMO rng init, faster but not ideal

MT and Ranlux init.
This commit is contained in:
paboyle 2017-04-02 12:10:51 +09:00
parent 0fade84ab2
commit 83f6fab8fa
2 changed files with 117 additions and 100 deletions

View File

@ -30,12 +30,19 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define GRID_LATTICE_RNG_H
#include <random>
#ifdef RNG_SITMO
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
#endif
#if defined(RNG_SITMO)
#define RNG_FAST_DISCARD
#else
#undef RNG_FAST_DISCARD
#endif
namespace Grid {
//http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf ?
//////////////////////////////////////////////////////////////
// Allow the RNG state to be less dense than the fine grid
//////////////////////////////////////////////////////////////
@ -65,20 +72,22 @@ namespace Grid {
multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d];
}
return multiplicity;
}
// real scalars are one component
template<class scalar,class distribution,class generator> void fillScalar(scalar &s,distribution &dist,generator & gen)
template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen)
{
s=dist(gen);
}
template<class distribution,class generator> void fillScalar(ComplexF &s,distribution &dist, generator &gen)
template<class distribution,class generator>
void fillScalar(ComplexF &s,distribution &dist, generator &gen)
{
s=ComplexF(dist(gen),dist(gen));
}
template<class distribution,class generator> void fillScalar(ComplexD &s,distribution &dist,generator &gen)
template<class distribution,class generator>
void fillScalar(ComplexD &s,distribution &dist,generator &gen)
{
s=ComplexD(dist(gen),dist(gen));
}
@ -102,27 +111,6 @@ namespace Grid {
typedef uint64_t RngStateType;
static const int RngStateCount = 4;
#endif
///////////////////////
// support for parallel init
///////////////////////
#ifdef RNG_SITMO
static void Skip(RngEngine &eng)
{
uint64_t skip = 0x1; skip = skip<<40;
eng.discard(skip);
}
#endif
static RngEngine Reseed(RngEngine &eng)
{
const int reseeds=4;
std::uniform_int_distribution<uint32_t> uid;
std::vector<uint32_t> newseed(reseeds);
for(int i=0;i<reseeds;i++){
newseed[i] = uid(eng);
}
std::seed_seq sseq(newseed.begin(),newseed.end());
return RngEngine(sseq);
}
std::vector<RngEngine> _generators;
std::vector<std::uniform_real_distribution<RealD> > _uniform;
@ -130,6 +118,46 @@ namespace Grid {
std::vector<std::discrete_distribution<int32_t> > _bernoulli;
std::vector<std::uniform_int_distribution<uint32_t> > _uid;
///////////////////////
// support for parallel init
///////////////////////
#ifdef RNG_FAST_DISCARD
static void Skip(RngEngine &eng)
{
/////////////////////////////////////////////////////////////////////////////////////
// Skip by 2^40 elements between successive lattice sites
// This goes by 10^12.
// Consider quenched updating; likely never exceeding rate of 1000 sweeps
// per second on any machine. This gives us of order 10^9 seconds, or 100 years
// skip ahead.
// For HMC unlikely to go at faster than a solve per second, and
// tens of seconds per trajectory so this is clean in all reasonable cases,
// and margin of safety is orders of magnitude.
// We could hack Sitmo to skip in the higher order words of state if necessary
/////////////////////////////////////////////////////////////////////////////////////
uint64_t skip = 0x1; skip = skip<<40;
eng.discard(skip);
}
#endif
static RngEngine Reseed(RngEngine &eng)
{
std::vector<uint32_t> newseed;
std::uniform_int_distribution<uint32_t> uid;
return Reseed(eng,newseed,uid);
}
static RngEngine Reseed(RngEngine &eng,std::vector<uint32_t> & newseed,
std::uniform_int_distribution<uint32_t> &uid)
{
const int reseeds=4;
newseed.resize(reseeds);
for(int i=0;i<reseeds;i++){
newseed[i] = uid(eng);
}
std::seed_seq sseq(newseed.begin(),newseed.end());
return RngEngine(sseq);
}
void GetState(std::vector<RngStateType> & saved,RngEngine &eng) {
saved.resize(RngStateCount);
std::stringstream ss;
@ -160,7 +188,6 @@ namespace Grid {
void GetEngine(RngEngine &Eng, int gen){
Eng=_generators[gen];
}
template<class source> void Seed(source &src, int gen)
{
_generators[gen] = RngEngine(src);
@ -194,6 +221,7 @@ namespace Grid {
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
};
template <class distribution> inline void fill(ComplexF &l,std::vector<distribution> &dist){
dist[0].reset();
fillScalar(l,dist[0],_generators[0]);
@ -253,14 +281,13 @@ namespace Grid {
std::seed_seq src(seeds.begin(),seeds.end());
Seed(src,0);
}
};
class GridParallelRNG : public GridRNGbase {
public:
GridBase *_grid;
int _vol;
public:
int generator_idx(int os,int is){
return is*_grid->oSites()+os;
@ -289,7 +316,6 @@ namespace Grid {
int osites=_grid->oSites();
int words=sizeof(scalar_object)/sizeof(scalar_type);
parallel_for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
@ -314,23 +340,34 @@ namespace Grid {
void SeedFixedIntegers(const std::vector<int> &seeds){
// Everyone generates the same seed_seq based on input seeds
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq source(seeds.begin(),seeds.end());
RngEngine master_engine(source);
#ifdef RNG_SITMO
#ifdef RNG_FAST_DISCARD
////////////////////////////////////////////////
// Skip ahead through a single stream.
// Applicable to SITMO and other has based/crypto RNGs
// Should be applicable to Mersenne Twister, but the C++11
// MT implementation does not implement fast discard even though
// in principle this is possible
////////////////////////////////////////////////
std::vector<int> gcoor;
int rank,o_idx,i_idx;
// Everybody loops over global volume.
for(int gidx=0;gidx<_grid->_gsites;gidx++){
Skip(master_engine); // advance the state; does this work?
Skip(master_engine); // Skip to next RNG sequence
int rank,o_idx,i_idx;
// Where is it?
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// If this is one of mine we take it
if( rank == _grid->ThisRank() ){
int l_idx=generator_idx(o_idx,i_idx);
_generators[l_idx] = master_engine;
@ -338,23 +375,24 @@ namespace Grid {
}
#else
// Machine and thread decomposition dependent seeding
// is efficient and maximally parallel; but not
// reproducible from machine to machine. Not ideal, but fast.
// Different seed for each node.
////////////////////////////////////////////////////////////////
// Machine and thread decomposition dependent seeding is efficient
// and maximally parallel; but NOT reproducible from machine to machine.
// Not ideal, but fastest way to reseed all nodes.
////////////////////////////////////////////////////////////////
{
// Obtain one Reseed per processor
int Nproc = _grid->ProcessorCount();
int me= _grid->ThisRank();
std::vector<RngEngine> seeders(Nproc);
int me= _grid->ThisRank();
for(int p=0;p<Nproc;p++){
seeders[p] = Reseed(master_engine);
}
master_engine = seeders[me];
}
// Different seed for each thread
{
// Obtain one reseeded generator per thread
int Nthread = GridThread::GetThreads();
std::vector<RngEngine> seeders(Nthread);
for(int t=0;t<Nthread;t++){
@ -362,63 +400,52 @@ namespace Grid {
}
parallel_for(int t=0;t<Nthread;t++) {
master_engine = seeders[t];
// set up one per local site in threaded fashion
std::vector<uint32_t> newseeds;
std::uniform_int_distribution<uint32_t> uid;
for(int l=0;l<_grid->lSites();l++) {
if ( (l%Nthread)==t ) {
_generators[l] = Reseed(master_engine);
_generators[l] = Reseed(seeders[t],newseeds,uid);
}
}
}
}
#endif
}
////////////////////////////////////////////////////////////////////////
// Support for rigorous test of RNG's
// Return uniform random uint32_t from requested site generator
////////////////////////////////////////////////////////////////////////
uint32_t GlobalU01(int gsite){
std::vector<int> gcoor;
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
uint32_t the_number;
// who
std::vector<int> gcoor;
int rank,o_idx,i_idx;
_grid->GlobalIndexToGlobalCoor(gsite,gcoor);
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
// draw
int l_idx=generator_idx(o_idx,i_idx);
uint32_t the_number;
if( rank == _grid->ThisRank() ){
the_number = _uid[l_idx](_generators[l_idx]);
}
// share & return
_grid->Broadcast(rank,(void *)&the_number,sizeof(the_number));
return the_number;
}
};
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._uniform);
}
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._uniform); }
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l) { rng.fill(l,rng._gaussian); }
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){ rng.fill(l,rng._bernoulli);}
template <class vobj> inline void gaussian(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._gaussian);
}
template <class vobj> inline void bernoulli(GridParallelRNG &rng,Lattice<vobj> &l){
rng.fill(l,rng._bernoulli);
}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._uniform);
}
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._gaussian);
}
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){
rng.fill(l,rng._bernoulli);
}
template <class sobj> inline void random(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._uniform ); }
template <class sobj> inline void gaussian(GridSerialRNG &rng,sobj &l) { rng.fill(l,rng._gaussian ); }
template <class sobj> inline void bernoulli(GridSerialRNG &rng,sobj &l){ rng.fill(l,rng._bernoulli); }
}
#endif

View File

@ -32,17 +32,17 @@ using namespace Grid;
using namespace Grid::QCD;
// Wrap Grid's parallel RNG for testU01
#define BIG_CRUSH // Big crush enable (long running)
#undef TEST_RNG_STANDALONE // Test serial RNGs in isolation
extern "C" {
#include "TestU01.h"
#include "gdef.h"
#include "unif01.h"
#include "ucarry.h"
#include "bbattery.h"
}
std::vector<std::ranlux48> EngineRanlux;
std::vector<std::mt19937> EngineMT;
#include <Grid/sitmo_rng/sitmo_prng_engine.hpp>
std::vector<sitmo::prng_engine> EngineSitmo;
std::uniform_int_distribution<uint32_t> uid;
@ -74,21 +74,13 @@ public:
sRNG = _sRNG;
_grid= grid;
gsites= grid->_gsites;
std::cout << "Init: Global sites is " <<gsites<<std::endl;
site = 0;
}
static uint32_t GetU01(void) {
// std::uniform_int_distribution<uint32_t> uid;
uint32_t ret_val;
#if 0
ret_val = sRNG->_uid[0](sRNG->_generators[0]);
#else
ret_val = pRNG->GlobalU01(site);
site=(site+1)%gsites;
#endif
// std::cout << "site "<<site <<" "<<std::hex << ret_val <<std::dec<<std::endl;
return ret_val;
}
};
@ -134,24 +126,17 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "Initialised Grid RNGs "<<std::endl;
TestRNG::Init(&pRNG,&sRNG,&Grid);
std::cout << GridLogMessage<< "Grid RNG's are "<< std::string(TestRNG::name) <<std::endl;
std::cout << GridLogMessage<< "Grid RNG's are "<< std::string(TestRNG::name) <<std::endl;
unif01_Gen * gen;
// gen = ulcg_CreateLCG (2147483647, 397204094, 0, 12345);
// bbattery_SmallCrush (gen);
// gen = ucarry_CreateRanlux (48, 0x12345);
// bbattery_SmallCrush (gen);
/*
#ifdef TEST_RNG_STANDALONE
std::cout << GridLogMessage<< "Testing Standalone Ranlux" <<std::endl;
gen = unif01_CreateExternGenBits ((char *)"GridRanlux",GetU01Ranlux);
bbattery_SmallCrush (gen);
unif01_DeleteExternGenBits(gen);
std::cout << GridLogMessage<< "Testing Standalone Ranlux is complete" <<std::endl;
*/
/*
std::cout << GridLogMessage<< "Testing Standalone Mersenne Twister" <<std::endl;
gen = unif01_CreateExternGenBits ((char *)"GridMT",GetU01MT);
bbattery_SmallCrush (gen);
@ -163,14 +148,19 @@ int main (int argc, char ** argv)
bbattery_SmallCrush (gen);
unif01_DeleteExternGenBits(gen);
std::cout << GridLogMessage<< "Testing Standalone Sitmo is complete" <<std::endl;
*/
#endif
// gen = unif01_CreateExternGenBits ((char *)"xorshift", xorshift);
std::cout << GridLogMessage<< "Testing Grid "<< std::string(TestRNG::name) <<std::endl;
#ifdef BIG_CRUSH
std::cout << GridLogMessage<< "Testing Grid BigCrush for "<< std::string(TestRNG::name) <<std::endl;
gen = unif01_CreateExternGenBits(TestRNG::name,TestRNG::GetU01);
bbattery_BigCrush (gen);
std::cout << GridLogMessage<< "Testing Grid "<< std::string(TestRNG::name)<<" is complete" <<std::endl;
#else
std::cout << GridLogMessage<< "Testing Grid SmallCrush for "<< std::string(TestRNG::name) <<std::endl;
gen = unif01_CreateExternGenBits(TestRNG::name,TestRNG::GetU01);
bbattery_SmallCrush (gen);
std::cout << GridLogMessage<< "Testing Grid "<< std::string(TestRNG::name)<<" is complete" <<std::endl;
#endif
Grid_finalize();
}