1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-10 19:36:56 +01:00

Added single threaded version of the derivative for the Ls vectorised DWF

This commit is contained in:
Guido Cossu
2016-12-06 16:31:13 +00:00
parent 01480da0a8
commit b812d5e39c
11 changed files with 307 additions and 135 deletions

View File

@ -6,8 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -67,24 +67,42 @@ namespace Grid {
return multiplicity;
}
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
// assumes that the higher dimensions are not using more processors
// all further divisions are local
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
// then divide the number of local sites
// check that the total number of sims agree, meanse the iSites are the same
assert(fine->Nsimd() == coarse->Nsimd());
// check that the two grids divide cleanly
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
return fine->lSites() / coarse->lSites();
}
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
typedef std::seed_seq::result_type result_type;
std::seed_seq src;
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
result_type operator () (void){
std::vector<result_type> list(1);
src.generate(list.begin(),list.end());
return list[0];
}
};
@ -252,24 +270,30 @@ namespace Grid {
};
class GridParallelRNG : public GridRNGbase {
double _time_counter;
public:
GridBase *_grid;
int _vol;
unsigned int _vol;
int generator_idx(int os,int is){
return is*_grid->oSites()+os;
}
GridParallelRNG(GridBase *grid) : GridRNGbase() {
_grid=grid;
_vol =_grid->iSites()*_grid->oSites();
_grid = grid;
_vol =_grid->iSites()*_grid->oSites();
_generators.resize(_vol);
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
_gaussian.resize(_vol,std::normal_distribution<RealD>(0.0,1.0) );
_bernoulli.resize(_vol,std::discrete_distribution<int32_t>{1,1});
_seeded=0;
_seeded = 0;
_time_counter = 0.0;
}
@ -325,37 +349,36 @@ namespace Grid {
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int multiplicity = RNGfillable(_grid, l._grid);
double inner_time_counter = usecond();
int Nsimd = _grid->Nsimd();
int osites = _grid->oSites();
int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
int Nsimd = _grid->Nsimd();// guaranteed to be the same for l._grid too
int osites = _grid->oSites();// guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
PARALLEL_FOR_LOOP
for (int ss = 0; ss < osites; ss++) {
std::vector<scalar_object> buf(Nsimd);
for (int m = 0; m < multiplicity;
m++) { // Draw from same generator multiplicity times
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
int sm = multiplicity * ss +
m; // Maps the generator site to the fine site
int sm = multiplicity * ss + m; // Maps the generator site to the fine site
for (int si = 0; si < Nsimd; si++) {
int gdx = generator_idx(ss, si); // index of generator state
scalar_type *pointer = (scalar_type *)&buf[si];
dist[gdx].reset();
for (int idx = 0; idx < words; idx++) {
for (int idx = 0; idx < words; idx++)
fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
}
}
// merge into SIMD lanes
merge(l._odata[sm], buf);
}
}
_time_counter += usecond()- inner_time_counter;
};
void SeedRandomDevice(void) {
@ -366,6 +389,12 @@ namespace Grid {
fixedSeed src(seeds);
Seed(src);
}
void Report(){
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
}
};
template <class vobj> inline void random(GridParallelRNG &rng,Lattice<vobj> &l){