mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-17 07:17:06 +01:00
Merge branch 'develop' into feature/staggered-comms-compute
Conflicts: lib/qcd/action/fermion/ImprovedStaggeredFermion.cc
This commit is contained in:
@ -256,9 +256,42 @@ public:
|
||||
_odata[ss]=r._odata[ss];
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor.
// Copies the grid pointer and the checkerboard value from `r`, then
// move-assigns r's _odata storage into this object so the site data is
// transferred rather than copied element by element. `r._grid` and
// `r.checkerboard` are left unchanged.
Lattice(Lattice&& r)
{
  this->_grid        = r._grid;
  this->checkerboard = r.checkerboard;
  this->_odata       = std::move(r._odata);
}
|
||||
|
||||
|
||||
|
||||
// Move assignment.
// Takes over the grid pointer, checkerboard value and site storage of `r`.
// The site data (_odata) is moved, not copied; `r._grid` and
// `r.checkerboard` are left unchanged.
// Returns *this.
inline Lattice<vobj> & operator = (Lattice<vobj> && r)
{
  // Self-move guard: move-assigning _odata onto itself leaves it in an
  // unspecified (typically empty) state while _grid would still describe a
  // full set of sites, so `x = std::move(x)` must be a no-op.
  if ( this != &r ) {
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata       = std::move(r._odata);
  }
  return *this;
}
|
||||
|
||||
// Copy assignment from a lattice of the same site type.
// Adopts r's grid pointer and checkerboard value, resizes the local storage
// to the number of outer sites reported by that grid, then copies the site
// data element by element inside a parallel_for loop.
// Returns *this.
inline Lattice<vobj> & operator = (const Lattice<vobj> & r)
{
  this->_grid        = r._grid;
  this->checkerboard = r.checkerboard;

  // essential: storage must match the adopted grid before the copy loop
  this->_odata.resize(this->_grid->oSites());

  parallel_for(int site=0; site<_grid->oSites(); site++){
    this->_odata[site] = r._odata[site];
  }
  return *this;
}
|
||||
|
||||
// Converting assignment from a lattice whose site type (robj) may differ
// from vobj.
// The checkerboard value is taken from `r` first and conformable(*this,r)
// is called afterwards, in that order; each site is then assigned through
// the site-level operator= inside a parallel_for loop. This object's grid
// pointer and storage size are not modified here.
// Returns *this.
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r)
{
  checkerboard = r.checkerboard;
  conformable(*this,r);

  parallel_for(int site=0; site<_grid->oSites(); site++){
    _odata[site] = r._odata[site];
  }
  return *this;
}
|
||||
|
||||
virtual ~Lattice(void) = default;
|
||||
|
||||
void reset(GridBase* grid) {
|
||||
@ -277,15 +310,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||
this->checkerboard = r.checkerboard;
|
||||
conformable(*this,r);
|
||||
|
||||
parallel_for(int ss=0;ss<_grid->oSites();ss++){
|
||||
this->_odata[ss]=r._odata[ss];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
|
||||
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||
|
@ -179,7 +179,7 @@ namespace Grid {
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
#define DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd,IfSimd<vsimd> = 0>\
|
||||
inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
|
||||
{\
|
||||
@ -198,11 +198,6 @@ namespace Grid {
|
||||
typedef typename vsimd::scalar_type scalar;\
|
||||
return Comparison(functor<scalar,scalar>(),lhs,rhs);\
|
||||
}\
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
} \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
|
||||
{ \
|
||||
@ -212,14 +207,21 @@ namespace Grid {
|
||||
inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
|
||||
{ \
|
||||
return lhs op rhs._internal; \
|
||||
}
|
||||
} \
|
||||
|
||||
#define DECLARE_RELATIONAL(op,functor) \
|
||||
DECLARE_RELATIONAL_EQ(op,functor) \
|
||||
template<class vsimd>\
|
||||
inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
|
||||
{ \
|
||||
return lhs._internal op rhs._internal; \
|
||||
}
|
||||
|
||||
DECLARE_RELATIONAL(<,slt);
|
||||
DECLARE_RELATIONAL(<=,sle);
|
||||
DECLARE_RELATIONAL(>,sgt);
|
||||
DECLARE_RELATIONAL(>=,sge);
|
||||
DECLARE_RELATIONAL(==,seq);
|
||||
DECLARE_RELATIONAL_EQ(==,seq);
|
||||
DECLARE_RELATIONAL(!=,sne);
|
||||
|
||||
#undef DECLARE_RELATIONAL
|
||||
|
@ -52,23 +52,5 @@ namespace Grid {
|
||||
}
|
||||
};
|
||||
|
||||
// LatticeCoordinate();
|
||||
// FIXME for debug; deprecate this; made obsolete by
|
||||
template<class vobj> void lex_sites(Lattice<vobj> &l){
|
||||
Real *v_ptr = (Real *)&l._odata[0];
|
||||
size_t o_len = l._grid->oSites();
|
||||
size_t v_len = sizeof(vobj)/sizeof(vRealF);
|
||||
size_t vec_len = vRealF::Nsimd();
|
||||
|
||||
for(int i=0;i<o_len;i++){
|
||||
for(int j=0;j<v_len;j++){
|
||||
for(int vv=0;vv<vec_len;vv+=2){
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv ]= i+vv*500;
|
||||
v_ptr[i*v_len*vec_len+j*vec_len+vv+1]= i+vv*500;
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -77,9 +77,6 @@ namespace Grid {
|
||||
|
||||
|
||||
// merge of April 11 2017
|
||||
//<<<<<<< HEAD
|
||||
|
||||
|
||||
// this function is necessary for the LS vectorised field
|
||||
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
|
||||
{
|
||||
@ -91,7 +88,6 @@ namespace Grid {
|
||||
// all further divisions are local
|
||||
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
|
||||
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
|
||||
|
||||
|
||||
// then divide the number of local sites
|
||||
// check that the total number of simd lanes agrees, meaning the iSites are the same
|
||||
@ -102,27 +98,6 @@ namespace Grid {
|
||||
|
||||
return fine->lSites() / coarse->lSites();
|
||||
}
|
||||
|
||||
/*
|
||||
// Wrap seed_seq to give common interface with random_device
|
||||
class fixedSeed {
|
||||
public:
|
||||
typedef std::seed_seq::result_type result_type;
|
||||
std::seed_seq src;
|
||||
|
||||
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
|
||||
|
||||
result_type operator () (void){
|
||||
std::vector<result_type> list(1);
|
||||
src.generate(list.begin(),list.end());
|
||||
return list[0];
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
=======
|
||||
>>>>>>> develop
|
||||
*/
|
||||
|
||||
// real scalars are one component
|
||||
template<class scalar,class distribution,class generator>
|
||||
@ -171,7 +146,7 @@ namespace Grid {
|
||||
// support for parallel init
|
||||
///////////////////////
|
||||
#ifdef RNG_FAST_DISCARD
|
||||
static void Skip(RngEngine &eng)
|
||||
static void Skip(RngEngine &eng,uint64_t site)
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
// Skip by 2^40 elements between successive lattice sites
|
||||
@ -184,8 +159,11 @@ namespace Grid {
|
||||
// and margin of safety is orders of magnitude.
|
||||
// We could hack Sitmo to skip in the higher order words of state if necessary
|
||||
/////////////////////////////////////////////////////////////////////////////////////
|
||||
uint64_t skip = 0x1; skip = skip<<40;
|
||||
// uint64_t skip = site+1; // Old init Skipped then drew. Checked compat with faster init
|
||||
uint64_t skip = site;
|
||||
skip = skip<<40;
|
||||
eng.discard(skip);
|
||||
// std::cout << " Engine " <<site << " state " <<eng<<std::endl;
|
||||
}
|
||||
#endif
|
||||
static RngEngine Reseed(RngEngine &eng)
|
||||
@ -407,15 +385,14 @@ namespace Grid {
|
||||
// MT implementation does not implement fast discard even though
|
||||
// in principle this is possible
|
||||
////////////////////////////////////////////////
|
||||
std::vector<int> gcoor;
|
||||
int rank,o_idx,i_idx;
|
||||
|
||||
// Everybody loops over global volume.
|
||||
for(int gidx=0;gidx<_grid->_gsites;gidx++){
|
||||
|
||||
Skip(master_engine); // Skip to next RNG sequence
|
||||
parallel_for(int gidx=0;gidx<_grid->_gsites;gidx++){
|
||||
|
||||
// Where is it?
|
||||
int rank,o_idx,i_idx;
|
||||
std::vector<int> gcoor;
|
||||
|
||||
_grid->GlobalIndexToGlobalCoor(gidx,gcoor);
|
||||
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
|
||||
|
||||
@ -423,6 +400,7 @@ namespace Grid {
|
||||
if( rank == _grid->ThisRank() ){
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
_generators[l_idx] = master_engine;
|
||||
Skip(_generators[l_idx],gidx); // Skip to next RNG sequence
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -599,6 +599,51 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
|
||||
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* in_grid = in._grid;
|
||||
out.resize(in_grid->lSites());
|
||||
|
||||
int ndim = in_grid->Nd();
|
||||
int in_nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > in_icoor(in_nsimd);
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
in_icoor[lane].resize(ndim);
|
||||
in_grid->iCoorFromIindex(in_icoor[lane], lane);
|
||||
}
|
||||
|
||||
parallel_for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> out_ptrs(in_nsimd);
|
||||
|
||||
std::vector<int> in_ocoor(ndim);
|
||||
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
|
||||
|
||||
std::vector<int> lcoor(in_grid->Nd());
|
||||
|
||||
for(int lane=0; lane < in_nsimd; lane++){
|
||||
for(int mu=0;mu<ndim;mu++)
|
||||
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
|
||||
out_ptrs[lane] = &out[lex];
|
||||
}
|
||||
|
||||
//Unpack into those ptrs
|
||||
const vobj & in_vobj = in._odata[in_oidx];
|
||||
extract1(in_vobj, out_ptrs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
@ -648,10 +693,59 @@ vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
}
|
||||
}
|
||||
|
||||
template<typename vobj, typename sobj>
|
||||
typename std::enable_if<isSIMDvectorized<vobj>::value
|
||||
&& !isSIMDvectorized<sobj>::value, void>::type
|
||||
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
|
||||
{
|
||||
|
||||
typedef typename vobj::vector_type vtype;
|
||||
|
||||
GridBase* grid = out._grid;
|
||||
assert(in.size()==grid->lSites());
|
||||
|
||||
int ndim = grid->Nd();
|
||||
int nsimd = vtype::Nsimd();
|
||||
|
||||
std::vector<std::vector<int> > icoor(nsimd);
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
icoor[lane].resize(ndim);
|
||||
grid->iCoorFromIindex(icoor[lane],lane);
|
||||
}
|
||||
|
||||
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
|
||||
//Assemble vector of pointers to output elements
|
||||
std::vector<sobj*> ptrs(nsimd);
|
||||
|
||||
std::vector<int> ocoor(ndim);
|
||||
grid->oCoorFromOindex(ocoor, oidx);
|
||||
|
||||
std::vector<int> lcoor(grid->Nd());
|
||||
|
||||
for(int lane=0; lane < nsimd; lane++){
|
||||
|
||||
for(int mu=0;mu<ndim;mu++){
|
||||
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
|
||||
}
|
||||
|
||||
int lex;
|
||||
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
|
||||
ptrs[lane] = &in[lex];
|
||||
}
|
||||
|
||||
//pack from those ptrs
|
||||
vobj vecobj;
|
||||
merge1(vecobj, ptrs, 0);
|
||||
out._odata[oidx] = vecobj;
|
||||
}
|
||||
}
|
||||
|
||||
//Convert a Lattice from one precision to another
|
||||
template<class VobjOut, class VobjIn>
|
||||
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
assert(out._grid->Nd() == in._grid->Nd());
|
||||
assert(out._grid->FullDimensions() == in._grid->FullDimensions());
|
||||
out.checkerboard = in.checkerboard;
|
||||
GridBase *in_grid=in._grid;
|
||||
GridBase *out_grid = out._grid;
|
||||
@ -694,30 +788,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Communicate between grids
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// All to all plan
|
||||
//
|
||||
// Subvolume on fine grid is v. Vectors a,b,c,d
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SIMPLEST CASE:
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mesh of nodes (2) ; subdivide to 1 subdivisions
|
||||
//
|
||||
// Lex ord:
|
||||
// N0 va0 vb0 N1 va1 vb1
|
||||
//
|
||||
// For each dimension do an all to all
|
||||
//
|
||||
// full AllToAll(0)
|
||||
// N0 va0 va1 N1 vb0 vb1
|
||||
//
|
||||
// REARRANGE
|
||||
// N0 va01 N1 vb01
|
||||
//
|
||||
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
|
||||
// NB: Easiest to programme if keep in lex order.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SIMPLE CASE:
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -751,9 +821,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
//
|
||||
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
|
||||
// NB: Easiest to programme if keep in lex order.
|
||||
//
|
||||
/////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
|
||||
*
|
||||
* 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
|
||||
*
|
||||
* node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth
|
||||
* node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
|
||||
* node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
|
||||
* node 3 1st chunk of node 3M..(4M-1); 2nd chunk of node 3M..(4M-1)..
|
||||
* etc...
|
||||
*/
|
||||
template<class Vobj>
|
||||
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
{
|
||||
@ -812,57 +890,58 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
|
||||
int nvec = nvector; // Counts down to 1 as we collapse dims
|
||||
std::vector<int> ldims = full_grid->_ldimensions;
|
||||
std::vector<int> lcoor(ndim);
|
||||
|
||||
for(int d=ndim-1;d>=0;d--){
|
||||
|
||||
if ( ratio[d] != 1 ) {
|
||||
|
||||
full_grid ->AllToAll(d,alldata,tmpdata);
|
||||
// std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
|
||||
// for(int v=0;v<nvec;v++){
|
||||
// std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
|
||||
// std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
|
||||
// }
|
||||
//////////////////////////////////////////
|
||||
//Local volume for this dimension is expanded by ratio of processor extents
|
||||
// Number of vectors is decreased by same factor
|
||||
// Rearrange to lexico for bigger volume
|
||||
//////////////////////////////////////////
|
||||
nvec /= ratio[d];
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
alldata=tmpdata;
|
||||
split_grid->AllToAll(d,alldata,tmpdata);
|
||||
}
|
||||
|
||||
auto rdims = ldims; rdims[d] *= ratio[d];
|
||||
auto rsites= lsites*ratio[d];
|
||||
for(int v=0;v<nvec;v++){
|
||||
auto rdims = ldims;
|
||||
auto M = ratio[d];
|
||||
auto rsites= lsites*M;// increases rsites by M
|
||||
nvec /= M; // Reduce nvec by subdivision factor
|
||||
rdims[d] *= M; // increase local dim by same factor
|
||||
|
||||
// For loop over each site within old subvol
|
||||
for(int lsite=0;lsite<lsites;lsite++){
|
||||
int sP = split_grid->_processors[d];
|
||||
int fP = full_grid->_processors[d];
|
||||
|
||||
Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
|
||||
int fvol = lsites;
|
||||
|
||||
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
||||
|
||||
for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
|
||||
// Loop over reordered data post A2A
|
||||
parallel_for(int c=0;c<chunk;c++){
|
||||
std::vector<int> coor(ndim);
|
||||
for(int m=0;m<M;m++){
|
||||
for(int s=0;s<sP;s++){
|
||||
|
||||
// addressing; use lexico
|
||||
int lex_r;
|
||||
uint64_t lex_c = c+chunk*m+chunk*M*s;
|
||||
uint64_t lex_fvol_vec = c+chunk*s;
|
||||
uint64_t lex_fvol = lex_fvol_vec%fvol;
|
||||
uint64_t lex_vec = lex_fvol_vec/fvol;
|
||||
|
||||
auto rcoor = lcoor; rcoor[d] += r*ldims[d];
|
||||
// which node sets an adder to the coordinate
|
||||
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
|
||||
coor[d] += m*ldims[d];
|
||||
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
|
||||
lex_r += lex_vec * rsites;
|
||||
|
||||
int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
|
||||
rsite += v * rsites;
|
||||
// LexicoFind coordinate & vector number within split lattice
|
||||
alldata[lex_r] = tmpdata[lex_c];
|
||||
|
||||
int rmul=nvec*lsites;
|
||||
int vmul= lsites;
|
||||
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
|
||||
// if ( lsite==0 ) {
|
||||
// std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
ldims[d]*= ratio[d];
|
||||
lsites *= ratio[d];
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
tmpdata = alldata;
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
}
|
||||
}
|
||||
}
|
||||
vectorizeFromLexOrdArray(alldata,split);
|
||||
@ -933,72 +1012,74 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Start from split grid and work towards full grid
|
||||
/////////////////////////////////////////////////////////////////
|
||||
std::vector<int> lcoor(ndim);
|
||||
std::vector<int> rcoor(ndim);
|
||||
|
||||
int nvec = 1;
|
||||
lsites = split_grid->lSites();
|
||||
std::vector<int> ldims = split_grid->_ldimensions;
|
||||
uint64_t rsites = split_grid->lSites();
|
||||
std::vector<int> rdims = split_grid->_ldimensions;
|
||||
|
||||
// for(int d=ndim-1;d>=0;d--){
|
||||
for(int d=0;d<ndim;d++){
|
||||
|
||||
if ( ratio[d] != 1 ) {
|
||||
|
||||
auto M = ratio[d];
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
tmpdata = alldata;
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
//Local volume for this dimension is expanded by ratio of processor extents
|
||||
// Number of vectors is decreased by same factor
|
||||
// Rearrange to lexico for bigger volume
|
||||
//////////////////////////////////////////
|
||||
auto rsites= lsites/ratio[d];
|
||||
auto rdims = ldims; rdims[d]/=ratio[d];
|
||||
|
||||
for(int v=0;v<nvec;v++){
|
||||
|
||||
// rsite, rcoor --> smaller local volume
|
||||
// lsite, lcoor --> bigger original (single node?) volume
|
||||
// For loop over each site within smaller subvol
|
||||
for(int rsite=0;rsite<rsites;rsite++){
|
||||
|
||||
Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
|
||||
int lsite;
|
||||
|
||||
for(int r=0;r<ratio[d];r++){
|
||||
|
||||
lcoor = rcoor; lcoor[d] += r*rdims[d];
|
||||
Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
|
||||
|
||||
int rmul=nvec*rsites;
|
||||
int vmul= rsites;
|
||||
tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
|
||||
int sP = split_grid->_processors[d];
|
||||
int fP = full_grid->_processors[d];
|
||||
|
||||
auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor
|
||||
auto lsites= rsites/M; // Decreases rsites by M
|
||||
|
||||
int fvol = lsites;
|
||||
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
||||
|
||||
{
|
||||
// Loop over reordered data post A2A
|
||||
parallel_for(int c=0;c<chunk;c++){
|
||||
std::vector<int> coor(ndim);
|
||||
for(int m=0;m<M;m++){
|
||||
for(int s=0;s<sP;s++){
|
||||
|
||||
// addressing; use lexico
|
||||
int lex_r;
|
||||
uint64_t lex_c = c+chunk*m+chunk*M*s;
|
||||
uint64_t lex_fvol_vec = c+chunk*s;
|
||||
uint64_t lex_fvol = lex_fvol_vec%fvol;
|
||||
uint64_t lex_vec = lex_fvol_vec/fvol;
|
||||
|
||||
// which node sets an adder to the coordinate
|
||||
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
|
||||
coor[d] += m*ldims[d];
|
||||
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
|
||||
lex_r += lex_vec * rsites;
|
||||
|
||||
// LexicoFind coordinate & vector number within split lattice
|
||||
tmpdata[lex_c] = alldata[lex_r];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nvec *= ratio[d];
|
||||
ldims[d]=rdims[d];
|
||||
lsites =rsites;
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
tmpdata=alldata;
|
||||
}
|
||||
full_grid ->AllToAll(d,tmpdata,alldata);
|
||||
rdims[d]/= M;
|
||||
rsites /= M;
|
||||
nvec *= M; // Increase nvec by subdivision factor
|
||||
}
|
||||
}
|
||||
|
||||
lsites = full_grid->lSites();
|
||||
for(int v=0;v<nvector;v++){
|
||||
assert(v<full.size());
|
||||
// assert(v<full.size());
|
||||
parallel_for(int site=0;site<lsites;site++){
|
||||
// assert(v*lsites+site < alldata.size());
|
||||
scalardata[site] = alldata[v*lsites+site];
|
||||
}
|
||||
vectorizeFromLexOrdArray(scalardata,full[v]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user