1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 11:15:55 +01:00
Grid/Grid/lattice/Lattice_transfer.h
Christopher Kelly 1ad54d049d To PeriodicBC and ConjugateBC, added a new function "CshiftLink" which performs a boundary-aware C-shift of links or products of links. For the latter, the links crossing the global boundary are complex-conjugated.
To the gauge implementations, added CshiftLink functions calling into the appropriate operation for the BC in a given direction.
GaugeTransform, FourierAcceleratedGaugeFixer and WilsonLoops::FieldStrength no longer implicitly assume periodic boundary conditions; instead the shifted link is obtained using CshiftLink and is aware of the gauge implementation.
Added an assert-check to ensure that the gauge fixing converges within the specified number of steps.
Added functionality to compute the timeslice averaged plaquette
Added functionality to compute the 5LI topological charge and timeslice topological charge
Added a check of the properties of the charge conjugation matrix C=-gamma_2 gamma_4 to Test_gamma
Fixed const correctness for Replicate
Modified Test_fft_gfix to support either conjugate or periodic BCs, optionally disabling Fourier-accelerated gauge fixing, and tuning of alpha using cmdline options
2022-06-02 15:30:41 -04:00

1427 lines
42 KiB
C++

/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_transfer.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
NAMESPACE_BEGIN(Grid);
inline void subdivides(GridBase *coarse,GridBase *fine)
{
assert(coarse->_ndimension == fine->_ndimension);
int _ndimension = coarse->_ndimension;
// local and global volumes subdivide cleanly after SIMDization
for(int d=0;d<_ndimension;d++){
assert(coarse->_processors[d] == fine->_processors[d]);
assert(coarse->_simd_layout[d] == fine->_simd_layout[d]);
assert((fine->_rdimensions[d] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[d]);
}
}
////////////////////////////////////////////////////////////////////////////////////////////
// remove and insert a half checkerboard
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full)
{
half.Checkerboard() = cb;
autoView( half_v, half, CpuWrite);
autoView( full_v, full, CpuRead);
thread_for(ss, full.Grid()->oSites(),{
int cbos;
Coordinate coor;
full.Grid()->oCoorFromOindex(coor,ss);
cbos=half.Grid()->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half.Grid()->oIndex(coor);
half_v[ssh] = full_v[ss];
}
});
}
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half)
{
int cb = half.Checkerboard();
autoView( half_v , half, CpuRead);
autoView( full_v , full, CpuWrite);
thread_for(ss,full.Grid()->oSites(),{
Coordinate coor;
int cbos;
full.Grid()->oCoorFromOindex(coor,ss);
cbos=half.Grid()->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half.Grid()->oIndex(coor);
full_v[ss]=half_v[ssh];
}
});
}
template<class vobj> inline void acceleratorPickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full, int checker_dim_half=0)
{
half.Checkerboard() = cb;
autoView(half_v, half, AcceleratorWrite);
autoView(full_v, full, AcceleratorRead);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss, full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++) {
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(half_v[ssh],full_v(ss));
}
});
}
template<class vobj> inline void acceleratorSetCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half, int checker_dim_half=0)
{
int cb = half.Checkerboard();
autoView(half_v , half, AcceleratorRead);
autoView(full_v , full, AcceleratorWrite);
Coordinate rdim_full = full.Grid()->_rdimensions;
Coordinate rdim_half = half.Grid()->_rdimensions;
unsigned long ndim_half = half.Grid()->_ndimension;
Coordinate checker_dim_mask_half = half.Grid()->_checker_dim_mask;
Coordinate ostride_half = half.Grid()->_ostride;
accelerator_for(ss,full.Grid()->oSites(),full.Grid()->Nsimd(),{
Coordinate coor;
int cbos;
int linear=0;
Lexicographic::CoorFromIndex(coor,ss,rdim_full);
assert(coor.size()==ndim_half);
for(int d=0;d<ndim_half;d++){
if(checker_dim_mask_half[d]) linear += coor[d];
}
cbos = (linear&0x1);
if (cbos==cb) {
int ssh=0;
for(int d=0;d<ndim_half;d++){
if (d == checker_dim_half) ssh += ostride_half[d] * ((coor[d] / 2) % rdim_half[d]);
else ssh += ostride_half[d] * (coor[d] % rdim_half[d]);
}
coalescedWrite(full_v[ss],half_v(ssh));
}
});
}
////////////////////////////////////////////////////////////////////////////////////////////
// Flexible Type Conversion for internal promotion to double as well as graceful
// treatment of scalar-compatible types
////////////////////////////////////////////////////////////////////////////////////////////
accelerator_inline void convertType(ComplexD & out, const std::complex<double> & in) {
out = in;
}
accelerator_inline void convertType(ComplexF & out, const std::complex<float> & in) {
out = in;
}
template<typename T>
accelerator_inline EnableIf<isGridFundamental<T>> convertType(T & out, const T & in) {
out = in;
}
// This would allow for conversions between GridFundamental types, but is not strictly needed as yet
/*template<typename T1, typename T2>
accelerator_inline typename std::enable_if<isGridFundamental<T1>::value && isGridFundamental<T2>::value>::type
// Or to make this very broad, conversions between anything that's not a GridTensor could be allowed
//accelerator_inline typename std::enable_if<!isGridTensor<T1>::value && !isGridTensor<T2>::value>::type
convertType(T1 & out, const T2 & in) {
out = in;
}*/
#ifdef GRID_SIMT
accelerator_inline void convertType(vComplexF & out, const ComplexF & in) {
((ComplexF*)&out)[acceleratorSIMTlane(vComplexF::Nsimd())] = in;
}
accelerator_inline void convertType(vComplexD & out, const ComplexD & in) {
((ComplexD*)&out)[acceleratorSIMTlane(vComplexD::Nsimd())] = in;
}
accelerator_inline void convertType(vComplexD2 & out, const ComplexD & in) {
((ComplexD*)&out)[acceleratorSIMTlane(vComplexD::Nsimd()*2)] = in;
}
#endif
accelerator_inline void convertType(vComplexF & out, const vComplexD2 & in) {
out.v = Optimization::PrecisionChange::DtoS(in._internal[0].v,in._internal[1].v);
}
accelerator_inline void convertType(vComplexD2 & out, const vComplexF & in) {
Optimization::PrecisionChange::StoD(in.v,out._internal[0].v,out._internal[1].v);
}
template<typename T1,typename T2>
accelerator_inline void convertType(iScalar<T1> & out, const iScalar<T2> & in) {
convertType(out._internal,in._internal);
}
template<typename T1,typename T2>
accelerator_inline NotEnableIf<isGridScalar<T1>> convertType(T1 & out, const iScalar<T2> & in) {
convertType(out,in._internal);
}
template<typename T1,typename T2>
accelerator_inline NotEnableIf<isGridScalar<T2>> convertType(iScalar<T1> & out, const T2 & in) {
convertType(out._internal,in);
}
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iMatrix<T1,N> & out, const iMatrix<T2,N> & in) {
for (int i=0;i<N;i++)
for (int j=0;j<N;j++)
convertType(out._internal[i][j],in._internal[i][j]);
}
template<typename T1,typename T2,int N>
accelerator_inline void convertType(iVector<T1,N> & out, const iVector<T2,N> & in) {
for (int i=0;i<N;i++)
convertType(out._internal[i],in._internal[i]);
}
template<typename T1,typename T2>
accelerator_inline void convertType(Lattice<T1> & out, const Lattice<T2> & in) {
autoView( out_v , out,AcceleratorWrite);
autoView( in_v , in ,AcceleratorRead);
accelerator_for(ss,out_v.size(),T1::Nsimd(),{
convertType(out_v[ss],in_v(ss));
});
}
////////////////////////////////////////////////////////////////////////////////////////////
// precision-promoted local inner product
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
inline auto localInnerProductD(const Lattice<vobj> &lhs,const Lattice<vobj> &rhs)
-> Lattice<iScalar<decltype(TensorRemove(innerProductD2(lhs.View(CpuRead)[0],rhs.View(CpuRead)[0])))>>
{
autoView( lhs_v , lhs, AcceleratorRead);
autoView( rhs_v , rhs, AcceleratorRead);
typedef decltype(TensorRemove(innerProductD2(lhs_v[0],rhs_v[0]))) t_inner;
Lattice<iScalar<t_inner>> ret(lhs.Grid());
{
autoView(ret_v, ret,AcceleratorWrite);
accelerator_for(ss,rhs_v.size(),vobj::Nsimd(),{
convertType(ret_v[ss],innerProductD2(lhs_v(ss),rhs_v(ss)));
});
}
return ret;
}
////////////////////////////////////////////////////////////////////////////////////////////
// block routines
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj,class CComplex,int nbasis,class VLattice>
inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
const Lattice<vobj> &fineData,
const VLattice &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
Lattice<iScalar<CComplex>> ip(coarse);
Lattice<vobj> fineDataRed = fineData;
autoView( coarseData_ , coarseData, AcceleratorWrite);
autoView( ip_ , ip, AcceleratorWrite);
for(int v=0;v<nbasis;v++) {
blockInnerProductD(ip,Basis[v],fineDataRed); // ip = <basis|fine>
accelerator_for( sc, coarse->oSites(), vobj::Nsimd(), {
convertType(coarseData_[sc](v),ip_[sc]);
});
// improve numerical stability of projection
// |fine> = |fine> - <basis|fine> |basis>
ip=-ip;
blockZAXPY(fineDataRed,ip,Basis[v],fineDataRed);
}
}
template<class vobj,class vobj2,class CComplex>
inline void blockZAXPY(Lattice<vobj> &fineZ,
const Lattice<CComplex> &coarseA,
const Lattice<vobj2> &fineX,
const Lattice<vobj> &fineY)
{
GridBase * fine = fineZ.Grid();
GridBase * coarse= coarseA.Grid();
fineZ.Checkerboard()=fineX.Checkerboard();
assert(fineX.Checkerboard()==fineY.Checkerboard());
subdivides(coarse,fine); // require they map
conformable(fineX,fineY);
conformable(fineX,fineZ);
int _ndimension = coarse->_ndimension;
Coordinate block_r (_ndimension);
// FIXME merge with subdivide checking routine as this is redundant
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
assert(block_r[d]*coarse->_rdimensions[d]==fine->_rdimensions[d]);
}
autoView( fineZ_ , fineZ, AcceleratorWrite);
autoView( fineX_ , fineX, AcceleratorRead);
autoView( fineY_ , fineY, AcceleratorRead);
autoView( coarseA_, coarseA, AcceleratorRead);
Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions;
accelerator_for(sf, fine->oSites(), CComplex::Nsimd(), {
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse_rdimensions);
// z = A x + y
#ifdef GRID_SIMT
typename vobj2::tensor_reduced::scalar_object cA;
typename vobj::scalar_object cAx;
#else
typename vobj2::tensor_reduced cA;
vobj cAx;
#endif
convertType(cA,TensorRemove(coarseA_(sc)));
auto prod = cA*fineX_(sf);
convertType(cAx,prod);
coalescedWrite(fineZ_[sf],cAx+fineY_(sf));
});
return;
}
template<class vobj,class CComplex>
inline void blockInnerProductD(Lattice<CComplex> &CoarseInner,
const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY)
{
typedef iScalar<decltype(TensorRemove(innerProductD2(vobj(),vobj())))> dotp;
GridBase *coarse(CoarseInner.Grid());
GridBase *fine (fineX.Grid());
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
Lattice<dotp> coarse_inner(coarse);
// Precision promotion
fine_inner = localInnerProductD<vobj>(fineX,fineY);
blockSum(coarse_inner,fine_inner);
{
autoView( CoarseInner_ , CoarseInner,AcceleratorWrite);
autoView( coarse_inner_ , coarse_inner,AcceleratorRead);
accelerator_for(ss, coarse->oSites(), 1, {
convertType(CoarseInner_[ss], TensorRemove(coarse_inner_[ss]));
});
}
}
template<class vobj,class CComplex> // deprecate
inline void blockInnerProduct(Lattice<CComplex> &CoarseInner,
const Lattice<vobj> &fineX,
const Lattice<vobj> &fineY)
{
typedef decltype(innerProduct(vobj(),vobj())) dotp;
GridBase *coarse(CoarseInner.Grid());
GridBase *fine (fineX.Grid());
Lattice<dotp> fine_inner(fine); fine_inner.Checkerboard() = fineX.Checkerboard();
Lattice<dotp> coarse_inner(coarse);
// Precision promotion?
fine_inner = localInnerProduct(fineX,fineY);
blockSum(coarse_inner,fine_inner);
{
autoView( CoarseInner_ , CoarseInner, AcceleratorWrite);
autoView( coarse_inner_ , coarse_inner, AcceleratorRead);
accelerator_for(ss, coarse->oSites(), 1, {
CoarseInner_[ss] = coarse_inner_[ss];
});
}
}
template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{
GridBase *coarse = ip.Grid();
Lattice<vobj> zz(fineX.Grid()); zz=Zero(); zz.Checkerboard()=fineX.Checkerboard();
blockInnerProduct(ip,fineX,fineX);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
}
// useful in multigrid project;
// Generic name : Coarsen?
template<class vobj>
inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
subdivides(coarse,fine); // require they map
int _ndimension = coarse->_ndimension;
Coordinate block_r (_ndimension);
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
int blockVol = fine->oSites()/coarse->oSites();
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
autoView( coarseData_ , coarseData, AcceleratorWrite);
autoView( fineData_ , fineData, AcceleratorRead);
auto coarseData_p = &coarseData_[0];
auto fineData_p = &fineData_[0];
Coordinate fine_rdimensions = fine->_rdimensions;
Coordinate coarse_rdimensions = coarse->_rdimensions;
vobj zz = Zero();
accelerator_for(sc,coarse->oSites(),1,{
// One thread per sub block
Coordinate coor_c(_ndimension);
Lexicographic::CoorFromIndex(coor_c,sc,coarse_rdimensions); // Block coordinate
vobj cd = zz;
for(int sb=0;sb<blockVol;sb++){
int sf;
Coordinate coor_b(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_b,sb,block_r); // Block sub coordinate
for(int d=0;d<_ndimension;d++) coor_f[d]=coor_c[d]*block_r[d] + coor_b[d];
Lexicographic::IndexFromCoor(coor_f,sf,fine_rdimensions);
cd=cd+fineData_p[sf];
}
coarseData_p[sc] = cd;
});
return;
}
template<class vobj>
inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vobj> &picked,Coordinate coor)
{
GridBase * fine = unpicked.Grid();
Lattice<vobj> zz(fine); zz.Checkerboard() = unpicked.Checkerboard();
Lattice<iScalar<vInteger> > fcoor(fine);
zz = Zero();
picked = unpicked;
for(int d=0;d<fine->_ndimension;d++){
LatticeCoordinate(fcoor,d);
int block= fine->_rdimensions[d] / coarse->_rdimensions[d];
int lo = (coor[d])*block;
int hi = (coor[d]+1)*block;
picked = where( (fcoor<hi) , picked, zz);
picked = where( (fcoor>=lo), picked, zz);
}
}
template<class CComplex,class VLattice>
inline void blockOrthonormalize(Lattice<CComplex> &ip,VLattice &Basis)
{
GridBase *coarse = ip.Grid();
GridBase *fine = Basis[0].Grid();
int nbasis = Basis.size() ;
// checks
subdivides(coarse,fine);
for(int i=0;i<nbasis;i++){
conformable(Basis[i].Grid(),fine);
}
for(int v=0;v<nbasis;v++) {
for(int u=0;u<v;u++) {
//Inner product & remove component
blockInnerProductD(ip,Basis[u],Basis[v]);
ip = -ip;
blockZAXPY(Basis[v],ip,Basis[u],Basis[v]);
}
blockNormalise(ip,Basis[v]);
}
}
template<class vobj,class CComplex>
inline void blockOrthogonalise(Lattice<CComplex> &ip,std::vector<Lattice<vobj> > &Basis) // deprecated inaccurate naming
{
blockOrthonormalize(ip,Basis);
}
#if 0
// TODO: CPU optimized version here
template<class vobj,class CComplex,int nbasis>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData,
const std::vector<Lattice<vobj> > &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
int _ndimension = coarse->_ndimension;
// checks
assert( nbasis == Basis.size() );
subdivides(coarse,fine);
for(int i=0;i<nbasis;i++){
conformable(Basis[i].Grid(),fine);
}
Coordinate block_r (_ndimension);
for(int d=0 ; d<_ndimension;d++){
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
}
autoView( fineData_ , fineData, AcceleratorWrite);
autoView( coarseData_ , coarseData, AcceleratorRead);
// Loop with a cache friendly loop ordering
accelerator_for(sf,fine->oSites(),1,{
int sc;
Coordinate coor_c(_ndimension);
Coordinate coor_f(_ndimension);
Lexicographic::CoorFromIndex(coor_f,sf,fine->_rdimensions);
for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
Lexicographic::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
for(int i=0;i<nbasis;i++) {
/* auto basis_ = Basis[i], );*/
if(i==0) fineData_[sf]=coarseData_[sc](i) *basis_[sf]);
else fineData_[sf]=fineData_[sf]+coarseData_[sc](i)*basis_[sf]);
}
});
return;
}
#else
template<class vobj,class CComplex,int nbasis,class VLattice>
inline void blockPromote(const Lattice<iVector<CComplex,nbasis > > &coarseData,
Lattice<vobj> &fineData,
const VLattice &Basis)
{
GridBase * fine = fineData.Grid();
GridBase * coarse= coarseData.Grid();
fineData=Zero();
for(int i=0;i<nbasis;i++) {
Lattice<iScalar<CComplex> > ip = PeekIndex<0>(coarseData,i);
//Lattice<CComplex> cip(coarse);
//autoView( cip_ , cip, AcceleratorWrite);
//autoView( ip_ , ip, AcceleratorRead);
//accelerator_forNB(sc,coarse->oSites(),CComplex::Nsimd(),{
// coalescedWrite(cip_[sc], ip_(sc)());
// });
//blockZAXPY<vobj,CComplex >(fineData,cip,Basis[i],fineData);
blockZAXPY(fineData,ip,Basis[i],fineData);
}
}
#endif
// Useful for precision conversion, or indeed anything where an operator= does a conversion on scalars.
// Simd layouts need not match since we use peek/poke Local
template<class vobj,class vvobj>
void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
{
typedef typename vobj::scalar_object sobj;
typedef typename vvobj::scalar_object ssobj;
GridBase *ig = in.Grid();
GridBase *og = out.Grid();
int ni = ig->_ndimension;
int no = og->_ndimension;
assert(ni == no);
for(int d=0;d<no;d++){
assert(ig->_processors[d] == og->_processors[d]);
assert(ig->_ldimensions[d] == og->_ldimensions[d]);
assert(ig->lSites() == og->lSites());
}
autoView(in_v,in,CpuRead);
autoView(out_v,out,CpuWrite);
thread_for(idx, ig->lSites(),{
sobj s;
ssobj ss;
Coordinate lcoor(ni);
ig->LocalIndexToLocalCoor(idx,lcoor);
peekLocalSite(s,in_v,lcoor);
ss=s;
pokeLocalSite(ss,out_v,lcoor);
});
}
template<class vobj>
void localCopyRegion(const Lattice<vobj> &From,Lattice<vobj> & To,Coordinate FromLowerLeft, Coordinate ToLowerLeft, Coordinate RegionSize)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
static const int words=sizeof(vobj)/sizeof(vector_type);
GridBase *Fg = From.Grid();
GridBase *Tg = To.Grid();
assert(!Fg->_isCheckerBoarded);
assert(!Tg->_isCheckerBoarded);
int Nsimd = Fg->Nsimd();
int nF = Fg->_ndimension;
int nT = Tg->_ndimension;
int nd = nF;
assert(nF == nT);
for(int d=0;d<nd;d++){
assert(Fg->_processors[d] == Tg->_processors[d]);
}
// the above should guarantee that the operations are local
Coordinate ldf = Fg->_ldimensions;
Coordinate rdf = Fg->_rdimensions;
Coordinate isf = Fg->_istride;
Coordinate osf = Fg->_ostride;
Coordinate rdt = Tg->_rdimensions;
Coordinate ist = Tg->_istride;
Coordinate ost = Tg->_ostride;
autoView( t_v , To, AcceleratorWrite);
autoView( f_v , From, AcceleratorRead);
accelerator_for(idx,Fg->lSites(),1,{
sobj s;
Coordinate Fcoor(nd);
Coordinate Tcoor(nd);
Lexicographic::CoorFromIndex(Fcoor,idx,ldf);
int in_region=1;
for(int d=0;d<nd;d++){
if ( (Fcoor[d] < FromLowerLeft[d]) || (Fcoor[d]>=FromLowerLeft[d]+RegionSize[d]) ){
in_region=0;
}
Tcoor[d] = ToLowerLeft[d]+ Fcoor[d]-FromLowerLeft[d];
}
if (in_region) {
Integer idx_f = 0; for(int d=0;d<nd;d++) idx_f+=isf[d]*(Fcoor[d]/rdf[d]);
Integer idx_t = 0; for(int d=0;d<nd;d++) idx_t+=ist[d]*(Tcoor[d]/rdt[d]);
Integer odx_f = 0; for(int d=0;d<nd;d++) odx_f+=osf[d]*(Fcoor[d]%rdf[d]);
Integer odx_t = 0; for(int d=0;d<nd;d++) odx_t+=ost[d]*(Tcoor[d]%rdt[d]);
scalar_type * fp = (scalar_type *)&f_v[odx_f];
scalar_type * tp = (scalar_type *)&t_v[odx_t];
for(int w=0;w<words;w++){
tp[idx_t+w*Nsimd] = fp[idx_f+w*Nsimd]; // FIXME IF RRII layout, type pun no worke
}
}
});
}
template<class vobj>
void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
{
typedef typename vobj::scalar_object sobj;
GridBase *lg = lowDim.Grid();
GridBase *hg = higherDim.Grid();
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl+1 == nh);
assert(orthog<nh);
assert(orthog>=0);
assert(hg->_processors[orthog]==1);
int dl; dl = 0;
for(int d=0;d<nh;d++){
if ( d != orthog) {
assert(lg->_processors[dl] == hg->_processors[d]);
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
dl++;
}
}
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuRead);
autoView(higherDimv,higherDim,CpuWrite);
thread_for(idx,lg->lSites(),{
sobj s;
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
int ddl=0;
hcoor[orthog] = slice;
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
hcoor[d]=lcoor[ddl++];
}
}
peekLocalSite(s,lowDimv,lcoor);
pokeLocalSite(s,higherDimv,hcoor);
});
}
template<class vobj>
void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog)
{
typedef typename vobj::scalar_object sobj;
GridBase *lg = lowDim.Grid();
GridBase *hg = higherDim.Grid();
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl+1 == nh);
assert(orthog<nh);
assert(orthog>=0);
assert(hg->_processors[orthog]==1);
int dl; dl = 0;
for(int d=0;d<nh;d++){
if ( d != orthog) {
assert(lg->_processors[dl] == hg->_processors[d]);
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
dl++;
}
}
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuWrite);
autoView(higherDimv,higherDim,CpuRead);
thread_for(idx,lg->lSites(),{
sobj s;
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
int ddl=0;
hcoor[orthog] = slice;
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
hcoor[d]=lcoor[ddl++];
}
}
peekLocalSite(s,higherDimv,hcoor);
pokeLocalSite(s,lowDimv,lcoor);
});
}
template<class vobj>
void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
GridBase *lg = lowDim.Grid();
GridBase *hg = higherDim.Grid();
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuRead);
autoView(higherDimv,higherDim,CpuWrite);
thread_for(idx,lg->lSites(),{
sobj s;
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,lowDimv,lcoor);
pokeLocalSite(s,higherDimv,hcoor);
}
});
}
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
GridBase *lg = lowDim.Grid();
GridBase *hg = higherDim.Grid();
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
if ( d!=orthog ) {
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
}
// the above should guarantee that the operations are local
autoView(lowDimv,lowDim,CpuWrite);
autoView(higherDimv,higherDim,CpuRead);
thread_for(idx,lg->lSites(),{
sobj s;
Coordinate lcoor(nl);
Coordinate hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,higherDimv,hcoor);
pokeLocalSite(s,lowDimv,lcoor);
}
});
}
template<class vobj>
void Replicate(const Lattice<vobj> &coarse,Lattice<vobj> & fine)
{
typedef typename vobj::scalar_object sobj;
GridBase *cg = coarse.Grid();
GridBase *fg = fine.Grid();
int nd = cg->_ndimension;
subdivides(cg,fg);
assert(cg->_ndimension==fg->_ndimension);
Coordinate ratio(cg->_ndimension);
for(int d=0;d<cg->_ndimension;d++){
ratio[d] = fg->_fdimensions[d]/cg->_fdimensions[d];
}
Coordinate fcoor(nd);
Coordinate ccoor(nd);
for(int g=0;g<fg->gSites();g++){
fg->GlobalIndexToGlobalCoor(g,fcoor);
for(int d=0;d<nd;d++){
ccoor[d] = fcoor[d]%cg->_gdimensions[d];
}
sobj tmp;
peekSite(tmp,coarse,ccoor);
pokeSite(tmp,fine,fcoor);
}
}
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
typedef typename vobj::vector_type vtype;
GridBase* in_grid = in.Grid();
out.resize(in_grid->lSites());
int ndim = in_grid->Nd();
int in_nsimd = vtype::Nsimd();
std::vector<Coordinate > in_icoor(in_nsimd);
for(int lane=0; lane < in_nsimd; lane++){
in_icoor[lane].resize(ndim);
in_grid->iCoorFromIindex(in_icoor[lane], lane);
}
//loop over outer index
autoView( in_v , in, CpuRead);
thread_for(in_oidx,in_grid->oSites(),{
//Assemble vector of pointers to output elements
ExtractPointerArray<sobj> out_ptrs(in_nsimd);
Coordinate in_ocoor(ndim);
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
Coordinate lcoor(in_grid->Nd());
for(int lane=0; lane < in_nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions);
assert(lex < out.size());
out_ptrs[lane] = &out[lex];
}
//Unpack into those ptrs
const vobj & in_vobj = in_v[in_oidx];
extract(in_vobj, out_ptrs, 0);
});
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToRevLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
typedef typename vobj::vector_type vtype;
GridBase* in_grid = in._grid;
out.resize(in_grid->lSites());
int ndim = in_grid->Nd();
int in_nsimd = vtype::Nsimd();
std::vector<Coordinate > in_icoor(in_nsimd);
for(int lane=0; lane < in_nsimd; lane++){
in_icoor[lane].resize(ndim);
in_grid->iCoorFromIindex(in_icoor[lane], lane);
}
thread_for(in_oidx, in_grid->oSites(),{
//Assemble vector of pointers to output elements
std::vector<sobj*> out_ptrs(in_nsimd);
Coordinate in_ocoor(ndim);
in_grid->oCoorFromOindex(in_ocoor, in_oidx);
Coordinate lcoor(in_grid->Nd());
for(int lane=0; lane < in_nsimd; lane++){
for(int mu=0;mu<ndim;mu++)
lcoor[mu] = in_ocoor[mu] + in_grid->_rdimensions[mu]*in_icoor[lane][mu];
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, in_grid->_ldimensions);
out_ptrs[lane] = &out[lex];
}
//Unpack into those ptrs
const vobj & in_vobj = in._odata[in_oidx];
extract1(in_vobj, out_ptrs, 0);
});
}
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out.Grid();
assert(in.size()==grid->lSites());
const int ndim = grid->Nd();
constexpr int nsimd = vtype::Nsimd();
std::vector<Coordinate > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
autoView( out_v , out, CpuWrite);
thread_for(oidx, grid->oSites(),{
//Assemble vector of pointers to output elements
ExtractPointerArray<sobj> ptrs(nsimd);
Coordinate ocoor(ndim);
Coordinate lcoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge(vecobj, ptrs, 0);
out_v[oidx] = vecobj;
});
}
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromRevLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out._grid;
assert(in.size()==grid->lSites());
int ndim = grid->Nd();
int nsimd = vtype::Nsimd();
std::vector<Coordinate > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
thread_for(oidx, grid->oSites(), {
//Assemble vector of pointers to output elements
std::vector<sobj*> ptrs(nsimd);
Coordinate ocoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
Coordinate lcoor(grid->Nd());
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoorReversed(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge1(vecobj, ptrs, 0);
out._odata[oidx] = vecobj;
});
}
//Convert a Lattice from one precision to another
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in)
{
assert(out.Grid()->Nd() == in.Grid()->Nd());
for(int d=0;d<out.Grid()->Nd();d++){
assert(out.Grid()->FullDimensions()[d] == in.Grid()->FullDimensions()[d]);
}
out.Checkerboard() = in.Checkerboard();
GridBase *in_grid=in.Grid();
GridBase *out_grid = out.Grid();
typedef typename VobjOut::scalar_object SobjOut;
typedef typename VobjIn::scalar_object SobjIn;
int ndim = out.Grid()->Nd();
int out_nsimd = out_grid->Nsimd();
std::vector<Coordinate > out_icoor(out_nsimd);
for(int lane=0; lane < out_nsimd; lane++){
out_icoor[lane].resize(ndim);
out_grid->iCoorFromIindex(out_icoor[lane], lane);
}
std::vector<SobjOut> in_slex_conv(in_grid->lSites());
unvectorizeToLexOrdArray(in_slex_conv, in);
autoView( out_v , out, CpuWrite);
thread_for(out_oidx,out_grid->oSites(),{
Coordinate out_ocoor(ndim);
out_grid->oCoorFromOindex(out_ocoor, out_oidx);
ExtractPointerArray<SobjOut> ptrs(out_nsimd);
Coordinate lcoor(out_grid->Nd());
for(int lane=0; lane < out_nsimd; lane++){
for(int mu=0;mu<ndim;mu++)
lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];
int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
ptrs[lane] = &in_slex_conv[llex];
}
merge(out_v[out_oidx], ptrs, 0);
});
}
////////////////////////////////////////////////////////////////////////////////
// Communicate between grids
////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLE CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Mesh of nodes (2x2) ; subdivide to 1x1 subdivisions
//
// Lex ord:
// N0 va0 vb0 vc0 vd0 N1 va1 vb1 vc1 vd1
// N2 va2 vb2 vc2 vd2 N3 va3 vb3 vc3 vd3
//
// Ratio = full[dim] / split[dim]
//
// For each dimension do an all to all; get Nvec -> Nvec / ratio
// Ldim -> Ldim * ratio
// LocalVol -> LocalVol * ratio
// full AllToAll(0)
// N0 va0 vb0 va1 vb1 N1 vc0 vd0 vc1 vd1
// N2 va2 vb2 va3 vb3 N3 vc2 vd2 vc3 vd3
//
// REARRANGE
// N0 va01 vb01 N1 vc01 vd01
// N2 va23 vb23 N3 vc23 vd23
//
// full AllToAll(1) // Not what is wanted. FIXME
// N0 va01 va23 N1 vc01 vc23
// N2 vb01 vb23 N3 vd01 vd23
//
// REARRANGE
// N0 va0123 N1 vc0123
// N2 vb0123 N3 vd0123
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
/*
* Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
*
* 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
*
* node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth
* node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
* node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* etc...
*/
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
typedef typename Vobj::scalar_object Sobj;
int full_vecs = full.size();
assert(full_vecs>=1);
GridBase * full_grid = full[0].Grid();
GridBase *split_grid = split.Grid();
int ndim = full_grid->_ndimension;
int full_nproc = full_grid->_Nprocessors;
int split_nproc =split_grid->_Nprocessors;
////////////////////////////////
// Checkerboard management
////////////////////////////////
int cb = full[0].Checkerboard();
split.Checkerboard() = cb;
//////////////////////////////
// Checks
//////////////////////////////
assert(full_grid->_ndimension==split_grid->_ndimension);
for(int n=0;n<full_vecs;n++){
assert(full[n].Checkerboard() == cb);
for(int d=0;d<ndim;d++){
assert(full[n].Grid()->_gdimensions[d]==split.Grid()->_gdimensions[d]);
assert(full[n].Grid()->_fdimensions[d]==split.Grid()->_fdimensions[d]);
}
}
int nvector =full_nproc/split_nproc;
assert(nvector*split_nproc==full_nproc);
assert(nvector == full_vecs);
Coordinate ratio(ndim);
for(int d=0;d<ndim;d++){
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
for(int v=0;v<nvector;v++){
unvectorizeToLexOrdArray(scalardata,full[v]);
thread_for(site,lsites,{
alldata[v*lsites+site] = scalardata[site];
});
}
int nvec = nvector; // Counts down to 1 as we collapse dims
Coordinate ldims = full_grid->_ldimensions;
for(int d=ndim-1;d>=0;d--){
if ( ratio[d] != 1 ) {
full_grid ->AllToAll(d,alldata,tmpdata);
if ( split_grid->_processors[d] > 1 ) {
alldata=tmpdata;
split_grid->AllToAll(d,alldata,tmpdata);
}
auto rdims = ldims;
auto M = ratio[d];
auto rsites= lsites*M;// increases rsites by M
nvec /= M; // Reduce nvec by subdivision factor
rdims[d] *= M; // increase local dim by same factor
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
// Loop over reordered data post A2A
thread_for(c, chunk, {
Coordinate coor(ndim);
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
// which node sets an adder to the coordinate
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
// LexicoFind coordinate & vector number within split lattice
alldata[lex_r] = tmpdata[lex_c];
}
}
});
ldims[d]*= ratio[d];
lsites *= ratio[d];
}
}
vectorizeFromLexOrdArray(alldata,split);
}
template<class Vobj>
void Grid_split(Lattice<Vobj> &full,Lattice<Vobj> & split)
{
int nvector = full.Grid()->_Nprocessors / split.Grid()->_Nprocessors;
std::vector<Lattice<Vobj> > full_v(nvector,full.Grid());
for(int n=0;n<nvector;n++){
full_v[n] = full;
}
Grid_split(full_v,split);
}
template<class Vobj>
void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
typedef typename Vobj::scalar_object Sobj;
int full_vecs = full.size();
assert(full_vecs>=1);
GridBase * full_grid = full[0].Grid();
GridBase *split_grid = split.Grid();
int ndim = full_grid->_ndimension;
int full_nproc = full_grid->_Nprocessors;
int split_nproc =split_grid->_Nprocessors;
////////////////////////////////
// Checkerboard management
////////////////////////////////
int cb = full[0].Checkerboard();
split.Checkerboard() = cb;
//////////////////////////////
// Checks
//////////////////////////////
assert(full_grid->_ndimension==split_grid->_ndimension);
for(int n=0;n<full_vecs;n++){
assert(full[n].Checkerboard() == cb);
for(int d=0;d<ndim;d++){
assert(full[n].Grid()->_gdimensions[d]==split.Grid()->_gdimensions[d]);
assert(full[n].Grid()->_fdimensions[d]==split.Grid()->_fdimensions[d]);
}
}
int nvector =full_nproc/split_nproc;
assert(nvector*split_nproc==full_nproc);
assert(nvector == full_vecs);
Coordinate ratio(ndim);
for(int d=0;d<ndim;d++){
ratio[d] = full_grid->_processors[d]/ split_grid->_processors[d];
}
uint64_t lsites = full_grid->lSites();
uint64_t sz = lsites * nvector;
std::vector<Sobj> tmpdata(sz);
std::vector<Sobj> alldata(sz);
std::vector<Sobj> scalardata(lsites);
unvectorizeToLexOrdArray(alldata,split);
/////////////////////////////////////////////////////////////////
// Start from split grid and work towards full grid
/////////////////////////////////////////////////////////////////
int nvec = 1;
uint64_t rsites = split_grid->lSites();
Coordinate rdims = split_grid->_ldimensions;
for(int d=0;d<ndim;d++){
if ( ratio[d] != 1 ) {
auto M = ratio[d];
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor
auto lsites= rsites/M; // Decreases rsites by M
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
{
// Loop over reordered data post A2A
thread_for(c, chunk,{
Coordinate coor(ndim);
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
// which node sets an adder to the coordinate
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
// LexicoFind coordinate & vector number within split lattice
tmpdata[lex_c] = alldata[lex_r];
}
}
});
}
if ( split_grid->_processors[d] > 1 ) {
split_grid->AllToAll(d,tmpdata,alldata);
tmpdata=alldata;
}
full_grid ->AllToAll(d,tmpdata,alldata);
rdims[d]/= M;
rsites /= M;
nvec *= M; // Increase nvec by subdivision factor
}
}
lsites = full_grid->lSites();
for(int v=0;v<nvector;v++){
thread_for(site, lsites,{
scalardata[site] = alldata[v*lsites+site];
});
vectorizeFromLexOrdArray(scalardata,full[v]);
}
}
NAMESPACE_END(Grid);