mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Slice summation working. May move this into lattice/Grid_lattice_reduction however

This commit is contained in:
Peter Boyle 2015-04-23 15:13:00 +01:00
parent b7416d79e3
commit 62e8d2d127
7 changed files with 181 additions and 72 deletions

View File

@@ -98,6 +98,7 @@ public:
#include <lattice/Grid_lattice_coordinate.h>
#include <lattice/Grid_lattice_rng.h>
#include <lattice/Grid_lattice_transfer.h>
+#include <Grid_summation.h>

View File

@@ -126,16 +126,16 @@ namespace Grid {
/////////////////////////////////////////////////////////////////
// Generic extract/merge/permute
/////////////////////////////////////////////////////////////////
template<class vsimd,class scalar>
inline void Gextract(const vsimd &y,std::vector<scalar *> &extracted){
-// FIXME: bounce off stack is painful
-// temporary hack while I figure out better way.
-// There are intrinsics to do this work without the storage.
+// FIXME: bounce off memory is painful
int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd();
int s=Nsimd/Nextr;
std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
vstore(y,&buf[0]);
for(int i=0;i<Nextr;i++){
*extracted[i] = buf[i*s];
@@ -143,6 +143,19 @@ inline void Gextract(const vsimd &y,std::vector<scalar *> &extracted){
}
};
+template<class vsimd,class scalar>
+inline void Gextract(const vsimd &y,std::vector<scalar> &extracted){
+int Nextr=extracted.size();
+int Nsimd=vsimd::Nsimd();
+int s=Nsimd/Nextr;
+std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
+vstore(y,&buf[0]);
+for(int i=0;i<Nextr;i++){
+extracted[i] = buf[i*s];
+}
+};
template<class vsimd,class scalar>
inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd();
@@ -158,23 +171,6 @@ inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
vset(y,&buf[0]);
};
-template<class vsimd,class scalar>
-inline void Gextract(const vsimd &y,std::vector<scalar> &extracted){
-// FIXME: bounce off stack is painful
-// temporary hack while I figure out better way.
-// There are intrinsics to do this work without the storage.
-int Nextr=extracted.size();
-int Nsimd=vsimd::Nsimd();
-int s=Nsimd/Nextr;
-std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
-vstore(y,&buf[0]);
-for(int i=0;i<Nextr;i++){
-extracted[i] = buf[i*s];
-}
-};
template<class vsimd,class scalar>
inline void Gmerge(vsimd &y,std::vector<scalar> &extracted){
int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd();
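
Note on the pattern in this file: Gextract spills a SIMD vector to an aligned buffer and copies each lane out as a scalar; Gmerge does the reverse via vset. A minimal standalone sketch of that round trip, with a toy 4-lane type standing in for a real SIMD register (ToyVec and all names below are illustrative, not Grid's):

#include <cassert>
#include <vector>

struct ToyVec { float lane[4]; static int Nsimd(void){ return 4; } };

// Copy every s-th lane out to a scalar, as Gextract does via vstore.
void toyExtract(const ToyVec &y,std::vector<float> &extracted){
  int Nextr = extracted.size();
  int s     = ToyVec::Nsimd()/Nextr;   // stride between kept lanes
  for(int i=0;i<Nextr;i++) extracted[i] = y.lane[i*s];
}

// Write the scalars back into the lanes, as Gmerge does via vset.
void toyMerge(ToyVec &y,std::vector<float> &extracted){
  int Nextr = extracted.size();
  int s     = ToyVec::Nsimd()/Nextr;
  for(int i=0;i<Nextr;i++)
    for(int ii=0;ii<s;ii++) y.lane[i*s+ii] = extracted[i];
}

int main(void){
  ToyVec v = {{0.f,1.f,2.f,3.f}};
  std::vector<float> lanes(4);
  toyExtract(v,lanes);   // lanes = {0,1,2,3}
  lanes[2] += 10.f;      // act on one lane as an ordinary scalar
  toyMerge(v,lanes);     // reassemble the vector
  assert(v.lane[2]==12.f);
  return 0;
}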

View File

@@ -1,7 +1,8 @@
#ifndef GRID_SUMMATION_H
#define GRID_SUMMATION_H
namespace Grid {
-void subdivides(GridBase *coarse,GridBase *fine)
+inline void subdivides(GridBase *coarse,GridBase *fine)
{
assert(coarse->_ndimension == fine->_ndimension);
@@ -9,58 +10,43 @@ void subdivides(GridBase *coarse,GridBase *fine)
// local and global volumes subdivide cleanly after SIMDization
for(int d=0;d<_ndimension;d++){
assert(coarse->_processors[d] == fine->_processors[d]);
assert(coarse->_simd_layout[d] == fine->_simd_layout[d]);
assert((fine->_rdimensions[d] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[d]);
assert((fine->_ldimensions[d] / coarse->_ldimensions[d])* coarse->_ldimensions[d]==fine->_ldimensions[d]);
assert((fine->_gdimensions[d] / coarse->_gdimensions[d])* coarse->_gdimensions[d]==fine->_gdimensions[d]);
assert((fine->_fdimensions[d] / coarse->_fdimensions[d])* coarse->_fdimensions[d]==fine->_fdimensions[d]);
}
}
// useful in multigrid project;
// Generic name : Coarsen?
// : SubMeshSum?
//
template<class vobj>
inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
{
-GridBase * fine = findData._grid;
-GridBase * coarse= findData._grid;
+GridBase * fine = fineData._grid;
+GridBase * coarse= coarseData._grid;
-subdivides(coars,fine); // require they map
+subdivides(coarse,fine); // require they map
int _ndimension = coarse->_ndimension;
std::vector<bool> replicated(_ndimension,false);
-std::vector<int> block_r (_dimension);
-std::vector<int> block_f (_dimension);
+std::vector<int> block_r (_ndimension);
///////////////////////////////////////////////////////////
// Detect whether the result is replicated in dimension d
///////////////////////////////////////////////////////////
for(int d=0 ; d<_ndimension;d++){
if ( (_fdimensions[d] == 1) && (coarse->_processors[d]>1) ) {
replicated[d]=true;
}
block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
-block_l[d] = fine->_ldimensions[d] / coarse->_ldimensions[d];
-block_f[d] = fine->_fdimensions[d] / coarse->_fdimensions[d];
}
-coaseData=zero;
+//FIXME Bagel's strategy: loop over fine sites
+// identify corresponding coarse site, but coarse sites are
+// divided across threads. Not so easy to do in openmp but
+// there must be a way
+coarseData=zero;
for(int sf=0;sf<fine->oSites();sf++){
int sc;
vobj sum=zero;
std::vector<int> coor_c(_ndimension);
std::vector<int> coor_f(_ndimension);
GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);
-for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/fine->_rdimensions;
+for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);
coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
@@ -68,4 +54,75 @@ inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
}
return;
}
+template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
+{
+typedef typename vobj::scalar_object sobj;
+GridBase *grid = Data._grid;
+const int Nd = grid->_ndimension;
+const int Nsimd = grid->Nsimd();
+assert(orthogdim >= 0);
+assert(orthogdim < Nd);
+int fd=grid->_fdimensions[orthogdim];
+int ld=grid->_ldimensions[orthogdim];
+int rd=grid->_rdimensions[orthogdim];
+std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
+std::vector<sobj> lsSum(ld,sobj(zero)); // sum across these down to scalars
+std::vector<sobj> extracted(Nsimd); // splitting the SIMD
+result.resize(fd); // And then global sum to return the same vector to every node for IO to file
+for(int r=0;r<rd;r++){
+lvSum[r]=zero;
+}
+std::vector<int> coor(Nd);
+// sum over reduced dimension planes, breaking out orthog dir
+for(int ss=0;ss<grid->oSites();ss++){
+GridBase::CoorFromIndex(coor,ss,grid->_rdimensions);
+int r = coor[orthogdim];
+lvSum[r]=lvSum[r]+Data._odata[ss];
+}
+// Sum across simd lanes in the plane, breaking out orthog dir.
+std::vector<int> icoor(Nd);
+for(int rt=0;rt<rd;rt++){
+extract(lvSum[rt],extracted);
+for(int idx=0;idx<Nsimd;idx++){
+grid->iCoorFromIindex(icoor,idx);
+int ldx =rt+icoor[orthogdim]*rd;
+lsSum[ldx]=lsSum[ldx]+extracted[idx];
+}
+}
+// sum over nodes.
+sobj gsum;
+for(int t=0;t<fd;t++){
+int pt = t/ld; // processor plane
+int lt = t%ld;
+if ( pt == grid->_processor_coor[orthogdim] ) {
+gsum=lsSum[lt];
+} else {
+gsum=zero;
+}
+grid->GlobalSum(gsum);
+result[t]=gsum;
+}
+}
}
#endif
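
A hedged usage sketch of the new sliceSum, mirroring the test at the bottom of this commit: sum an observable over each time slice and print one value per slice. The grid set-up is copied from the test; LatticeComplex as the observable's type and the "obs" name are assumptions for illustration:

std::vector<int> simd_layout({1,1,2,2});
std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size  ({16,16,16,32});
GridCartesian Fine(latt_size,simd_layout,mpi_layout);

LatticeComplex obs(&Fine);      // some observable on the fine grid
// ... fill obs ...

int Tdir = Nd-1;                // orthogonal (time) direction
std::vector<TComplex> obs_T;    // resized to fd inside sliceSum
sliceSum(obs,obs_T,Tdir);       // every node receives the full vector

for(int t=0;t<(int)obs_T.size();t++){
  std::cout << "slice["<<t<<"] " << TensorRemove(obs_T[t]) << std::endl;
}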

View File

@@ -93,7 +93,7 @@ public:
static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
int nd= dims.size();
coor.resize(nd);
-for(int d=0;d<_ndimension;d++){
+for(int d=0;d<nd;d++){
coor[d] = index % dims[d];
index = index / dims[d];
}
@@ -102,7 +102,7 @@ public:
int nd=dims.size();
int stride=1;
index=0;
-for(int d=0;d<_ndimension;d++){
+for(int d=0;d<nd;d++){
index = index+stride*coor[d];
stride=stride*dims[d];
}
@@ -164,6 +164,25 @@ public:
mult*=_gdimensions[mu];
}
}
+void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
+{
+pcoor.resize(_ndimension);
+lcoor.resize(_ndimension);
+for(int mu=0;mu<_ndimension;mu++){
+pcoor[mu] = gcoor[mu]/_ldimensions[mu];
+lcoor[mu] = gcoor[mu]%_ldimensions[mu];
+}
+}
+void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
+{
+std::vector<int> pcoor;
+std::vector<int> lcoor;
+GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
+rank = RankFromProcessorCoor(pcoor);
+i_idx= iIndex(lcoor);
+o_idx= oIndex(lcoor);
+}
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
{
gcoor.resize(_ndimension);
@@ -191,24 +210,6 @@ public:
gcoor.resize(_ndimension);
for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu];
}
-void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
-{
-pcoor.resize(_ndimension);
-lcoor.resize(_ndimension);
-for(int mu=0;mu<_ndimension;mu++){
-pcoor[mu] = gcoor[mu]/_ldimensions[mu];
-lcoor[mu] = gcoor[mu]%_ldimensions[mu];
-}
-}
-void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
-{
-std::vector<int> pcoor;
-std::vector<int> lcoor;
-GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
-rank = RankFromProcessorCoor(pcoor);
-i_idx= iIndex(lcoor);
-o_idx= oIndex(lcoor);
-}
};
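
The two one-line fixes above (looping over nd rather than the member _ndimension) matter whenever dims.size() differs from the grid's dimension. A standalone sketch of the lexicographic mapping those helpers implement, with dimension 0 running fastest (names below are illustrative):

#include <cassert>
#include <vector>

void coorFromIndex(std::vector<int> &coor,int index,const std::vector<int> &dims){
  int nd = dims.size();           // bound on dims, not on the grid
  coor.resize(nd);
  for(int d=0;d<nd;d++){
    coor[d] = index % dims[d];
    index   = index / dims[d];
  }
}
void indexFromCoor(const std::vector<int> &coor,int &index,const std::vector<int> &dims){
  int stride=1;
  index=0;
  for(int d=0;d<(int)dims.size();d++){
    index  = index + stride*coor[d];
    stride = stride*dims[d];
  }
}

int main(void){
  std::vector<int> dims({4,4,4,8});
  std::vector<int> coor;
  for(int i=0;i<4*4*4*8;i++){     // round trip over the whole volume
    int j;
    coorFromIndex(coor,i,dims);
    indexFromCoor(coor,j,dims);
    assert(j==i);
  }
  return 0;
}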

View File

@@ -25,7 +25,9 @@ namespace Grid {
}
template<class vobj>
-inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+// inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+//->decltype(innerProduct(left._odata[0],right._odata[0]))
{
typedef typename vobj::scalar_type scalar;
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
@@ -54,6 +56,7 @@ namespace Grid {
vsum=zero;
ssum=zero;
+//FIXME make this loop parallelisable
for(int ss=0;ss<arg._grid->oSites(); ss++){
vsum = vsum + arg._odata[ss];
}
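
On the //FIXME above: one conventional way to parallelise the site sum, sketched standalone with a plain double standing in for the vobj accumulator (assumes OpenMP; combining the partials serially keeps the result deterministic for a fixed thread count):

#include <omp.h>
#include <vector>

double siteSum(const std::vector<double> &odata){
  int nthr = omp_get_max_threads();
  std::vector<double> partial(nthr,0.0);   // one accumulator per thread
#pragma omp parallel
  {
    int me = omp_get_thread_num();
#pragma omp for
    for(int ss=0;ss<(int)odata.size();ss++){
      partial[me] += odata[ss];            // no sharing between threads
    }
  }
  double vsum = 0.0;                       // serial combine
  for(int t=0;t<nthr;t++) vsum += partial[t];
  return vsum;
}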

View File

@@ -241,5 +241,37 @@ public:
};
+template<class vobj> inline
+void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+{
+typedef typename vobj::scalar_type scalar_type ;
+typedef typename vobj::vector_type vector_type ;
+int Nsimd=vobj::vector_type::Nsimd();
+extracted.resize(Nsimd);
+std::vector<scalar_type *> pointers(Nsimd);
+for(int i=0;i<Nsimd;i++)
+pointers[i] =(scalar_type *)& extracted[i];
+extract(vec,pointers);
+}
+template<class vobj> inline
+void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+{
+typedef typename vobj::scalar_type scalar_type ;
+typedef typename vobj::vector_type vector_type ;
+int Nsimd=vobj::vector_type::Nsimd();
+assert(extracted.size()==Nsimd);
+std::vector<scalar_type *> pointers(Nsimd);
+for(int i=0;i<Nsimd;i++)
+pointers[i] =(scalar_type *)& extracted[i];
+merge(vec,pointers);
+}
}
#endif
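
A hedged sketch of what these scalar_object wrappers enable, and the pattern sliceSum relies on above: pull the SIMD lanes of one vectorised object apart, act on a single lane as a scalar object, and merge back (the vobj shape and the doubling of lane 0 are assumptions for illustration):

template<class vobj>
void bumpLaneZero(vobj &v){
  typedef typename vobj::scalar_object sobj;
  int Nsimd = vobj::vector_type::Nsimd();
  std::vector<sobj> lanes(Nsimd);
  extract(v,lanes);              // one scalar object per SIMD lane
  lanes[0] = lanes[0]+lanes[0];  // touch just lane 0
  merge(v,lanes);                // reassemble the vectorised object
}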

View File

@@ -11,8 +11,10 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
std::vector<int> simd_layout({1,1,2,2});
-std::vector<int> mpi_layout ({2,1,1,2});
+std::vector<int> mpi_layout ({1,1,1,1});
std::vector<int> latt_size ({16,16,16,32});
+int orthodir=3;
+int orthosz =latt_size[orthodir];
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
GridRNG FineRNG(&Fine);
@@ -45,14 +47,31 @@ int main (int argc, char ** argv)
}
}
double vol = Fine.gSites();
Complex PlaqScale(1.0/vol/6.0/3.0);
+std::vector<TComplex> Plaq_T(orthosz);
+sliceSum(Plaq,Plaq_T,Nd-1);
+int Nt = Plaq_T.size();
+TComplex Plaq_T_sum=zero;
+for(int t=0;t<Nt;t++){
+Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
+Complex Pt=TensorRemove(Plaq_T[t]);
+std::cout << "sliced ["<<t<<"]" <<Pt*PlaqScale*Real(Nt)<<std::endl;
+}
+{
+Complex Pt = TensorRemove(Plaq_T_sum);
+std::cout << "total " <<Pt*PlaqScale<<std::endl;
+}
TComplex Tp = sum(Plaq);
Complex p = TensorRemove(Tp);
std::cout << "calculated plaquettes " <<p*PlaqScale<<std::endl;
Complex LinkTraceScale(1.0/vol/4.0/3.0);
TComplex Tl = sum(LinkTrace);
Complex l = TensorRemove(Tl);