Mirror of https://github.com/paboyle/Grid.git
Commit 52a6ba9767 (parent 4d2198ea56)

    Slice summation working. May move this into lattice/Grid_lattice_reduction however
@@ -98,6 +98,7 @@ public:
 #include <lattice/Grid_lattice_coordinate.h>
 #include <lattice/Grid_lattice_rng.h>
 #include <lattice/Grid_lattice_transfer.h>
+#include <Grid_summation.h>
@@ -126,16 +126,16 @@ namespace Grid {
 /////////////////////////////////////////////////////////////////
 // Generic extract/merge/permute
 /////////////////////////////////////////////////////////////////

 template<class vsimd,class scalar>
 inline void Gextract(const vsimd &y,std::vector<scalar *> &extracted){
-  // FIXME: bounce off stack is painful
-  // temporary hack while I figure out better way.
-  // There are intrinsics to do this work without the storage.
+  // FIXME: bounce off memory is painful
   int Nextr=extracted.size();
   int Nsimd=vsimd::Nsimd();
   int s=Nsimd/Nextr;

   std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);

   vstore(y,&buf[0]);
   for(int i=0;i<Nextr;i++){
     *extracted[i] = buf[i*s];
@@ -143,6 +143,19 @@ inline void Gextract(const vsimd &y,std::vector<scalar *> &extracted){
   }
 };
+template<class vsimd,class scalar>
+inline void Gextract(const vsimd &y,std::vector<scalar> &extracted){
+  int Nextr=extracted.size();
+  int Nsimd=vsimd::Nsimd();
+  int s=Nsimd/Nextr;
+
+  std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
+
+  vstore(y,&buf[0]);
+  for(int i=0;i<Nextr;i++){
+    extracted[i] = buf[i*s];
+  }
+};
 template<class vsimd,class scalar>
 inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
   int Nextr=extracted.size();
   int Nsimd=vsimd::Nsimd();
@@ -158,23 +171,6 @@ inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
   vset(y,&buf[0]);
 };
-template<class vsimd,class scalar>
-inline void Gextract(const vsimd &y,std::vector<scalar> &extracted){
-  // FIXME: bounce off stack is painful
-  // temporary hack while I figure out better way.
-  // There are intrinsics to do this work without the storage.
-  int Nextr=extracted.size();
-  int Nsimd=vsimd::Nsimd();
-  int s=Nsimd/Nextr;
-
-  std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
-
-  vstore(y,&buf[0]);
-
-  for(int i=0;i<Nextr;i++){
-    extracted[i] = buf[i*s];
-  }
-};
 template<class vsimd,class scalar>
 inline void Gmerge(vsimd &y,std::vector<scalar> &extracted){
   int Nextr=extracted.size();
   int Nsimd=vsimd::Nsimd();
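Note on the Gextract/Gmerge pair above: they round-trip SIMD lanes through memory — vstore spills the register into an aligned scalar buffer, lanes are read out at stride s = Nsimd/Nextr, and vset rebuilds the register. A minimal standalone sketch of the lane arithmetic, with a plain struct standing in for the vsimd register and the alignedAllocator omitted (names here are illustrative, not Grid's):

    #include <cassert>
    #include <iostream>
    #include <vector>

    // Stand-in for a 4-wide SIMD register; Grid's real Gextract/Gmerge spill
    // the register with vstore into an aligned buffer and rebuild with vset.
    struct vsimd4 { float lane[4]; static int Nsimd(void){ return 4; } };

    // Read lanes out with stride s = Nsimd/Nextr, as Gextract does from buf[].
    void extract_lanes(const vsimd4 &y, std::vector<float> &extracted){
      int Nextr = extracted.size();
      int s     = vsimd4::Nsimd()/Nextr;
      for(int i=0;i<Nextr;i++) extracted[i] = y.lane[i*s];
    }

    // Write scalars back at the same stride; replicating across each stride
    // block is one plausible convention when Nextr < Nsimd (an assumption).
    void merge_lanes(vsimd4 &y, const std::vector<float> &extracted){
      int Nextr = extracted.size();
      int s     = vsimd4::Nsimd()/Nextr;
      for(int i=0;i<Nextr;i++)
        for(int k=0;k<s;k++) y.lane[i*s+k] = extracted[i];
    }

    int main(void){
      vsimd4 y = {{0.f,1.f,2.f,3.f}};
      std::vector<float> lanes(4);
      extract_lanes(y,lanes);   // full extraction: Nextr == Nsimd, stride 1
      merge_lanes(y,lanes);     // round trip reproduces the register
      for(int i=0;i<4;i++) assert(y.lane[i]==float(i));
      std::cout << "lane round trip ok" << std::endl;
      return 0;
    }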
@@ -1,7 +1,8 @@
 #ifndef GRID_SUMMATION_H
 #define GRID_SUMMATION_H
 namespace Grid {

-void subdivides(GridBase *coarse,GridBase *fine)
+inline void subdivides(GridBase *coarse,GridBase *fine)
 {
   assert(coarse->_ndimension == fine->_ndimension);
@@ -9,58 +10,43 @@ void subdivides(GridBase *coarse,GridBase *fine)

   // local and global volumes subdivide cleanly after SIMDization
   for(int d=0;d<_ndimension;d++){
     assert(coarse->_processors[d]  == fine->_processors[d]);
     assert(coarse->_simd_layout[d] == fine->_simd_layout[d]);
     assert((fine->_rdimensions[d] / coarse->_rdimensions[d])* coarse->_rdimensions[d]==fine->_rdimensions[d]);
     assert((fine->_ldimensions[d] / coarse->_ldimensions[d])* coarse->_ldimensions[d]==fine->_ldimensions[d]);
     assert((fine->_gdimensions[d] / coarse->_gdimensions[d])* coarse->_gdimensions[d]==fine->_gdimensions[d]);
     assert((fine->_fdimensions[d] / coarse->_fdimensions[d])* coarse->_fdimensions[d]==fine->_fdimensions[d]);
   }
 }

 // useful in multigrid project;
 // Generic name : Coarsen?
 //              : SubMeshSum?
 //
 template<class vobj>
 inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
 {
-  GridBase * fine  = findData._grid;
-  GridBase * coarse= findData._grid;
+  GridBase * fine  = fineData._grid;
+  GridBase * coarse= coarseData._grid;

-  subdivides(coars,fine); // require they map
+  subdivides(coarse,fine); // require they map

   int _ndimension = coarse->_ndimension;

-  std::vector<bool> replicated(_ndimension,false);
-  std::vector<int> block_r (_dimension);
-  std::vector<int> block_f (_dimension);
+  std::vector<int> block_r (_ndimension);

-  ///////////////////////////////////////////////////////////
-  // Detect whether the result is replicated in dimension d
-  ///////////////////////////////////////////////////////////
   for(int d=0 ; d<_ndimension;d++){
-    if ( (_fdimensions[d] == 1) && (coarse->_processors[d]>1) ) {
-      replicated[d]=true;
-    }
     block_r[d] = fine->_rdimensions[d] / coarse->_rdimensions[d];
-    block_l[d] = fine->_ldimensions[d] / coarse->_ldimensions[d];
-    block_f[d] = fine->_fdimensions[d] / coarse->_fdimensions[d];
   }

-  coaseData=zero;
+  //FIXME Bagel's strategy: loop over fine sites
+  // identify corresponding coarse site, but coarse sites are
+  // divided across threads. Not so easy to do in openmp but
+  // there must be a way
+  coarseData=zero;
   for(int sf=0;sf<fine->oSites();sf++){

     int sc;
     vobj sum=zero;
     std::vector<int> coor_c(_ndimension);
     std::vector<int> coor_f(_ndimension);

     GridBase::CoorFromIndex(coor_f,sf,fine->_rdimensions);

-    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/fine->_rdimensions;
+    for(int d=0;d<_ndimension;d++) coor_c[d]=coor_f[d]/block_r[d];
     GridBase::IndexFromCoor(coor_c,sc,coarse->_rdimensions);

     coarseData._odata[sc]=coarseData._odata[sc]+fineData._odata[sf];
@@ -68,4 +54,75 @@ inline void sumBlocks(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
   }
   return;
 }
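Note: sumBlocks coarsens by integer-dividing each fine coordinate by the block size block_r[d] to find the enclosing coarse site, then accumulating into it. A toy one-dimensional analogue of that map (hypothetical helper, not Grid code):

    #include <iostream>
    #include <vector>

    // Toy 1-D analogue of sumBlocks: coarse[sc] accumulates the fine sites
    // whose coordinate divides down to sc (block_r plays the same role here).
    std::vector<double> sum_blocks_1d(const std::vector<double> &fine, int block_r){
      std::vector<double> coarse(fine.size()/block_r, 0.0);
      for(std::size_t sf=0; sf<fine.size(); sf++){
        std::size_t sc = sf/block_r;     // coor_c = coor_f / block_r
        coarse[sc] += fine[sf];
      }
      return coarse;
    }

    int main(void){
      std::vector<double> fine = {1,1,1,1, 2,2,2,2};       // 8 fine sites
      std::vector<double> coarse = sum_blocks_1d(fine, 4); // 2 coarse blocks
      std::cout << coarse[0] << " " << coarse[1] << std::endl;  // prints: 4 8
      return 0;
    }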
+template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
+{
+  typedef typename vobj::scalar_object sobj;
+
+  GridBase  *grid = Data._grid;
+  const int    Nd = grid->_ndimension;
+  const int Nsimd = grid->Nsimd();
+
+  assert(orthogdim >= 0);
+  assert(orthogdim < Nd);
+
+  int fd=grid->_fdimensions[orthogdim];
+  int ld=grid->_ldimensions[orthogdim];
+  int rd=grid->_rdimensions[orthogdim];
+
+  std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
+  std::vector<sobj> lsSum(ld,sobj(zero));              // sum across these down to scalars
+  std::vector<sobj> extracted(Nsimd);                  // splitting the SIMD
+
+  result.resize(fd); // And then global sum to return the same vector to every node for IO to file
+  for(int r=0;r<rd;r++){
+    lvSum[r]=zero;
+  }
+
+  std::vector<int> coor(Nd);
+
+  // sum over reduced dimension planes, breaking out orthog dir
+  for(int ss=0;ss<grid->oSites();ss++){
+    GridBase::CoorFromIndex(coor,ss,grid->_rdimensions);
+    int r = coor[orthogdim];
+    lvSum[r]=lvSum[r]+Data._odata[ss];
+  }
+
+  // Sum across simd lanes in the plane, breaking out orthog dir.
+  std::vector<int> icoor(Nd);
+
+  for(int rt=0;rt<rd;rt++){
+
+    extract(lvSum[rt],extracted);
+
+    for(int idx=0;idx<Nsimd;idx++){
+
+      grid->iCoorFromIindex(icoor,idx);
+
+      int ldx =rt+icoor[orthogdim]*rd;
+
+      lsSum[ldx]=lsSum[ldx]+extracted[idx];
+
+    }
+  }
+
+  // sum over nodes.
+  sobj gsum;
+  for(int t=0;t<fd;t++){
+    int pt = t/ld; // processor plane
+    int lt = t%ld;
+    if ( pt == grid->_processor_coor[orthogdim] ) {
+      gsum=lsSum[lt];
+    } else {
+      gsum=zero;
+    }
+
+    grid->GlobalSum(gsum);
+
+    result[t]=gsum;
+  }
+
+}
+
 }
 #endif
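Note: the new sliceSum reduces in three stages — vector-valued sums over each of the rd reduced planes, a SIMD-lane split that scatters those into the ld local scalar slices via ldx = rt + icoor[orthogdim]*rd, and a final GlobalSum in which only the processor owning plane t contributes lsSum[lt]. A serial, scalar sketch of the core bookkeeping on a lexicographically stored field (single node, no SIMD; names are illustrative):

    #include <iostream>
    #include <vector>

    // Serial sketch of sliceSum's core: sum a field over all directions
    // except "orthogdim", leaving one value per slice along that axis.
    // dims[] is the lattice extent; the field is stored lexicographically
    // (dimension 0 fastest), matching CoorFromIndex ordering.
    std::vector<double> slice_sum(const std::vector<double> &field,
                                  const std::vector<int> &dims, int orthogdim){
      std::vector<double> result(dims[orthogdim], 0.0);
      for(std::size_t ss=0; ss<field.size(); ss++){
        // recover the coordinate in direction orthogdim from the linear index
        std::size_t index = ss;
        int coord = 0;
        for(int d=0; d<=orthogdim; d++){
          coord = index % dims[d];
          index = index / dims[d];
        }
        result[coord] += field[ss];
      }
      return result;
    }

    int main(void){
      std::vector<int> dims = {2,2,2,4};               // tiny 2^3 x 4 lattice
      std::vector<double> field(2*2*2*4, 1.0);         // constant field
      std::vector<double> t_slices = slice_sum(field, dims, 3);
      for(std::size_t t=0; t<t_slices.size(); t++)
        std::cout << t_slices[t] << " ";               // prints: 8 8 8 8
      std::cout << std::endl;
      return 0;
    }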
@@ -93,7 +93,7 @@ public:
 static inline void CoorFromIndex (std::vector<int>& coor,int index,std::vector<int> &dims){
   int nd= dims.size();
   coor.resize(nd);
-  for(int d=0;d<_ndimension;d++){
+  for(int d=0;d<nd;d++){
     coor[d] = index % dims[d];
     index = index / dims[d];
   }
@@ -102,7 +102,7 @@ public:
   int nd=dims.size();
   int stride=1;
   index=0;
-  for(int d=0;d<_ndimension;d++){
+  for(int d=0;d<nd;d++){
     index = index+stride*coor[d];
     stride=stride*dims[d];
   }
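Note: both helpers implement a mixed-radix encoding with dimension 0 running fastest; the fix in these two hunks replaces a loop bound of _ndimension with the actual dims.size(), which matters whenever the two differ. A quick round-trip check under the same conventions (standalone sketch):

    #include <cassert>
    #include <vector>

    // Same mixed-radix conventions as GridBase::CoorFromIndex / IndexFromCoor.
    void coor_from_index(std::vector<int> &coor, int index, const std::vector<int> &dims){
      int nd = dims.size();
      coor.resize(nd);
      for(int d=0; d<nd; d++){
        coor[d] = index % dims[d];
        index   = index / dims[d];
      }
    }
    void index_from_coor(const std::vector<int> &coor, int &index, const std::vector<int> &dims){
      int nd = dims.size(), stride = 1;
      index = 0;
      for(int d=0; d<nd; d++){
        index  = index + stride*coor[d];
        stride = stride*dims[d];
      }
    }

    int main(void){
      std::vector<int> dims = {4,4,4,8};
      std::vector<int> coor;
      for(int i=0; i<4*4*4*8; i++){
        int j;
        coor_from_index(coor, i, dims);
        index_from_coor(coor, j, dims);
        assert(i == j);               // encode/decode must round-trip
      }
      return 0;
    }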
@@ -164,6 +164,25 @@ public:
     mult*=_gdimensions[mu];
   }
 }
+void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
+{
+  pcoor.resize(_ndimension);
+  lcoor.resize(_ndimension);
+  for(int mu=0;mu<_ndimension;mu++){
+    pcoor[mu] = gcoor[mu]/_ldimensions[mu];
+    lcoor[mu] = gcoor[mu]%_ldimensions[mu];
+  }
+}
+void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
+{
+  std::vector<int> pcoor;
+  std::vector<int> lcoor;
+  GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
+  rank = RankFromProcessorCoor(pcoor);
+  i_idx= iIndex(lcoor);
+  o_idx= oIndex(lcoor);
+}

 void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
 {
   gcoor.resize(_ndimension);
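Note: the decomposition here is per-dimension quotient and remainder against the local extent — pcoor = gcoor/_ldimensions, lcoor = gcoor%_ldimensions — with gcoor = pcoor*L + lcoor as the inverse used by RankIndexToGlobalCoor. A standalone sketch of that identity:

    #include <cassert>
    #include <vector>

    // Quotient/remainder split of a global coordinate into processor and
    // local coordinates, as in GlobalCoorToProcessorCoorLocalCoor.
    void split(const std::vector<int> &gcoor, const std::vector<int> &ldims,
               std::vector<int> &pcoor, std::vector<int> &lcoor){
      int nd = gcoor.size();
      pcoor.resize(nd); lcoor.resize(nd);
      for(int mu=0; mu<nd; mu++){
        pcoor[mu] = gcoor[mu]/ldims[mu];
        lcoor[mu] = gcoor[mu]%ldims[mu];
      }
    }

    int main(void){
      std::vector<int> ldims = {8,8,8,16};         // local extent per node
      std::vector<int> gcoor = {11,3,9,17};
      std::vector<int> pcoor,lcoor;
      split(gcoor,ldims,pcoor,lcoor);
      for(std::size_t mu=0; mu<gcoor.size(); mu++) // inverse: g = p*L + l
        assert(gcoor[mu] == pcoor[mu]*ldims[mu] + lcoor[mu]);
      return 0;
    }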
@@ -191,24 +210,6 @@ public:
   gcoor.resize(_ndimension);
   for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu];
 }
-void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
-{
-  pcoor.resize(_ndimension);
-  lcoor.resize(_ndimension);
-  for(int mu=0;mu<_ndimension;mu++){
-    pcoor[mu] = gcoor[mu]/_ldimensions[mu];
-    lcoor[mu] = gcoor[mu]%_ldimensions[mu];
-  }
-}
-void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
-{
-  std::vector<int> pcoor;
-  std::vector<int> lcoor;
-  GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
-  rank = RankFromProcessorCoor(pcoor);
-  i_idx= iIndex(lcoor);
-  o_idx= oIndex(lcoor);
-}
 };
@@ -25,7 +25,9 @@ namespace Grid {
 }

 template<class vobj>
-inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) ->decltype(innerProduct(left._odata[0],right._odata[0]))
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+// inline auto innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+//->decltype(innerProduct(left._odata[0],right._odata[0]))
 {
   typedef typename vobj::scalar_type scalar;
   decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero;
@@ -54,6 +56,7 @@ namespace Grid {

   vsum=zero;
   ssum=zero;
+  //FIXME make this loop parallelisable
   for(int ss=0;ss<arg._grid->oSites(); ss++){
     vsum = vsum + arg._odata[ss];
   }
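Note: the FIXME flags the serial site loop. One conventional resolution (an assumption about how it might be parallelised, not Grid's eventual implementation) is a per-thread partial sum combined with OpenMP's reduction clause; a vobj-valued sum would need explicit per-thread partials instead, since the builtin clause only covers arithmetic types:

    #include <iostream>
    #include <vector>
    #ifdef _OPENMP
    #include <omp.h>
    #endif

    // One conventional way to parallelise the flagged loop: each thread
    // accumulates a private partial sum, combined by OpenMP's reduction.
    double lattice_sum(const std::vector<double> &odata){
      double vsum = 0.0;
    #pragma omp parallel for reduction(+:vsum)
      for(long ss=0; ss<(long)odata.size(); ss++){
        vsum += odata[ss];
      }
      return vsum;
    }

    int main(void){
      std::vector<double> field(1<<20, 0.5);
      std::cout << lattice_sum(field) << std::endl;   // prints: 524288
      return 0;
    }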
@@ -241,5 +241,37 @@ public:

 };

+template<class vobj> inline
+void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+{
+  typedef typename vobj::scalar_type scalar_type ;
+  typedef typename vobj::vector_type vector_type ;
+
+  int Nsimd=vobj::vector_type::Nsimd();
+
+  extracted.resize(Nsimd);
+
+  std::vector<scalar_type *> pointers(Nsimd);
+  for(int i=0;i<Nsimd;i++)
+    pointers[i] =(scalar_type *)& extracted[i];
+
+  extract(vec,pointers);
+}
+template<class vobj> inline
+void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
+{
+  typedef typename vobj::scalar_type scalar_type ;
+  typedef typename vobj::vector_type vector_type ;
+
+  int Nsimd=vobj::vector_type::Nsimd();
+  assert(extracted.size()==Nsimd);
+
+  std::vector<scalar_type *> pointers(Nsimd);
+  for(int i=0;i<Nsimd;i++)
+    pointers[i] =(scalar_type *)& extracted[i];
+
+  merge(vec,pointers);
+}
+
 }
 #endif
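Note: these wrappers view each scalar_object as a flat array of scalar_type and delegate to the pointer-based extract/merge — one pointer per SIMD lane, aimed at the start of each object. A plain-C++ sketch of just that aliasing step, with a hypothetical two-component scalar object:

    #include <cassert>
    #include <vector>

    // Hypothetical 2-component "scalar object" standing in for Grid's
    // vobj::scalar_object; float stands in for scalar_type.
    struct sobj { float re, im; };

    int main(void){
      const int Nsimd = 4;
      std::vector<sobj> extracted(Nsimd);

      // One pointer per SIMD lane, aimed at the start of each scalar object,
      // exactly the pointers[] construction in extract/merge above.
      std::vector<float *> pointers(Nsimd);
      for(int i=0;i<Nsimd;i++) pointers[i] = (float *)&extracted[i];

      // Whatever writes through pointers[i] fills lane i's scalar object.
      for(int i=0;i<Nsimd;i++){ pointers[i][0] = float(i); pointers[i][1] = -float(i); }

      for(int i=0;i<Nsimd;i++) assert(extracted[i].re == float(i));
      return 0;
    }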
@@ -11,8 +11,10 @@ int main (int argc, char ** argv)
   Grid_init(&argc,&argv);

   std::vector<int> simd_layout({1,1,2,2});
-  std::vector<int> mpi_layout ({2,1,1,2});
+  std::vector<int> mpi_layout ({1,1,1,1});
   std::vector<int> latt_size  ({16,16,16,32});
+  int orthodir=3;
+  int orthosz =latt_size[orthodir];

   GridCartesian Fine(latt_size,simd_layout,mpi_layout);
   GridRNG       FineRNG(&Fine);
@@ -45,14 +47,31 @@ int main (int argc, char ** argv)
     }
   }

   double vol = Fine.gSites();
   Complex PlaqScale(1.0/vol/6.0/3.0);

+  std::vector<TComplex> Plaq_T(orthosz);
+  sliceSum(Plaq,Plaq_T,Nd-1);
+  int Nt = Plaq_T.size();
+
+  TComplex Plaq_T_sum=zero;
+  for(int t=0;t<Nt;t++){
+    Plaq_T_sum = Plaq_T_sum+Plaq_T[t];
+    Complex Pt=TensorRemove(Plaq_T[t]);
+    std::cout << "sliced ["<<t<<"]" <<Pt*PlaqScale*Real(Nt)<<std::endl;
+  }
+
+  {
+    Complex Pt = TensorRemove(Plaq_T_sum);
+    std::cout << "total " <<Pt*PlaqScale<<std::endl;
+  }
+
   TComplex Tp = sum(Plaq);
   Complex p  = TensorRemove(Tp);
   std::cout << "calculated plaquettes " <<p*PlaqScale<<std::endl;

   Complex LinkTraceScale(1.0/vol/4.0/3.0);
   TComplex Tl = sum(LinkTrace);
   Complex l  = TensorRemove(Tl);
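Note on the normalisation in the test: the sliced values are printed as Pt*PlaqScale*Real(Nt), so their average should reproduce the "total" line, and PlaqScale = 1/(vol*6*3) accounts for the six plaquette orientations in 4d (Nd(Nd-1)/2 = 6) and the Nc = 3 colour trace. A small numerical sketch of that identity (toy values, not the test's output):

    #include <cassert>
    #include <cmath>
    #include <complex>
    #include <vector>

    int main(void){
      const int Nt = 32;
      const double vol = 16.0*16.0*16.0*32.0;        // 16^3 x 32 lattice
      const double PlaqScale = 1.0/vol/6.0/3.0;      // 6 planes, Nc = 3

      // Toy per-slice sums standing in for Plaq_T[t].
      std::vector<std::complex<double> > Plaq_T(Nt, std::complex<double>(3.0,0.0));
      std::complex<double> Plaq_T_sum(0.0,0.0);
      for(int t=0;t<Nt;t++) Plaq_T_sum += Plaq_T[t];

      // Average of the printed sliced values equals the printed total.
      std::complex<double> avg(0.0,0.0);
      for(int t=0;t<Nt;t++) avg += Plaq_T[t]*PlaqScale*double(Nt);
      avg /= double(Nt);

      assert(std::abs(avg - Plaq_T_sum*PlaqScale) < 1e-12);
      return 0;
    }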