1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Merge branch 'develop' into feature/mpi3

This commit is contained in:
paboyle 2016-10-25 06:02:33 +01:00
commit b820076b91
5 changed files with 30 additions and 34 deletions

View File

@ -200,18 +200,14 @@ namespace Grid {
sign,FFTW_ESTIMATE); sign,FFTW_ESTIMATE);
} }
double add,mul,fma; std::vector<int> lcoor(Nd), gcoor(Nd);
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
GridStopWatch timer;
// Barrel shift and collect global pencil // Barrel shift and collect global pencil
for(int p=0;p<processors[dim];p++) { for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) { for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor); sgrid->LocalIndexToLocalCoor(idx,lcoor);
sobj s; sobj s;
@ -228,14 +224,11 @@ namespace Grid {
// Loop over orthog coords // Loop over orthog coords
int NN=pencil_g.lSites(); int NN=pencil_g.lSites();
GridStopWatch timer;
GridStopWatch Timer; timer.Start();
Timer.Start();
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int idx=0;idx<NN;idx++) { for(int idx=0;idx<NN;idx++) {
std::vector<int> lcoor(Nd);
pencil_g.LocalIndexToLocalCoor(idx,lcoor); pencil_g.LocalIndexToLocalCoor(idx,lcoor);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
@ -245,15 +238,17 @@ PARALLEL_FOR_LOOP
} }
} }
Timer.Stop(); timer.Stop();
usec += Timer.useconds();
flops+= flops_call*NN;
double add,mul,fma;
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
usec += timer.useconds();
flops+= flops_call*NN;
int pc = processor_coor[dim]; int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) { for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor); sgrid->LocalIndexToLocalCoor(idx,lcoor);
std::vector<int> gcoor = lcoor; gcoor = lcoor;
// extract the result // extract the result
sobj s; sobj s;
gcoor[dim] = lcoor[dim]+L*pc; gcoor[dim] = lcoor[dim]+L*pc;

View File

@ -77,7 +77,7 @@ public:
// GridCartesian / GridRedBlackCartesian // GridCartesian / GridRedBlackCartesian
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
virtual int CheckerBoarded(int dim)=0; virtual int CheckerBoarded(int dim)=0;
virtual int CheckerBoard(std::vector<int> site)=0; virtual int CheckerBoard(std::vector<int> &site)=0;
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;

View File

@ -49,7 +49,7 @@ public:
virtual int CheckerBoarded(int dim){ virtual int CheckerBoarded(int dim){
return 0; return 0;
} }
virtual int CheckerBoard(std::vector<int> site){ virtual int CheckerBoard(std::vector<int> &site){
return 0; return 0;
} }
virtual int CheckerBoardDestination(int cb,int shift,int dim){ virtual int CheckerBoardDestination(int cb,int shift,int dim){

View File

@ -49,7 +49,7 @@ public:
if( dim==_checker_dim) return 1; if( dim==_checker_dim) return 1;
else return 0; else return 0;
} }
virtual int CheckerBoard(std::vector<int> site){ virtual int CheckerBoard(std::vector<int> &site){
int linear=0; int linear=0;
assert(site.size()==_ndimension); assert(site.size()==_ndimension);
for(int d=0;d<_ndimension;d++){ for(int d=0;d<_ndimension;d++){

View File

@ -154,7 +154,7 @@ PARALLEL_FOR_LOOP
template<class vobj,class sobj> template<class vobj,class sobj>
void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){ void peekLocalSite(sobj &s,const Lattice<vobj> &l,std::vector<int> &site){
GridBase *grid=l._grid; GridBase *grid = l._grid;
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type; typedef typename vobj::vector_type vector_type;
@ -164,16 +164,18 @@ PARALLEL_FOR_LOOP
assert( l.checkerboard== l._grid->CheckerBoard(site)); assert( l.checkerboard== l._grid->CheckerBoard(site));
assert( sizeof(sobj)*Nsimd == sizeof(vobj)); assert( sizeof(sobj)*Nsimd == sizeof(vobj));
static const int words=sizeof(vobj)/sizeof(vector_type);
int odx,idx; int odx,idx;
idx= grid->iIndex(site); idx= grid->iIndex(site);
odx= grid->oIndex(site); odx= grid->oIndex(site);
std::vector<sobj> buf(Nsimd); scalar_type * vp = (scalar_type *)&l._odata[odx];
scalar_type * pt = (scalar_type *)&s;
extract(l._odata[odx],buf);
for(int w=0;w<words;w++){
pt[w] = vp[idx+w*Nsimd];
}
s = buf[idx];
return; return;
}; };
@ -190,18 +192,17 @@ PARALLEL_FOR_LOOP
assert( l.checkerboard== l._grid->CheckerBoard(site)); assert( l.checkerboard== l._grid->CheckerBoard(site));
assert( sizeof(sobj)*Nsimd == sizeof(vobj)); assert( sizeof(sobj)*Nsimd == sizeof(vobj));
static const int words=sizeof(vobj)/sizeof(vector_type);
int odx,idx; int odx,idx;
idx= grid->iIndex(site); idx= grid->iIndex(site);
odx= grid->oIndex(site); odx= grid->oIndex(site);
std::vector<sobj> buf(Nsimd); scalar_type * vp = (scalar_type *)&l._odata[odx];
scalar_type * pt = (scalar_type *)&s;
// extract-modify-merge cycle is easiest way and this is not perf critical
extract(l._odata[odx],buf);
buf[idx] = s; for(int w=0;w<words;w++){
vp[idx+w*Nsimd] = pt[w];
merge(l._odata[odx],buf); }
return; return;
}; };