1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Merge staggered fix linear operator and reduction

This commit is contained in:
Azusa Yamaguchi 2018-04-26 10:33:19 +01:00
parent 5c936d88a0
commit 96272f3841
2 changed files with 11 additions and 10 deletions

View File

@ -51,7 +51,7 @@ namespace Grid {
virtual void Op (const Field &in, Field &out) = 0; // Abstract base virtual void Op (const Field &in, Field &out) = 0; // Abstract base
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2); virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
virtual void HermOp(const Field &in, Field &out)=0; virtual void HermOp(const Field &in, Field &out)=0;
}; };

View File

@ -46,28 +46,29 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
GridBase *grid = left._grid; GridBase *grid = left._grid;
const int pad = 8; const int pad = 8;
scalar_type nrm; ComplexD inner;
std::vector<scalar_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()*pad); Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff; int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){ for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]); vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
} }
// All threads sum across SIMD; reduce serial work at end // All threads sum across SIMD; reduce serial work at end
// one write per cacheline with streaming store // one write per cacheline with streaming store
vstream(sumarray[thr*pad],Reduce(TensorRemove(vnrm))) ; ComplexD tmp = Reduce(TensorRemove(vinner)) ;
vstream(sumarray[thr*pad],tmp);
} }
nrm=0.0; inner=0.0;
for(int i=0;i<grid->SumArraySize();i++){ for(int i=0;i<grid->SumArraySize();i++){
nrm = nrm+sumarray[i*pad]; inner = inner+sumarray[i*pad];
} }
right._grid->GlobalSum(nrm); right._grid->GlobalSum(inner);
return nrm; return inner;
} }
///////////////////////// /////////////////////////