mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Merge staggered fix linear operator and reduction
This commit is contained in:
parent
5c936d88a0
commit
96272f3841
@@ -51,7 +51,7 @@ namespace Grid {
|
|||||||
|
|
||||||
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
virtual void Op (const Field &in, Field &out) = 0; // Abstract base
|
||||||
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
virtual void AdjOp (const Field &in, Field &out) = 0; // Abstract base
|
||||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2);
|
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2) = 0;
|
||||||
virtual void HermOp(const Field &in, Field &out)=0;
|
virtual void HermOp(const Field &in, Field &out)=0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -46,28 +46,29 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
GridBase *grid = left._grid;
|
GridBase *grid = left._grid;
|
||||||
const int pad = 8;
|
const int pad = 8;
|
||||||
|
|
||||||
scalar_type nrm;
|
ComplexD inner;
|
||||||
std::vector<scalar_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()*pad);
|
Vector<ComplexD> sumarray(grid->SumArraySize()*pad);
|
||||||
|
|
||||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||||
int nwork, mywork, myoff;
|
int nwork, mywork, myoff;
|
||||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||||
|
|
||||||
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
decltype(innerProductD(left._odata[0],right._odata[0])) vinner=zero; // private to thread; sub summation
|
||||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||||
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
|
vinner = vinner + innerProductD(left._odata[ss],right._odata[ss]);
|
||||||
}
|
}
|
||||||
// All threads sum across SIMD; reduce serial work at end
|
// All threads sum across SIMD; reduce serial work at end
|
||||||
// one write per cacheline with streaming store
|
// one write per cacheline with streaming store
|
||||||
vstream(sumarray[thr*pad],Reduce(TensorRemove(vnrm))) ;
|
ComplexD tmp = Reduce(TensorRemove(vinner)) ;
|
||||||
|
vstream(sumarray[thr*pad],tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
nrm=0.0;
|
inner=0.0;
|
||||||
for(int i=0;i<grid->SumArraySize();i++){
|
for(int i=0;i<grid->SumArraySize();i++){
|
||||||
nrm = nrm+sumarray[i*pad];
|
inner = inner+sumarray[i*pad];
|
||||||
}
|
}
|
||||||
right._grid->GlobalSum(nrm);
|
right._grid->GlobalSum(inner);
|
||||||
return nrm;
|
return inner;
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
|
Loading…
x
Reference in New Issue
Block a user