mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-15 14:27:06 +01:00
Some fix for the GenericHMCrunner
This commit is contained in:
@ -38,51 +38,56 @@ namespace Grid {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Deterministic Reduction operations
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||
ComplexD nrm = innerProduct(arg,arg);
|
||||
return std::real(nrm);
|
||||
template <class vobj>
|
||||
inline RealD norm2(const Lattice<vobj> &arg) {
|
||||
ComplexD nrm = innerProduct(arg, arg);
|
||||
return std::real(nrm);
|
||||
}
|
||||
|
||||
template <class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,
|
||||
const Lattice<vobj> &right) {
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
scalar_type nrm;
|
||||
|
||||
GridBase *grid = left._grid;
|
||||
|
||||
std::vector<vector_type, alignedAllocator<vector_type> > sumarray(
|
||||
grid->SumArraySize());
|
||||
for (int i = 0; i < grid->SumArraySize(); i++) {
|
||||
sumarray[i] = zero;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||
{
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
scalar_type nrm;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int thr = 0; thr < grid->SumArraySize(); thr++) {
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(), thr, mywork, myoff);
|
||||
|
||||
GridBase *grid = left._grid;
|
||||
|
||||
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
|
||||
}
|
||||
sumarray[thr]=TensorRemove(vnrm) ;
|
||||
}
|
||||
|
||||
vector_type vvnrm; vvnrm=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vvnrm = vvnrm+sumarray[i];
|
||||
}
|
||||
nrm = Reduce(vvnrm);// sum across simd
|
||||
right._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
decltype(innerProduct(left._odata[0], right._odata[0])) vnrm =
|
||||
zero; // private to thread; sub summation
|
||||
for (int ss = myoff; ss < mywork + myoff; ss++) {
|
||||
vnrm = vnrm + innerProduct(left._odata[ss], right._odata[ss]);
|
||||
}
|
||||
sumarray[thr] = TensorRemove(vnrm);
|
||||
}
|
||||
|
||||
|
||||
template<class Op,class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
vector_type vvnrm;
|
||||
vvnrm = zero; // sum across threads
|
||||
for (int i = 0; i < grid->SumArraySize(); i++) {
|
||||
vvnrm = vvnrm + sumarray[i];
|
||||
}
|
||||
nrm = Reduce(vvnrm); // sum across simd
|
||||
right._grid->GlobalSum(nrm);
|
||||
return nrm;
|
||||
}
|
||||
|
||||
template <class Op, class T1>
|
||||
inline auto sum(const LatticeUnaryExpression<Op, T1> &expr) ->
|
||||
typename decltype(
|
||||
expr.first.func(eval(0, std::get<0>(expr.second))))::scalar_object {
|
||||
return sum(closure(expr));
|
||||
}
|
||||
|
||||
template<class Op,class T1,class T2>
|
||||
@ -96,9 +101,9 @@ PARALLEL_FOR_LOOP
|
||||
template<class Op,class T1,class T2,class T3>
|
||||
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
|
||||
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
eval(0,std::get<1>(expr.second)),
|
||||
eval(0,std::get<2>(expr.second))
|
||||
))::scalar_object
|
||||
{
|
||||
return sum(closure(expr));
|
||||
}
|
||||
@ -111,24 +116,24 @@ PARALLEL_FOR_LOOP
|
||||
|
||||
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
sumarray[i]=zero;
|
||||
sumarray[i]=zero;
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
int nwork, mywork, myoff;
|
||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||
|
||||
vobj vvsum=zero;
|
||||
vobj vvsum=zero;
|
||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
vvsum = vvsum + arg._odata[ss];
|
||||
}
|
||||
sumarray[thr]=vvsum;
|
||||
}
|
||||
|
||||
vobj vsum=zero; // sum across threads
|
||||
for(int i=0;i<grid->SumArraySize();i++){
|
||||
vsum = vsum+sumarray[i];
|
||||
vsum = vsum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
|
Reference in New Issue
Block a user