1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

First cut at higher precision reduction

This commit is contained in:
paboyle 2017-04-15 10:57:21 +01:00
parent a8db024c92
commit 441a52ee5d
5 changed files with 210 additions and 143 deletions

View File

@ -38,110 +38,108 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations // Deterministic Reduction operations
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg); ComplexD nrm = innerProduct(arg,arg);
return std::real(nrm); return std::real(nrm);
}
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
} }
template<class vobj> parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) int nwork, mywork, myoff;
{ GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type; decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
scalar_type nrm; for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
} }
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1> template<class Op,class T1,class T2>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2>
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
{ {
return sum(closure(expr)); return sum(closure(expr));
}
template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0,std::get<1>(expr.second)),
eval(0,std::get<2>(expr.second))
))::scalar_object
{
return sum(closure(expr));
}
template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
GridBase *grid=arg._grid;
int Nsimd = grid->Nsimd();
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss];
} }
sumarray[thr]=vvsum;
}
template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) vobj vsum=zero; // sum across threads
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), for(int i=0;i<grid->SumArraySize();i++){
eval(0,std::get<1>(expr.second)), vsum = vsum+sumarray[i];
eval(0,std::get<2>(expr.second)) }
))::scalar_object
{ typedef typename vobj::scalar_object sobj;
return sum(closure(expr)); sobj ssum=zero;
}
std::vector<sobj> buf(Nsimd);
template<class vobj> extract(vsum,buf);
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg){
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
GridBase *grid=arg._grid; arg._grid->GlobalSum(ssum);
int Nsimd = grid->Nsimd();
return ssum;
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize()); }
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
}
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{ {
@ -214,7 +212,6 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
result[t]=gsum; result[t]=gsum;
} }
} }

View File

@ -694,7 +694,6 @@ inline Grid_simd<S, V> innerProduct(const Grid_simd<S, V> &l,
const Grid_simd<S, V> &r) { const Grid_simd<S, V> &r) {
return conjugate(l) * r; return conjugate(l) * r;
} }
template <class S, class V> template <class S, class V>
inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l, inline Grid_simd<S, V> outerProduct(const Grid_simd<S, V> &l,
const Grid_simd<S, V> &r) { const Grid_simd<S, V> &r) {

View File

@ -56,6 +56,7 @@ class iScalar {
typedef vtype element; typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type; typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef iScalar<tensor_reduced_v> tensor_reduced; typedef iScalar<tensor_reduced_v> tensor_reduced;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
@ -193,6 +194,7 @@ class iVector {
typedef vtype element; typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type; typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;
typedef iScalar<tensor_reduced_v> tensor_reduced; typedef iScalar<tensor_reduced_v> tensor_reduced;
@ -305,6 +307,7 @@ class iMatrix {
typedef vtype element; typedef vtype element;
typedef typename GridTypeMapper<vtype>::scalar_type scalar_type; typedef typename GridTypeMapper<vtype>::scalar_type scalar_type;
typedef typename GridTypeMapper<vtype>::vector_type vector_type; typedef typename GridTypeMapper<vtype>::vector_type vector_type;
typedef typename GridTypeMapper<vtype>::vector_typeD vector_typeD;
typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v; typedef typename GridTypeMapper<vtype>::tensor_reduced tensor_reduced_v;
typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object; typedef typename GridTypeMapper<vtype>::scalar_object recurse_scalar_object;

View File

@ -29,51 +29,109 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_MATH_INNER_H #ifndef GRID_MATH_INNER_H
#define GRID_MATH_INNER_H #define GRID_MATH_INNER_H
namespace Grid { namespace Grid {
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// innerProduct Scalar x Scalar -> Scalar // innerProduct Scalar x Scalar -> Scalar
// innerProduct Vector x Vector -> Scalar // innerProduct Vector x Vector -> Scalar
// innerProduct Matrix x Matrix -> Scalar // innerProduct Matrix x Matrix -> Scalar
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
template<class sobj> inline RealD norm2(const sobj &arg){ template<class sobj> inline RealD norm2(const sobj &arg){
typedef typename sobj::scalar_type scalar; auto nrm = innerProductD(arg,arg);
decltype(innerProduct(arg,arg)) nrm; RealD ret = real(nrm);
nrm = innerProduct(arg,arg); return ret;
RealD ret = real(nrm); }
return ret; //////////////////////////////////////
} // If single promote to double and sum 2x
//////////////////////////////////////
template<class l,class r,int N> inline inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))> inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
{ inline RealD innerProductD(const RealD &l,const RealD &r){ return innerProduct(l,r); }
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t; inline RealD innerProductD(const RealF &l,const RealF &r){ return innerProduct(l,r); }
iScalar<ret_t> ret;
ret=zero; inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
for(int c1=0;c1<N;c1++){ inline vRealD innerProductD(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]); inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){
} vComplexD la,lb;
return ret; vComplexD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
return innerProduct(la,ra) + innerProduct(lb,rb);
}
inline vRealD innerProductD(const vRealF &l,const vRealF &r){
vRealD la,lb;
vRealD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
return innerProduct(la,ra) + innerProduct(lb,rb);
}
template<class l,class r,int N> inline
auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
{
typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret;
ret=zero;
for(int c1=0;c1<N;c1++){
ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
} }
template<class l,class r,int N> inline return ret;
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))> }
{ template<class l,class r,int N> inline
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t; auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
iScalar<ret_t> ret; {
iScalar<ret_t> tmp; typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
ret=zero; iScalar<ret_t> ret;
for(int c1=0;c1<N;c1++){ iScalar<ret_t> tmp;
for(int c2=0;c2<N;c2++){ ret=zero;
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]); for(int c1=0;c1<N;c1++){
}} for(int c2=0;c2<N;c2++){
return ret; ret._internal+=innerProductD(lhs._internal[c1][c2],rhs._internal[c1][c2]);
} }}
template<class l,class r> inline return ret;
auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))> }
{ template<class l,class r> inline
typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t; auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
iScalar<ret_t> ret; {
ret._internal = innerProduct(lhs._internal,rhs._internal); typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
return ret; iScalar<ret_t> ret;
ret._internal = innerProductD(lhs._internal,rhs._internal);
return ret;
}
//////////////////////
// Keep same precison
//////////////////////
template<class l,class r,int N> inline
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
{
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret;
ret=zero;
for(int c1=0;c1<N;c1++){
ret._internal += innerProduct(lhs._internal[c1],rhs._internal[c1]);
} }
return ret;
}
template<class l,class r,int N> inline
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
{
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
iScalar<ret_t> ret;
iScalar<ret_t> tmp;
ret=zero;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal+=innerProduct(lhs._internal[c1][c2],rhs._internal[c1][c2]);
}}
return ret;
}
template<class l,class r> inline
auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
{
typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;
iScalar<ret_t> ret;
ret._internal = innerProduct(lhs._internal,rhs._internal);
return ret;
}
} }
#endif #endif

View File

@ -53,6 +53,7 @@ namespace Grid {
public: public:
typedef typename T::scalar_type scalar_type; typedef typename T::scalar_type scalar_type;
typedef typename T::vector_type vector_type; typedef typename T::vector_type vector_type;
typedef typename T::vector_typeD vector_typeD;
typedef typename T::tensor_reduced tensor_reduced; typedef typename T::tensor_reduced tensor_reduced;
typedef typename T::scalar_object scalar_object; typedef typename T::scalar_object scalar_object;
typedef typename T::Complexified Complexified; typedef typename T::Complexified Complexified;
@ -67,6 +68,7 @@ namespace Grid {
public: public:
typedef RealF scalar_type; typedef RealF scalar_type;
typedef RealF vector_type; typedef RealF vector_type;
typedef RealD vector_typeD;
typedef RealF tensor_reduced ; typedef RealF tensor_reduced ;
typedef RealF scalar_object; typedef RealF scalar_object;
typedef ComplexF Complexified; typedef ComplexF Complexified;
@ -77,6 +79,7 @@ namespace Grid {
public: public:
typedef RealD scalar_type; typedef RealD scalar_type;
typedef RealD vector_type; typedef RealD vector_type;
typedef RealD vector_typeD;
typedef RealD tensor_reduced; typedef RealD tensor_reduced;
typedef RealD scalar_object; typedef RealD scalar_object;
typedef ComplexD Complexified; typedef ComplexD Complexified;
@ -87,6 +90,7 @@ namespace Grid {
public: public:
typedef ComplexF scalar_type; typedef ComplexF scalar_type;
typedef ComplexF vector_type; typedef ComplexF vector_type;
typedef ComplexD vector_typeD;
typedef ComplexF tensor_reduced; typedef ComplexF tensor_reduced;
typedef ComplexF scalar_object; typedef ComplexF scalar_object;
typedef ComplexF Complexified; typedef ComplexF Complexified;
@ -97,6 +101,7 @@ namespace Grid {
public: public:
typedef ComplexD scalar_type; typedef ComplexD scalar_type;
typedef ComplexD vector_type; typedef ComplexD vector_type;
typedef ComplexD vector_typeD;
typedef ComplexD tensor_reduced; typedef ComplexD tensor_reduced;
typedef ComplexD scalar_object; typedef ComplexD scalar_object;
typedef ComplexD Complexified; typedef ComplexD Complexified;
@ -118,6 +123,7 @@ namespace Grid {
public: public:
typedef RealF scalar_type; typedef RealF scalar_type;
typedef vRealF vector_type; typedef vRealF vector_type;
typedef vRealD vector_typeD;
typedef vRealF tensor_reduced; typedef vRealF tensor_reduced;
typedef RealF scalar_object; typedef RealF scalar_object;
typedef vComplexF Complexified; typedef vComplexF Complexified;
@ -128,6 +134,7 @@ namespace Grid {
public: public:
typedef RealD scalar_type; typedef RealD scalar_type;
typedef vRealD vector_type; typedef vRealD vector_type;
typedef vRealD vector_typeD;
typedef vRealD tensor_reduced; typedef vRealD tensor_reduced;
typedef RealD scalar_object; typedef RealD scalar_object;
typedef vComplexD Complexified; typedef vComplexD Complexified;
@ -138,6 +145,7 @@ namespace Grid {
public: public:
typedef ComplexF scalar_type; typedef ComplexF scalar_type;
typedef vComplexF vector_type; typedef vComplexF vector_type;
typedef vComplexD vector_typeD;
typedef vComplexF tensor_reduced; typedef vComplexF tensor_reduced;
typedef ComplexF scalar_object; typedef ComplexF scalar_object;
typedef vComplexF Complexified; typedef vComplexF Complexified;
@ -148,6 +156,7 @@ namespace Grid {
public: public:
typedef ComplexD scalar_type; typedef ComplexD scalar_type;
typedef vComplexD vector_type; typedef vComplexD vector_type;
typedef vComplexD vector_typeD;
typedef vComplexD tensor_reduced; typedef vComplexD tensor_reduced;
typedef ComplexD scalar_object; typedef ComplexD scalar_object;
typedef vComplexD Complexified; typedef vComplexD Complexified;
@ -158,6 +167,7 @@ namespace Grid {
public: public:
typedef Integer scalar_type; typedef Integer scalar_type;
typedef vInteger vector_type; typedef vInteger vector_type;
typedef vInteger vector_typeD;
typedef vInteger tensor_reduced; typedef vInteger tensor_reduced;
typedef Integer scalar_object; typedef Integer scalar_object;
typedef void Complexified; typedef void Complexified;