1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 01:05:38 +01:00

Double precision variants for summation accuracy

This commit is contained in:
Peter Boyle 2019-08-14 13:08:01 +01:00
parent 2b037e3daa
commit 96ac56cace
3 changed files with 144 additions and 8 deletions

View File

@ -47,9 +47,11 @@ class GridTensorBase {};
using element = vtype; \
using scalar_type = typename Traits::scalar_type; \
using vector_type = typename Traits::vector_type; \
using scalar_typeD = typename Traits::scalar_typeD; \
using vector_typeD = typename Traits::vector_typeD; \
using tensor_reduced = typename Traits::tensor_reduced; \
using scalar_object = typename Traits::scalar_object; \
using scalar_objectD = typename Traits::scalar_objectD; \
using Complexified = typename Traits::Complexified; \
using Realified = typename Traits::Realified; \
using DoublePrecision = typename Traits::DoublePrecision; \
@ -149,7 +151,7 @@ public:
// Convert elements
template <class ttype>
accelerator_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) {
accelerator_inline iScalar<vtype> operator=(const iScalar<ttype> &arg) {
_internal = arg._internal;
return *this;
}
@ -200,6 +202,14 @@ public:
accelerator iVector() = default;
accelerator_inline iVector(const Zero &z) { zeroit(*this); };
template<class other>
accelerator_inline iVector<vtype, N> &operator=(const iVector<other,N> &him)
{
for (int i = 0; i < N; i++) {
_internal[i] = him._internal[i];
}
return *this;
}
accelerator_inline iVector<vtype, N> &operator=(const Zero &hero) {
zeroit(*this);
return *this;
@ -296,10 +306,12 @@ public:
accelerator_inline iMatrix(const Zero &z) { zeroit(*this); };
accelerator iMatrix() = default;
accelerator_inline iMatrix &operator=(const iMatrix &rhs) {
// Allow for type conversion.
template<class other>
accelerator_inline iMatrix &operator=(const iMatrix<other,N> &rhs) {
for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++)
vstream(_internal[i][j], rhs._internal[i][j]);
_internal[i][j] = rhs._internal[i][j];
return *this;
};

View File

@ -41,10 +41,103 @@ template<class sobj> accelerator_inline RealD norm2(const sobj &arg){
RealD ret = real(nrm);
return ret;
}
//////////////////////////////////////
// If single promote to double and sum 2x
//////////////////////////////////////
////////////////////////////////////////////////////////////////
// FIXME horizontal sum: single promote to double and sum
// Not yet used; make sum_cpu in LatticeReduction.h call these
// GPU sum_gpu is using a double precision promotion.
////////////////////////////////////////////////////////////////
#if 0
accelerator_inline ComplexD ReduceD(const ComplexF &l) { return l; };
accelerator_inline ComplexD ReduceD(const ComplexD &l) { return l; };
accelerator_inline RealD ReduceD(const RealD &l) { return l; };
accelerator_inline RealD ReduceD(const RealF &l) { return l; };
accelerator_inline ComplexD ReduceD(const vComplexD &l) { return Reduce(l); };
accelerator_inline RealD ReduceD(const vRealD &l) { return Reduce(l); };
accelerator_inline ComplexD ReduceD(const vComplexF &l)
{
vComplexD la,lb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
return Reduce(la)+Reduce(lb);
}
accelerator_inline RealD ReduceD(const vRealF &l)
{
vRealD la,lb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
return Reduce(la)+Reduce(lb);
};
///////////////////////////////////////////////////////
// Now do it for vector, matrix, scalar
///////////////////////////////////////////////////////
template<class l,int N> accelerator_inline
auto ReduceD (const iVector<l,N>& lhs) -> iVector<decltype(ReduceD(lhs._internal[0])),N>
{
typedef decltype(ReduceD(lhs._internal[0])) ret_t;
iVector<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
ret._internal[c1] = ReduceD(lhs._internal[c1]);
}
return ret;
}
template<class l,int N> accelerator_inline
auto ReduceD (const iMatrix<l,N>& lhs) -> iMatrix<decltype(ReduceD(lhs._internal[0][0])),N>
{
typedef decltype(ReduceD(lhs._internal[0][0])) ret_t;
iMatrix<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2]=ReduceD(lhs._internal[c1][c2]);
}}
return ret;
}
template<class l> accelerator_inline
auto ReduceD (const iScalar<l>& lhs) -> iScalar<decltype(ReduceD(lhs._internal))>
{
typedef decltype(ReduceD(lhs._internal)) ret_t;
iScalar<ret_t> ret;
ret._internal = ReduceD(lhs._internal);
return ret;
}
#endif
///////////////////////////////////////////////////////
// Now do Reduce for vector, matrix, scalar
///////////////////////////////////////////////////////
template<class l,int N> accelerator_inline
auto Reduce (const iVector<l,N>& lhs) -> iVector<decltype(Reduce(lhs._internal[0])),N>
{
typedef decltype(Reduce(lhs._internal[0])) ret_t;
iVector<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
ret._internal[c1] = Reduce(lhs._internal[c1]);
}
return ret;
}
template<class l,int N> accelerator_inline
auto Reduce (const iMatrix<l,N>& lhs) -> iMatrix<decltype(Reduce(lhs._internal[0][0])),N>
{
typedef decltype(Reduce(lhs._internal[0][0])) ret_t;
iMatrix<ret_t,N> ret;
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
ret._internal[c1][c2]=Reduce(lhs._internal[c1][c2]);
}}
return ret;
}
template<class l> accelerator_inline
auto Reduce (const iScalar<l>& lhs) -> iScalar<decltype(Reduce(lhs._internal))>
{
typedef decltype(Reduce(lhs._internal)) ret_t;
iScalar<ret_t> ret;
ret._internal = Reduce(lhs._internal);
return ret;
}
//////////////////////////////////////
// innerProductD : if single promote to double and evaluate with sum 2x
//////////////////////////////////////
accelerator_inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
accelerator_inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
accelerator_inline RealD innerProductD(const RealD &l,const RealD &r){ return innerProduct(l,r); }
@ -52,14 +145,16 @@ accelerator_inline RealD innerProductD(const RealF &l,const RealF &r){
accelerator_inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
accelerator_inline vRealD innerProductD(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){
accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r)
{
vComplexD la,lb;
vComplexD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
return innerProduct(la,ra) + innerProduct(lb,rb);
}
accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){
accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r)
{
vRealD la,lb;
vRealD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
@ -67,6 +162,7 @@ accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){
return innerProduct(la,ra) + innerProduct(lb,rb);
}
// Now do it for vector, matrix, scalar
template<class l,class r,int N> accelerator_inline
auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
{

View File

@ -72,50 +72,60 @@ NAMESPACE_BEGIN(Grid);
template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base {
typedef RealF scalar_type;
typedef RealD scalar_typeD;
typedef RealF vector_type;
typedef RealD vector_typeD;
typedef RealF tensor_reduced ;
typedef RealF scalar_object;
typedef RealD scalar_objectD;
typedef ComplexF Complexified;
typedef RealF Realified;
typedef RealD DoublePrecision;
};
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
typedef RealD scalar_type;
typedef RealD scalar_typeD;
typedef RealD vector_type;
typedef RealD vector_typeD;
typedef RealD tensor_reduced;
typedef RealD scalar_object;
typedef RealD scalar_objectD;
typedef ComplexD Complexified;
typedef RealD Realified;
typedef RealD DoublePrecision;
};
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
typedef ComplexF scalar_type;
typedef ComplexD scalar_typeD;
typedef ComplexF vector_type;
typedef ComplexD vector_typeD;
typedef ComplexF tensor_reduced;
typedef ComplexF scalar_object;
typedef ComplexD scalar_objectD;
typedef ComplexF Complexified;
typedef RealF Realified;
typedef ComplexD DoublePrecision;
};
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
typedef ComplexD scalar_type;
typedef ComplexD scalar_typeD;
typedef ComplexD vector_type;
typedef ComplexD vector_typeD;
typedef ComplexD tensor_reduced;
typedef ComplexD scalar_object;
typedef ComplexD scalar_objectD;
typedef ComplexD Complexified;
typedef RealD Realified;
typedef ComplexD DoublePrecision;
};
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
typedef Integer scalar_type;
typedef Integer scalar_typeD;
typedef Integer vector_type;
typedef Integer vector_typeD;
typedef Integer tensor_reduced;
typedef Integer scalar_object;
typedef Integer scalar_objectD;
typedef void Complexified;
typedef void Realified;
typedef void DoublePrecision;
@ -123,20 +133,24 @@ NAMESPACE_BEGIN(Grid);
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
typedef RealF scalar_type;
typedef RealD scalar_typeD;
typedef vRealF vector_type;
typedef vRealD vector_typeD;
typedef vRealF tensor_reduced;
typedef RealF scalar_object;
typedef RealD scalar_objectD;
typedef vComplexF Complexified;
typedef vRealF Realified;
typedef vRealD DoublePrecision;
};
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
typedef RealD scalar_type;
typedef RealD scalar_typeD;
typedef vRealD vector_type;
typedef vRealD vector_typeD;
typedef vRealD tensor_reduced;
typedef RealD scalar_object;
typedef RealD scalar_objectD;
typedef vComplexD Complexified;
typedef vRealD Realified;
typedef vRealD DoublePrecision;
@ -144,10 +158,12 @@ NAMESPACE_BEGIN(Grid);
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
typedef RealF scalar_type;
typedef RealD scalar_typeD;
typedef vRealH vector_type;
typedef vRealD vector_typeD;
typedef vRealH tensor_reduced;
typedef RealF scalar_object;
typedef RealD scalar_objectD;
typedef vComplexH Complexified;
typedef vRealH Realified;
typedef vRealD DoublePrecision;
@ -155,40 +171,48 @@ NAMESPACE_BEGIN(Grid);
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
typedef ComplexF scalar_type;
typedef ComplexD scalar_typeD;
typedef vComplexH vector_type;
typedef vComplexD vector_typeD;
typedef vComplexH tensor_reduced;
typedef ComplexF scalar_object;
typedef ComplexD scalar_objectD;
typedef vComplexH Complexified;
typedef vRealH Realified;
typedef vComplexD DoublePrecision;
};
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
typedef ComplexF scalar_type;
typedef ComplexD scalar_typeD;
typedef vComplexF vector_type;
typedef vComplexD vector_typeD;
typedef vComplexF tensor_reduced;
typedef ComplexF scalar_object;
typedef ComplexD scalar_objectD;
typedef vComplexF Complexified;
typedef vRealF Realified;
typedef vComplexD DoublePrecision;
};
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
typedef ComplexD scalar_type;
typedef ComplexD scalar_typeD;
typedef vComplexD vector_type;
typedef vComplexD vector_typeD;
typedef vComplexD tensor_reduced;
typedef ComplexD scalar_object;
typedef ComplexD scalar_objectD;
typedef vComplexD Complexified;
typedef vRealD Realified;
typedef vComplexD DoublePrecision;
};
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
typedef Integer scalar_type;
typedef Integer scalar_typeD;
typedef vInteger vector_type;
typedef vInteger vector_typeD;
typedef vInteger tensor_reduced;
typedef Integer scalar_object;
typedef Integer scalar_objectD;
typedef void Complexified;
typedef void Realified;
typedef void DoublePrecision;
@ -198,6 +222,7 @@ NAMESPACE_BEGIN(Grid);
using BaseTraits = GridTypeMapper<T>; \
using scalar_type = typename BaseTraits::scalar_type; \
using vector_type = typename BaseTraits::vector_type; \
using scalar_typeD = typename BaseTraits::scalar_typeD; \
using vector_typeD = typename BaseTraits::vector_typeD; \
static constexpr int TensorLevel = BaseTraits::TensorLevel + 1
@ -205,6 +230,7 @@ NAMESPACE_BEGIN(Grid);
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iScalar<typename BaseTraits::scalar_object>;
using scalar_objectD = iScalar<typename BaseTraits::scalar_objectD>;
using Complexified = iScalar<typename BaseTraits::Complexified>;
using Realified = iScalar<typename BaseTraits::Realified>;
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
@ -218,6 +244,7 @@ NAMESPACE_BEGIN(Grid);
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iVector<typename BaseTraits::scalar_object, N>;
using scalar_objectD = iVector<typename BaseTraits::scalar_objectD, N>;
using Complexified = iVector<typename BaseTraits::Complexified, N>;
using Realified = iVector<typename BaseTraits::Realified, N>;
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
@ -231,6 +258,7 @@ NAMESPACE_BEGIN(Grid);
GridTypeMapper_RepeatedTypes;
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
using scalar_object = iMatrix<typename BaseTraits::scalar_object, N>;
using scalar_objectD = iMatrix<typename BaseTraits::scalar_objectD, N>;
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
using Realified = iMatrix<typename BaseTraits::Realified, N>;
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;