mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Double precision variants for summation accuracy
This commit is contained in:
parent
2b037e3daa
commit
96ac56cace
@ -47,9 +47,11 @@ class GridTensorBase {};
|
||||
using element = vtype; \
|
||||
using scalar_type = typename Traits::scalar_type; \
|
||||
using vector_type = typename Traits::vector_type; \
|
||||
using scalar_typeD = typename Traits::scalar_typeD; \
|
||||
using vector_typeD = typename Traits::vector_typeD; \
|
||||
using tensor_reduced = typename Traits::tensor_reduced; \
|
||||
using scalar_object = typename Traits::scalar_object; \
|
||||
using scalar_objectD = typename Traits::scalar_objectD; \
|
||||
using Complexified = typename Traits::Complexified; \
|
||||
using Realified = typename Traits::Realified; \
|
||||
using DoublePrecision = typename Traits::DoublePrecision; \
|
||||
@ -149,7 +151,7 @@ public:
|
||||
|
||||
// Convert elements
|
||||
template <class ttype>
|
||||
accelerator_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) {
|
||||
accelerator_inline iScalar<vtype> operator=(const iScalar<ttype> &arg) {
|
||||
_internal = arg._internal;
|
||||
return *this;
|
||||
}
|
||||
@ -200,6 +202,14 @@ public:
|
||||
accelerator iVector() = default;
|
||||
accelerator_inline iVector(const Zero &z) { zeroit(*this); };
|
||||
|
||||
template<class other>
|
||||
accelerator_inline iVector<vtype, N> &operator=(const iVector<other,N> &him)
|
||||
{
|
||||
for (int i = 0; i < N; i++) {
|
||||
_internal[i] = him._internal[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
accelerator_inline iVector<vtype, N> &operator=(const Zero &hero) {
|
||||
zeroit(*this);
|
||||
return *this;
|
||||
@ -296,10 +306,12 @@ public:
|
||||
accelerator_inline iMatrix(const Zero &z) { zeroit(*this); };
|
||||
accelerator iMatrix() = default;
|
||||
|
||||
accelerator_inline iMatrix &operator=(const iMatrix &rhs) {
|
||||
// Allow for type conversion.
|
||||
template<class other>
|
||||
accelerator_inline iMatrix &operator=(const iMatrix<other,N> &rhs) {
|
||||
for (int i = 0; i < N; i++)
|
||||
for (int j = 0; j < N; j++)
|
||||
vstream(_internal[i][j], rhs._internal[i][j]);
|
||||
_internal[i][j] = rhs._internal[i][j];
|
||||
return *this;
|
||||
};
|
||||
|
||||
|
@ -41,10 +41,103 @@ template<class sobj> accelerator_inline RealD norm2(const sobj &arg){
|
||||
RealD ret = real(nrm);
|
||||
return ret;
|
||||
}
|
||||
//////////////////////////////////////
|
||||
// If single promote to double and sum 2x
|
||||
//////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// FIXME horizontal sum: single promote to double and sum
|
||||
// Not yet used; make sum_cpu in LatticeReduction.h call these
|
||||
// GPU sum_gpu is using a double precision promotion.
|
||||
////////////////////////////////////////////////////////////////
|
||||
#if 0
|
||||
accelerator_inline ComplexD ReduceD(const ComplexF &l) { return l; };
|
||||
accelerator_inline ComplexD ReduceD(const ComplexD &l) { return l; };
|
||||
accelerator_inline RealD ReduceD(const RealD &l) { return l; };
|
||||
accelerator_inline RealD ReduceD(const RealF &l) { return l; };
|
||||
|
||||
accelerator_inline ComplexD ReduceD(const vComplexD &l) { return Reduce(l); };
|
||||
accelerator_inline RealD ReduceD(const vRealD &l) { return Reduce(l); };
|
||||
accelerator_inline ComplexD ReduceD(const vComplexF &l)
|
||||
{
|
||||
vComplexD la,lb;
|
||||
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||
return Reduce(la)+Reduce(lb);
|
||||
}
|
||||
accelerator_inline RealD ReduceD(const vRealF &l)
|
||||
{
|
||||
vRealD la,lb;
|
||||
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||
return Reduce(la)+Reduce(lb);
|
||||
};
|
||||
///////////////////////////////////////////////////////
|
||||
// Now do it for vector, matrix, scalar
|
||||
///////////////////////////////////////////////////////
|
||||
template<class l,int N> accelerator_inline
|
||||
auto ReduceD (const iVector<l,N>& lhs) -> iVector<decltype(ReduceD(lhs._internal[0])),N>
|
||||
{
|
||||
typedef decltype(ReduceD(lhs._internal[0])) ret_t;
|
||||
iVector<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
ret._internal[c1] = ReduceD(lhs._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class l,int N> accelerator_inline
|
||||
auto ReduceD (const iMatrix<l,N>& lhs) -> iMatrix<decltype(ReduceD(lhs._internal[0][0])),N>
|
||||
{
|
||||
typedef decltype(ReduceD(lhs._internal[0][0])) ret_t;
|
||||
iMatrix<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2]=ReduceD(lhs._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l> accelerator_inline
|
||||
auto ReduceD (const iScalar<l>& lhs) -> iScalar<decltype(ReduceD(lhs._internal))>
|
||||
{
|
||||
typedef decltype(ReduceD(lhs._internal)) ret_t;
|
||||
iScalar<ret_t> ret;
|
||||
ret._internal = ReduceD(lhs._internal);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
///////////////////////////////////////////////////////
|
||||
// Now do Reduce for vector, matrix, scalar
|
||||
///////////////////////////////////////////////////////
|
||||
template<class l,int N> accelerator_inline
|
||||
auto Reduce (const iVector<l,N>& lhs) -> iVector<decltype(Reduce(lhs._internal[0])),N>
|
||||
{
|
||||
typedef decltype(Reduce(lhs._internal[0])) ret_t;
|
||||
iVector<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
ret._internal[c1] = Reduce(lhs._internal[c1]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
template<class l,int N> accelerator_inline
|
||||
auto Reduce (const iMatrix<l,N>& lhs) -> iMatrix<decltype(Reduce(lhs._internal[0][0])),N>
|
||||
{
|
||||
typedef decltype(Reduce(lhs._internal[0][0])) ret_t;
|
||||
iMatrix<ret_t,N> ret;
|
||||
for(int c1=0;c1<N;c1++){
|
||||
for(int c2=0;c2<N;c2++){
|
||||
ret._internal[c1][c2]=Reduce(lhs._internal[c1][c2]);
|
||||
}}
|
||||
return ret;
|
||||
}
|
||||
template<class l> accelerator_inline
|
||||
auto Reduce (const iScalar<l>& lhs) -> iScalar<decltype(Reduce(lhs._internal))>
|
||||
{
|
||||
typedef decltype(Reduce(lhs._internal)) ret_t;
|
||||
iScalar<ret_t> ret;
|
||||
ret._internal = Reduce(lhs._internal);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////
|
||||
// innerProductD : if single promote to double and evaluate with sum 2x
|
||||
//////////////////////////////////////
|
||||
accelerator_inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
|
||||
accelerator_inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
|
||||
accelerator_inline RealD innerProductD(const RealD &l,const RealD &r){ return innerProduct(l,r); }
|
||||
@ -52,14 +145,16 @@ accelerator_inline RealD innerProductD(const RealF &l,const RealF &r){
|
||||
|
||||
accelerator_inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
|
||||
accelerator_inline vRealD innerProductD(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
|
||||
accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){
|
||||
accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r)
|
||||
{
|
||||
vComplexD la,lb;
|
||||
vComplexD ra,rb;
|
||||
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
|
||||
return innerProduct(la,ra) + innerProduct(lb,rb);
|
||||
}
|
||||
accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){
|
||||
accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r)
|
||||
{
|
||||
vRealD la,lb;
|
||||
vRealD ra,rb;
|
||||
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||
@ -67,6 +162,7 @@ accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){
|
||||
return innerProduct(la,ra) + innerProduct(lb,rb);
|
||||
}
|
||||
|
||||
// Now do it for vector, matrix, scalar
|
||||
template<class l,class r,int N> accelerator_inline
|
||||
auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
|
||||
{
|
||||
|
@ -72,50 +72,60 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base {
|
||||
typedef RealF scalar_type;
|
||||
typedef RealD scalar_typeD;
|
||||
typedef RealF vector_type;
|
||||
typedef RealD vector_typeD;
|
||||
typedef RealF tensor_reduced ;
|
||||
typedef RealF scalar_object;
|
||||
typedef RealD scalar_objectD;
|
||||
typedef ComplexF Complexified;
|
||||
typedef RealF Realified;
|
||||
typedef RealD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
|
||||
typedef RealD scalar_type;
|
||||
typedef RealD scalar_typeD;
|
||||
typedef RealD vector_type;
|
||||
typedef RealD vector_typeD;
|
||||
typedef RealD tensor_reduced;
|
||||
typedef RealD scalar_object;
|
||||
typedef RealD scalar_objectD;
|
||||
typedef ComplexD Complexified;
|
||||
typedef RealD Realified;
|
||||
typedef RealD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
|
||||
typedef ComplexF scalar_type;
|
||||
typedef ComplexD scalar_typeD;
|
||||
typedef ComplexF vector_type;
|
||||
typedef ComplexD vector_typeD;
|
||||
typedef ComplexF tensor_reduced;
|
||||
typedef ComplexF scalar_object;
|
||||
typedef ComplexD scalar_objectD;
|
||||
typedef ComplexF Complexified;
|
||||
typedef RealF Realified;
|
||||
typedef ComplexD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
|
||||
typedef ComplexD scalar_type;
|
||||
typedef ComplexD scalar_typeD;
|
||||
typedef ComplexD vector_type;
|
||||
typedef ComplexD vector_typeD;
|
||||
typedef ComplexD tensor_reduced;
|
||||
typedef ComplexD scalar_object;
|
||||
typedef ComplexD scalar_objectD;
|
||||
typedef ComplexD Complexified;
|
||||
typedef RealD Realified;
|
||||
typedef ComplexD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
|
||||
typedef Integer scalar_type;
|
||||
typedef Integer scalar_typeD;
|
||||
typedef Integer vector_type;
|
||||
typedef Integer vector_typeD;
|
||||
typedef Integer tensor_reduced;
|
||||
typedef Integer scalar_object;
|
||||
typedef Integer scalar_objectD;
|
||||
typedef void Complexified;
|
||||
typedef void Realified;
|
||||
typedef void DoublePrecision;
|
||||
@ -123,20 +133,24 @@ NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
|
||||
typedef RealF scalar_type;
|
||||
typedef RealD scalar_typeD;
|
||||
typedef vRealF vector_type;
|
||||
typedef vRealD vector_typeD;
|
||||
typedef vRealF tensor_reduced;
|
||||
typedef RealF scalar_object;
|
||||
typedef RealD scalar_objectD;
|
||||
typedef vComplexF Complexified;
|
||||
typedef vRealF Realified;
|
||||
typedef vRealD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
|
||||
typedef RealD scalar_type;
|
||||
typedef RealD scalar_typeD;
|
||||
typedef vRealD vector_type;
|
||||
typedef vRealD vector_typeD;
|
||||
typedef vRealD tensor_reduced;
|
||||
typedef RealD scalar_object;
|
||||
typedef RealD scalar_objectD;
|
||||
typedef vComplexD Complexified;
|
||||
typedef vRealD Realified;
|
||||
typedef vRealD DoublePrecision;
|
||||
@ -144,10 +158,12 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
|
||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||
typedef RealF scalar_type;
|
||||
typedef RealD scalar_typeD;
|
||||
typedef vRealH vector_type;
|
||||
typedef vRealD vector_typeD;
|
||||
typedef vRealH tensor_reduced;
|
||||
typedef RealF scalar_object;
|
||||
typedef RealD scalar_objectD;
|
||||
typedef vComplexH Complexified;
|
||||
typedef vRealH Realified;
|
||||
typedef vRealD DoublePrecision;
|
||||
@ -155,40 +171,48 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
|
||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||
typedef ComplexF scalar_type;
|
||||
typedef ComplexD scalar_typeD;
|
||||
typedef vComplexH vector_type;
|
||||
typedef vComplexD vector_typeD;
|
||||
typedef vComplexH tensor_reduced;
|
||||
typedef ComplexF scalar_object;
|
||||
typedef ComplexD scalar_objectD;
|
||||
typedef vComplexH Complexified;
|
||||
typedef vRealH Realified;
|
||||
typedef vComplexD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
|
||||
typedef ComplexF scalar_type;
|
||||
typedef ComplexD scalar_typeD;
|
||||
typedef vComplexF vector_type;
|
||||
typedef vComplexD vector_typeD;
|
||||
typedef vComplexF tensor_reduced;
|
||||
typedef ComplexF scalar_object;
|
||||
typedef ComplexD scalar_objectD;
|
||||
typedef vComplexF Complexified;
|
||||
typedef vRealF Realified;
|
||||
typedef vComplexD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
|
||||
typedef ComplexD scalar_type;
|
||||
typedef ComplexD scalar_typeD;
|
||||
typedef vComplexD vector_type;
|
||||
typedef vComplexD vector_typeD;
|
||||
typedef vComplexD tensor_reduced;
|
||||
typedef ComplexD scalar_object;
|
||||
typedef ComplexD scalar_objectD;
|
||||
typedef vComplexD Complexified;
|
||||
typedef vRealD Realified;
|
||||
typedef vComplexD DoublePrecision;
|
||||
};
|
||||
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
|
||||
typedef Integer scalar_type;
|
||||
typedef Integer scalar_typeD;
|
||||
typedef vInteger vector_type;
|
||||
typedef vInteger vector_typeD;
|
||||
typedef vInteger tensor_reduced;
|
||||
typedef Integer scalar_object;
|
||||
typedef Integer scalar_objectD;
|
||||
typedef void Complexified;
|
||||
typedef void Realified;
|
||||
typedef void DoublePrecision;
|
||||
@ -198,6 +222,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
using BaseTraits = GridTypeMapper<T>; \
|
||||
using scalar_type = typename BaseTraits::scalar_type; \
|
||||
using vector_type = typename BaseTraits::vector_type; \
|
||||
using scalar_typeD = typename BaseTraits::scalar_typeD; \
|
||||
using vector_typeD = typename BaseTraits::vector_typeD; \
|
||||
static constexpr int TensorLevel = BaseTraits::TensorLevel + 1
|
||||
|
||||
@ -205,6 +230,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
GridTypeMapper_RepeatedTypes;
|
||||
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
|
||||
using scalar_object = iScalar<typename BaseTraits::scalar_object>;
|
||||
using scalar_objectD = iScalar<typename BaseTraits::scalar_objectD>;
|
||||
using Complexified = iScalar<typename BaseTraits::Complexified>;
|
||||
using Realified = iScalar<typename BaseTraits::Realified>;
|
||||
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
|
||||
@ -218,6 +244,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
GridTypeMapper_RepeatedTypes;
|
||||
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
|
||||
using scalar_object = iVector<typename BaseTraits::scalar_object, N>;
|
||||
using scalar_objectD = iVector<typename BaseTraits::scalar_objectD, N>;
|
||||
using Complexified = iVector<typename BaseTraits::Complexified, N>;
|
||||
using Realified = iVector<typename BaseTraits::Realified, N>;
|
||||
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
|
||||
@ -231,6 +258,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
GridTypeMapper_RepeatedTypes;
|
||||
using tensor_reduced = iScalar<typename BaseTraits::tensor_reduced>;
|
||||
using scalar_object = iMatrix<typename BaseTraits::scalar_object, N>;
|
||||
using scalar_objectD = iMatrix<typename BaseTraits::scalar_objectD, N>;
|
||||
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
|
||||
using Realified = iMatrix<typename BaseTraits::Realified, N>;
|
||||
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
|
||||
|
Loading…
Reference in New Issue
Block a user