mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-01 04:24:32 +00:00 
			
		
		
		
	Double precision variants for summation accuracy
This commit is contained in:
		| @@ -47,9 +47,11 @@ class GridTensorBase {}; | ||||
|   using element = vtype; \ | ||||
|   using scalar_type     = typename Traits::scalar_type; \ | ||||
|   using vector_type     = typename Traits::vector_type; \ | ||||
|   using scalar_typeD    = typename Traits::scalar_typeD; \ | ||||
|   using vector_typeD    = typename Traits::vector_typeD; \ | ||||
|   using tensor_reduced  = typename Traits::tensor_reduced; \ | ||||
|   using scalar_object   = typename Traits::scalar_object; \ | ||||
|   using scalar_objectD  = typename Traits::scalar_objectD; \ | ||||
|   using Complexified    = typename Traits::Complexified; \ | ||||
|   using Realified       = typename Traits::Realified; \ | ||||
|   using DoublePrecision = typename Traits::DoublePrecision; \ | ||||
| @@ -149,7 +151,7 @@ public: | ||||
|  | ||||
|   // Convert elements | ||||
|   template <class ttype> | ||||
|   accelerator_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) { | ||||
|   accelerator_inline iScalar<vtype> operator=(const iScalar<ttype> &arg) { | ||||
|     _internal = arg._internal; | ||||
|     return *this; | ||||
|   } | ||||
| @@ -200,6 +202,14 @@ public: | ||||
|   accelerator iVector() = default; | ||||
|   accelerator_inline iVector(const Zero &z) { zeroit(*this); }; | ||||
|  | ||||
|   template<class other> | ||||
|   accelerator_inline iVector<vtype, N> &operator=(const iVector<other,N> &him) | ||||
|   { | ||||
|     for (int i = 0; i < N; i++) { | ||||
|       _internal[i] = him._internal[i]; | ||||
|     }     | ||||
|     return *this; | ||||
|   }  | ||||
|   accelerator_inline iVector<vtype, N> &operator=(const Zero &hero) { | ||||
|     zeroit(*this); | ||||
|     return *this; | ||||
| @@ -296,10 +306,12 @@ public: | ||||
|   accelerator_inline iMatrix(const Zero &z) { zeroit(*this); }; | ||||
|   accelerator iMatrix() = default; | ||||
|  | ||||
|   accelerator_inline iMatrix &operator=(const iMatrix &rhs) { | ||||
|   // Allow for type conversion. | ||||
|   template<class other> | ||||
|   accelerator_inline iMatrix &operator=(const iMatrix<other,N> &rhs) { | ||||
|     for (int i = 0; i < N; i++) | ||||
|       for (int j = 0; j < N; j++)  | ||||
| 	vstream(_internal[i][j], rhs._internal[i][j]); | ||||
| 	_internal[i][j] = rhs._internal[i][j]; | ||||
|     return *this; | ||||
|   }; | ||||
|  | ||||
|   | ||||
| @@ -41,10 +41,103 @@ template<class sobj> accelerator_inline RealD norm2(const sobj &arg){ | ||||
|   RealD ret = real(nrm); | ||||
|   return ret; | ||||
| } | ||||
| ////////////////////////////////////// | ||||
| // If single promote to double and sum 2x | ||||
| ////////////////////////////////////// | ||||
|  | ||||
| //////////////////////////////////////////////////////////////// | ||||
| // FIXME horizontal sum: single promote to double and sum  | ||||
| // Not yet used; make sum_cpu in LatticeReduction.h call these | ||||
| // GPU sum_gpu is using a double precision promotion. | ||||
| //////////////////////////////////////////////////////////////// | ||||
| #if 0 | ||||
| accelerator_inline ComplexD ReduceD(const ComplexF &l) { return l; }; | ||||
| accelerator_inline ComplexD ReduceD(const ComplexD &l) { return l; }; | ||||
| accelerator_inline RealD    ReduceD(const RealD    &l) { return l; }; | ||||
| accelerator_inline RealD    ReduceD(const RealF    &l) { return l; }; | ||||
|  | ||||
| accelerator_inline ComplexD ReduceD(const vComplexD &l) { return Reduce(l); }; | ||||
| accelerator_inline RealD    ReduceD(const vRealD    &l) { return Reduce(l); }; | ||||
| accelerator_inline ComplexD ReduceD(const vComplexF  &l)  | ||||
| {  | ||||
|   vComplexD la,lb; | ||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); | ||||
|   return Reduce(la)+Reduce(lb);  | ||||
| } | ||||
| accelerator_inline RealD    ReduceD(const vRealF    &l)  | ||||
| { | ||||
|   vRealD la,lb; | ||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); | ||||
|   return Reduce(la)+Reduce(lb);  | ||||
| }; | ||||
| /////////////////////////////////////////////////////// | ||||
| // Now do it for vector, matrix, scalar | ||||
| /////////////////////////////////////////////////////// | ||||
| template<class l,int N> accelerator_inline | ||||
| auto ReduceD (const iVector<l,N>& lhs) -> iVector<decltype(ReduceD(lhs._internal[0])),N> | ||||
| { | ||||
|   typedef decltype(ReduceD(lhs._internal[0])) ret_t; | ||||
|   iVector<ret_t,N> ret; | ||||
|   for(int c1=0;c1<N;c1++){ | ||||
|     ret._internal[c1] = ReduceD(lhs._internal[c1]); | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
| template<class l,int N> accelerator_inline | ||||
| auto ReduceD (const iMatrix<l,N>& lhs) -> iMatrix<decltype(ReduceD(lhs._internal[0][0])),N> | ||||
| { | ||||
|   typedef decltype(ReduceD(lhs._internal[0][0])) ret_t; | ||||
|   iMatrix<ret_t,N> ret; | ||||
|   for(int c1=0;c1<N;c1++){ | ||||
|   for(int c2=0;c2<N;c2++){ | ||||
|     ret._internal[c1][c2]=ReduceD(lhs._internal[c1][c2]); | ||||
|   }} | ||||
|   return ret; | ||||
| } | ||||
| template<class l> accelerator_inline | ||||
| auto ReduceD (const iScalar<l>& lhs) -> iScalar<decltype(ReduceD(lhs._internal))> | ||||
| { | ||||
|   typedef decltype(ReduceD(lhs._internal)) ret_t; | ||||
|   iScalar<ret_t> ret; | ||||
|   ret._internal = ReduceD(lhs._internal); | ||||
|   return ret; | ||||
| } | ||||
| #endif | ||||
| /////////////////////////////////////////////////////// | ||||
| // Now do Reduce for vector, matrix, scalar | ||||
| /////////////////////////////////////////////////////// | ||||
| template<class l,int N> accelerator_inline | ||||
| auto Reduce (const iVector<l,N>& lhs) -> iVector<decltype(Reduce(lhs._internal[0])),N> | ||||
| { | ||||
|   typedef decltype(Reduce(lhs._internal[0])) ret_t; | ||||
|   iVector<ret_t,N> ret; | ||||
|   for(int c1=0;c1<N;c1++){ | ||||
|     ret._internal[c1] = Reduce(lhs._internal[c1]); | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
| template<class l,int N> accelerator_inline | ||||
| auto Reduce (const iMatrix<l,N>& lhs) -> iMatrix<decltype(Reduce(lhs._internal[0][0])),N> | ||||
| { | ||||
|   typedef decltype(Reduce(lhs._internal[0][0])) ret_t; | ||||
|   iMatrix<ret_t,N> ret; | ||||
|   for(int c1=0;c1<N;c1++){ | ||||
|   for(int c2=0;c2<N;c2++){ | ||||
|     ret._internal[c1][c2]=Reduce(lhs._internal[c1][c2]); | ||||
|   }} | ||||
|   return ret; | ||||
| } | ||||
| template<class l> accelerator_inline | ||||
| auto Reduce (const iScalar<l>& lhs) -> iScalar<decltype(Reduce(lhs._internal))> | ||||
| { | ||||
|   typedef decltype(Reduce(lhs._internal)) ret_t; | ||||
|   iScalar<ret_t> ret; | ||||
|   ret._internal = Reduce(lhs._internal); | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////// | ||||
| // innerProductD : if single promote to double and evaluate with sum 2x | ||||
| ////////////////////////////////////// | ||||
| accelerator_inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){  return innerProduct(l,r); } | ||||
| accelerator_inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){  return innerProduct(l,r); } | ||||
| accelerator_inline RealD    innerProductD(const RealD    &l,const RealD    &r){  return innerProduct(l,r); } | ||||
| @@ -52,14 +145,16 @@ accelerator_inline RealD    innerProductD(const RealF    &l,const RealF    &r){ | ||||
|  | ||||
| accelerator_inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){  return innerProduct(l,r); } | ||||
| accelerator_inline vRealD    innerProductD(const vRealD    &l,const vRealD    &r){  return innerProduct(l,r); } | ||||
| accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){   | ||||
| accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r) | ||||
| {   | ||||
|   vComplexD la,lb; | ||||
|   vComplexD ra,rb; | ||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); | ||||
|   Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v); | ||||
|   return innerProduct(la,ra) + innerProduct(lb,rb);  | ||||
| } | ||||
| accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){   | ||||
| accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r) | ||||
| {   | ||||
|   vRealD la,lb; | ||||
|   vRealD ra,rb; | ||||
|   Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); | ||||
| @@ -67,6 +162,7 @@ accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){ | ||||
|   return innerProduct(la,ra) + innerProduct(lb,rb);  | ||||
| } | ||||
|  | ||||
| // Now do it for vector, matrix, scalar | ||||
| template<class l,class r,int N> accelerator_inline | ||||
| auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))> | ||||
| { | ||||
|   | ||||
| @@ -72,50 +72,60 @@ NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|   template<> struct GridTypeMapper<RealF> : public GridTypeMapper_Base { | ||||
|     typedef RealF scalar_type; | ||||
|     typedef RealD scalar_typeD; | ||||
|     typedef RealF vector_type; | ||||
|     typedef RealD vector_typeD; | ||||
|     typedef RealF tensor_reduced ; | ||||
|     typedef RealF scalar_object; | ||||
|     typedef RealD scalar_objectD; | ||||
|     typedef ComplexF Complexified; | ||||
|     typedef RealF Realified; | ||||
|     typedef RealD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base { | ||||
|     typedef RealD scalar_type; | ||||
|     typedef RealD scalar_typeD; | ||||
|     typedef RealD vector_type; | ||||
|     typedef RealD vector_typeD; | ||||
|     typedef RealD tensor_reduced; | ||||
|     typedef RealD scalar_object; | ||||
|     typedef RealD scalar_objectD; | ||||
|     typedef ComplexD Complexified; | ||||
|     typedef RealD Realified; | ||||
|     typedef RealD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base { | ||||
|     typedef ComplexF scalar_type; | ||||
|     typedef ComplexD scalar_typeD; | ||||
|     typedef ComplexF vector_type; | ||||
|     typedef ComplexD vector_typeD; | ||||
|     typedef ComplexF tensor_reduced; | ||||
|     typedef ComplexF scalar_object; | ||||
|     typedef ComplexD scalar_objectD; | ||||
|     typedef ComplexF Complexified; | ||||
|     typedef RealF Realified; | ||||
|     typedef ComplexD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base { | ||||
|     typedef ComplexD scalar_type; | ||||
|     typedef ComplexD scalar_typeD; | ||||
|     typedef ComplexD vector_type; | ||||
|     typedef ComplexD vector_typeD; | ||||
|     typedef ComplexD tensor_reduced; | ||||
|     typedef ComplexD scalar_object; | ||||
|     typedef ComplexD scalar_objectD; | ||||
|     typedef ComplexD Complexified; | ||||
|     typedef RealD Realified; | ||||
|     typedef ComplexD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base { | ||||
|     typedef Integer scalar_type; | ||||
|     typedef Integer scalar_typeD; | ||||
|     typedef Integer vector_type; | ||||
|     typedef Integer vector_typeD; | ||||
|     typedef Integer tensor_reduced; | ||||
|     typedef Integer scalar_object; | ||||
|     typedef Integer scalar_objectD; | ||||
|     typedef void Complexified; | ||||
|     typedef void Realified; | ||||
|     typedef void DoublePrecision; | ||||
| @@ -123,20 +133,24 @@ NAMESPACE_BEGIN(Grid); | ||||
|  | ||||
|   template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base { | ||||
|     typedef RealF  scalar_type; | ||||
|     typedef RealD  scalar_typeD; | ||||
|     typedef vRealF vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
|     typedef vRealF tensor_reduced; | ||||
|     typedef RealF  scalar_object; | ||||
|     typedef RealD  scalar_objectD; | ||||
|     typedef vComplexF Complexified; | ||||
|     typedef vRealF Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base { | ||||
|     typedef RealD  scalar_type; | ||||
|     typedef RealD  scalar_typeD; | ||||
|     typedef vRealD vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
|     typedef vRealD tensor_reduced; | ||||
|     typedef RealD  scalar_object; | ||||
|     typedef RealD  scalar_objectD; | ||||
|     typedef vComplexD Complexified; | ||||
|     typedef vRealD Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
| @@ -144,10 +158,12 @@ NAMESPACE_BEGIN(Grid); | ||||
|   template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base { | ||||
|     // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types | ||||
|     typedef RealF  scalar_type; | ||||
|     typedef RealD  scalar_typeD; | ||||
|     typedef vRealH vector_type; | ||||
|     typedef vRealD vector_typeD; | ||||
|     typedef vRealH tensor_reduced; | ||||
|     typedef RealF  scalar_object; | ||||
|     typedef RealD  scalar_objectD; | ||||
|     typedef vComplexH Complexified; | ||||
|     typedef vRealH Realified; | ||||
|     typedef vRealD DoublePrecision; | ||||
| @@ -155,40 +171,48 @@ NAMESPACE_BEGIN(Grid); | ||||
|   template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base { | ||||
|     // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types | ||||
|     typedef ComplexF  scalar_type; | ||||
|     typedef ComplexD  scalar_typeD; | ||||
|     typedef vComplexH vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
|     typedef vComplexH tensor_reduced; | ||||
|     typedef ComplexF  scalar_object; | ||||
|     typedef ComplexD  scalar_objectD; | ||||
|     typedef vComplexH Complexified; | ||||
|     typedef vRealH Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base { | ||||
|     typedef ComplexF  scalar_type; | ||||
|     typedef ComplexD  scalar_typeD; | ||||
|     typedef vComplexF vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
|     typedef vComplexF tensor_reduced; | ||||
|     typedef ComplexF  scalar_object; | ||||
|     typedef ComplexD  scalar_objectD; | ||||
|     typedef vComplexF Complexified; | ||||
|     typedef vRealF Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base { | ||||
|     typedef ComplexD  scalar_type; | ||||
|     typedef ComplexD  scalar_typeD; | ||||
|     typedef vComplexD vector_type; | ||||
|     typedef vComplexD vector_typeD; | ||||
|     typedef vComplexD tensor_reduced; | ||||
|     typedef ComplexD  scalar_object; | ||||
|     typedef ComplexD  scalar_objectD; | ||||
|     typedef vComplexD Complexified; | ||||
|     typedef vRealD Realified; | ||||
|     typedef vComplexD DoublePrecision; | ||||
|   }; | ||||
|   template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base { | ||||
|     typedef  Integer scalar_type; | ||||
|     typedef  Integer scalar_typeD; | ||||
|     typedef vInteger vector_type; | ||||
|     typedef vInteger vector_typeD; | ||||
|     typedef vInteger tensor_reduced; | ||||
|     typedef  Integer scalar_object; | ||||
|     typedef  Integer scalar_objectD; | ||||
|     typedef void Complexified; | ||||
|     typedef void Realified; | ||||
|     typedef void DoublePrecision; | ||||
| @@ -198,6 +222,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|   using BaseTraits   = GridTypeMapper<T>; \ | ||||
|   using scalar_type  = typename BaseTraits::scalar_type; \ | ||||
|   using vector_type  = typename BaseTraits::vector_type; \ | ||||
|   using scalar_typeD = typename BaseTraits::scalar_typeD; \ | ||||
|   using vector_typeD = typename BaseTraits::vector_typeD; \ | ||||
|   static constexpr int TensorLevel = BaseTraits::TensorLevel + 1 | ||||
|  | ||||
| @@ -205,6 +230,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iScalar<typename BaseTraits::scalar_object>; | ||||
|     using scalar_objectD  = iScalar<typename BaseTraits::scalar_objectD>; | ||||
|     using Complexified    = iScalar<typename BaseTraits::Complexified>; | ||||
|     using Realified       = iScalar<typename BaseTraits::Realified>; | ||||
|     using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>; | ||||
| @@ -218,6 +244,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iVector<typename BaseTraits::scalar_object,   N>; | ||||
|     using scalar_objectD  = iVector<typename BaseTraits::scalar_objectD,  N>; | ||||
|     using Complexified    = iVector<typename BaseTraits::Complexified,    N>; | ||||
|     using Realified       = iVector<typename BaseTraits::Realified,       N>; | ||||
|     using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>; | ||||
| @@ -231,6 +258,7 @@ NAMESPACE_BEGIN(Grid); | ||||
|     GridTypeMapper_RepeatedTypes; | ||||
|     using tensor_reduced  = iScalar<typename BaseTraits::tensor_reduced>; | ||||
|     using scalar_object   = iMatrix<typename BaseTraits::scalar_object,   N>; | ||||
|     using scalar_objectD  = iMatrix<typename BaseTraits::scalar_objectD,  N>; | ||||
|     using Complexified    = iMatrix<typename BaseTraits::Complexified,    N>; | ||||
|     using Realified       = iMatrix<typename BaseTraits::Realified,       N>; | ||||
|     using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user