From ddb192bac73f7d0b2329f8bd43fe4d64e31f90e1 Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Thu, 30 Apr 2020 16:09:57 -0400 Subject: [PATCH] re-work double precision promotion for summit --- Grid/lattice/Lattice_reduction.h | 2 +- Grid/parallelIO/NerscIO.h | 2 +- Grid/tensors/Tensor_traits.h | 92 ++++++++++++++++++++++++++++++-- 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index d3f5f9ae..e2690cfe 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -234,7 +234,7 @@ innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Latti accelerator_for( ss, sites, nsimd,{ auto left_tmp = left_v(ss); coalescedWrite(inner_tmp_v[ss],innerProduct(left_tmp,right_v(ss))); - coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp))); + coalescedWrite(norm_tmp_v[ss],innerProduct(left_tmp,left_tmp)); }); tmp[0] = TensorRemove(sumD_gpu(inner_tmp_v,sites)); diff --git a/Grid/parallelIO/NerscIO.h b/Grid/parallelIO/NerscIO.h index d3b62d1f..5522ba91 100644 --- a/Grid/parallelIO/NerscIO.h +++ b/Grid/parallelIO/NerscIO.h @@ -146,7 +146,7 @@ public: int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + int ieee64 = (format == std::string("IEEE64") || format == std::string("IEEE64LITTLE")); uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; diff --git a/Grid/tensors/Tensor_traits.h b/Grid/tensors/Tensor_traits.h index 5359c547..04d7343e 100644 --- a/Grid/tensors/Tensor_traits.h +++ b/Grid/tensors/Tensor_traits.h @@ -43,10 +43,38 @@ NAMESPACE_BEGIN(Grid); template struct isGridScalar> : public std::true_type { static constexpr bool notvalue = false; }; // Store double-precision data in single-precision grids for precision promoted localInnerProductD - typedef iVector ComplexD2; - typedef iVector vComplexD2; - typedef iVector RealD2; - typedef iVector vRealD2; + template + class TypePair { + public: + T _internal[2]; + TypePair& operator=(const Grid::Zero& o) { + _internal[0] = Zero(); + _internal[1] = Zero(); + return *this; + } + + TypePair operator+(const TypePair& o) const { + TypePair r; + r._internal[0] = _internal[0] + o._internal[0]; + r._internal[1] = _internal[1] + o._internal[1]; + return r; + } + + TypePair& operator+=(const TypePair& o) { + _internal[0] += o._internal[0]; + _internal[1] += o._internal[1]; + return *this; + } + + friend accelerator_inline void add(TypePair* ret, const TypePair* a, const TypePair* b) { + add(&ret->_internal[0],&a->_internal[0],&b->_internal[0]); + add(&ret->_internal[1],&a->_internal[1],&b->_internal[1]); + } + }; + typedef TypePair ComplexD2; + typedef TypePair RealD2; + typedef TypePair vComplexD2; + typedef TypePair vRealD2; // Traits to identify fundamental data types template struct isGridFundamental : public std::false_type { static constexpr bool notvalue = true; }; @@ -58,6 +86,10 @@ NAMESPACE_BEGIN(Grid); template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; + template<> struct isGridFundamental : public std::true_type { static constexpr bool notvalue = false; }; ////////////////////////////////////////////////////////////////////////////////// @@ -119,6 +151,19 @@ NAMESPACE_BEGIN(Grid); typedef RealD DoublePrecision; typedef RealD DoublePrecision2; }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef RealD2 vector_type; + typedef RealD2 vector_typeD; + typedef RealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef RealD2 DoublePrecision; + typedef RealD2 DoublePrecision2; + }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; typedef ComplexD scalar_typeD; @@ -145,6 +190,19 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD DoublePrecision; typedef ComplexD DoublePrecision2; }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef ComplexD2 vector_type; + typedef ComplexD2 vector_typeD; + typedef ComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef ComplexD2 Complexified; + typedef RealD2 Realified; + typedef ComplexD2 DoublePrecision; + typedef ComplexD2 DoublePrecision2; + }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; typedef Integer scalar_typeD; @@ -185,6 +243,19 @@ NAMESPACE_BEGIN(Grid); typedef vRealD DoublePrecision; typedef vRealD DoublePrecision2; }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef RealD2 scalar_type; + typedef RealD2 scalar_typeD; + typedef vRealD2 vector_type; + typedef vRealD2 vector_typeD; + typedef vRealD2 tensor_reduced; + typedef RealD2 scalar_object; + typedef RealD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vRealD2 DoublePrecision; + typedef vRealD2 DoublePrecision2; + }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types typedef RealF scalar_type; @@ -239,6 +310,19 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD DoublePrecision; typedef vComplexD DoublePrecision2; }; + template<> struct GridTypeMapper : public GridTypeMapper_Base { + typedef ComplexD2 scalar_type; + typedef ComplexD2 scalar_typeD; + typedef vComplexD2 vector_type; + typedef vComplexD2 vector_typeD; + typedef vComplexD2 tensor_reduced; + typedef ComplexD2 scalar_object; + typedef ComplexD2 scalar_objectD; + typedef vComplexD2 Complexified; + typedef vRealD2 Realified; + typedef vComplexD2 DoublePrecision; + typedef vComplexD2 DoublePrecision2; + }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; typedef Integer scalar_typeD;