From 091d5c605ef041dbb95ba88e8a8b49c8a7b966af Mon Sep 17 00:00:00 2001 From: Christoph Lehner Date: Fri, 17 Apr 2020 04:25:28 -0400 Subject: [PATCH] towards more precise blocking --- Grid/lattice/Lattice_reduction.h | 2 +- Grid/tensors/Tensor_class.h | 2 + Grid/tensors/Tensor_inner.h | 73 ++++++++++++++++++++++++++++++++ Grid/tensors/Tensor_traits.h | 20 +++++++++ 4 files changed, 96 insertions(+), 1 deletion(-) diff --git a/Grid/lattice/Lattice_reduction.h b/Grid/lattice/Lattice_reduction.h index de2efd72..d3f5f9ae 100644 --- a/Grid/lattice/Lattice_reduction.h +++ b/Grid/lattice/Lattice_reduction.h @@ -206,7 +206,7 @@ axpby_norm_fast(Lattice &z,sobj a,sobj b,const Lattice &x,const Latt } template strong_inline void -innerProduct_norm(ComplexD& ip, RealD &nrm, const Lattice &left,const Lattice &right) +innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice &left,const Lattice &right) { conformable(left,right); diff --git a/Grid/tensors/Tensor_class.h b/Grid/tensors/Tensor_class.h index 75e42721..dbcbae8d 100644 --- a/Grid/tensors/Tensor_class.h +++ b/Grid/tensors/Tensor_class.h @@ -6,6 +6,7 @@ Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,6 +56,7 @@ class GridTensorBase {}; using Complexified = typename Traits::Complexified; \ using Realified = typename Traits::Realified; \ using DoublePrecision = typename Traits::DoublePrecision; \ + using DoublePrecision2= typename Traits::DoublePrecision2; \ static constexpr int TensorLevel = Traits::TensorLevel template diff --git a/Grid/tensors/Tensor_inner.h b/Grid/tensors/Tensor_inner.h index 03f72966..c052adcf 100644 --- a/Grid/tensors/Tensor_inner.h +++ b/Grid/tensors/Tensor_inner.h @@ -8,6 +8,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -194,6 +195,78 @@ auto innerProductD (const iScalar& lhs,const iScalar& rhs) -> iScalar accelerator_inline + auto innerProductD2 (const iVector& lhs,const iVector& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t; + iScalar ret; + zeroit(ret); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iMatrix& lhs,const iMatrix& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t; + iScalar ret; + ret=Zero(); + for(int c1=0;c1 accelerator_inline + auto innerProductD2 (const iScalar& lhs,const iScalar& rhs) -> iScalar +{ + typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t; + iScalar ret; + ret._internal = innerProductD2(lhs._internal,rhs._internal); + return ret; +} + ////////////////////// // Keep same precison ////////////////////// diff --git a/Grid/tensors/Tensor_traits.h b/Grid/tensors/Tensor_traits.h index 9067d43d..afb1f916 100644 --- a/Grid/tensors/Tensor_traits.h +++ b/Grid/tensors/Tensor_traits.h @@ -6,6 +6,7 @@ Author: Azusa Yamaguchi Author: Peter Boyle Author: Christopher Kelly Author: Michael Marshall +Author: Christoph Lehner This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -37,6 +38,10 @@ NAMESPACE_BEGIN(Grid); template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; template struct isGridTensor> : public std::true_type { static constexpr bool notvalue = false; }; + // To store double-precision data in single-precision grids for precision promoted localInnerProductD + typedef iVector vComplexD2; + typedef iVector vRealD2; + ////////////////////////////////////////////////////////////////////////////////// // Want to recurse: GridTypeMapper >::scalar_type == ComplexD. // Use of a helper class like this allows us to template specialise and "dress" @@ -81,6 +86,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef RealD DoublePrecision; + typedef RealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -93,6 +99,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef RealD DoublePrecision; + typedef RealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -105,6 +112,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexF Complexified; typedef RealF Realified; typedef ComplexD DoublePrecision; + typedef ComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -117,6 +125,7 @@ NAMESPACE_BEGIN(Grid); typedef ComplexD Complexified; typedef RealD Realified; typedef ComplexD DoublePrecision; + typedef ComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -129,6 +138,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { @@ -142,6 +152,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vRealD DoublePrecision; + typedef vRealD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef RealD scalar_type; @@ -154,6 +165,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -167,6 +179,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vRealD DoublePrecision; + typedef vRealD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { // Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types @@ -180,6 +193,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexH Complexified; typedef vRealH Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexF scalar_type; @@ -192,6 +206,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexF Complexified; typedef vRealF Realified; typedef vComplexD DoublePrecision; + typedef vComplexD2 DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef ComplexD scalar_type; @@ -204,6 +219,7 @@ NAMESPACE_BEGIN(Grid); typedef vComplexD Complexified; typedef vRealD Realified; typedef vComplexD DoublePrecision; + typedef vComplexD DoublePrecision2; }; template<> struct GridTypeMapper : public GridTypeMapper_Base { typedef Integer scalar_type; @@ -216,6 +232,7 @@ NAMESPACE_BEGIN(Grid); typedef void Complexified; typedef void Realified; typedef void DoublePrecision; + typedef void DoublePrecision2; }; #define GridTypeMapper_RepeatedTypes \ @@ -234,6 +251,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iScalar; using Realified = iScalar; using DoublePrecision = iScalar; + using DoublePrecision2= iScalar; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count; static constexpr int Dimension(int dim) { @@ -248,6 +266,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iVector; using Realified = iVector; using DoublePrecision = iVector; + using DoublePrecision2= iVector; static constexpr int Rank = BaseTraits::Rank + 1; static constexpr std::size_t count = BaseTraits::count * N; static constexpr int Dimension(int dim) { @@ -262,6 +281,7 @@ NAMESPACE_BEGIN(Grid); using Complexified = iMatrix; using Realified = iMatrix; using DoublePrecision = iMatrix; + using DoublePrecision2= iMatrix; static constexpr int Rank = BaseTraits::Rank + 2; static constexpr std::size_t count = BaseTraits::count * N * N; static constexpr int Dimension(int dim) {