mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
towards more precise blocking
This commit is contained in:
parent
327da332bb
commit
091d5c605e
@ -206,7 +206,7 @@ axpby_norm_fast(Lattice<vobj> &z,sobj a,sobj b,const Lattice<vobj> &x,const Latt
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> strong_inline void
|
template<class vobj> strong_inline void
|
||||||
innerProduct_norm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Lattice<vobj> &right)
|
innerProductNorm(ComplexD& ip, RealD &nrm, const Lattice<vobj> &left,const Lattice<vobj> &right)
|
||||||
{
|
{
|
||||||
conformable(left,right);
|
conformable(left,right);
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ Copyright (C) 2015
|
|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -55,6 +56,7 @@ class GridTensorBase {};
|
|||||||
using Complexified = typename Traits::Complexified; \
|
using Complexified = typename Traits::Complexified; \
|
||||||
using Realified = typename Traits::Realified; \
|
using Realified = typename Traits::Realified; \
|
||||||
using DoublePrecision = typename Traits::DoublePrecision; \
|
using DoublePrecision = typename Traits::DoublePrecision; \
|
||||||
|
using DoublePrecision2= typename Traits::DoublePrecision2; \
|
||||||
static constexpr int TensorLevel = Traits::TensorLevel
|
static constexpr int TensorLevel = Traits::TensorLevel
|
||||||
|
|
||||||
template <class vtype>
|
template <class vtype>
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@ -194,6 +195,78 @@ auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decl
|
|||||||
ret._internal = innerProductD(lhs._internal,rhs._internal);
|
ret._internal = innerProductD(lhs._internal,rhs._internal);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////
|
||||||
|
// innerProductD2: precision promotion without inner sum
|
||||||
|
//////////////////////////////////////
|
||||||
|
accelerator_inline vComplexD2 TensorRemove(const vComplexD2 & x) { return x; };
|
||||||
|
accelerator_inline vRealD2 TensorRemove(const vRealD2 & x) { return x; };
|
||||||
|
|
||||||
|
accelerator_inline ComplexD innerProductD2(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline ComplexD innerProductD2(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline RealD innerProductD2(const RealD &l,const RealD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline RealD innerProductD2(const RealF &l,const RealF &r){ return innerProduct(l,r); }
|
||||||
|
|
||||||
|
accelerator_inline vComplexD innerProductD2(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
|
||||||
|
accelerator_inline vRealD innerProductD2(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
|
||||||
|
|
||||||
|
accelerator_inline vComplexD2 innerProductD2(const vComplexF &l,const vComplexF &r)
|
||||||
|
{
|
||||||
|
vComplexD la,lb;
|
||||||
|
vComplexD ra,rb;
|
||||||
|
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||||
|
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
|
||||||
|
vComplexD2 ret;
|
||||||
|
ret._internal[0] = innerProduct(la,ra);
|
||||||
|
ret._internal[1] = innerProduct(lb,rb);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
accelerator_inline vRealD2 innerProductD2(const vRealF &l,const vRealF &r)
|
||||||
|
{
|
||||||
|
vRealD la,lb;
|
||||||
|
vRealD ra,rb;
|
||||||
|
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
|
||||||
|
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
|
||||||
|
vRealD2 ret;
|
||||||
|
ret._internal[0]=innerProduct(la,ra);
|
||||||
|
ret._internal[1]=innerProduct(lb,rb);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now do it for vector, matrix, scalar
|
||||||
|
template<class l,class r,int N> accelerator_inline
|
||||||
|
auto innerProductD2 (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0],rhs._internal[0]))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal[0],rhs._internal[0])) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
zeroit(ret);
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
ret._internal += innerProductD2(lhs._internal[c1],rhs._internal[c1]);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class l,class r,int N> accelerator_inline
|
||||||
|
auto innerProductD2 (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0]))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
ret=Zero();
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
|
ret._internal+=innerProductD2(lhs._internal[c1][c2],rhs._internal[c1][c2]);
|
||||||
|
}}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
template<class l,class r> accelerator_inline
|
||||||
|
auto innerProductD2 (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD2(lhs._internal,rhs._internal))>
|
||||||
|
{
|
||||||
|
typedef decltype(innerProductD2(lhs._internal,rhs._internal)) ret_t;
|
||||||
|
iScalar<ret_t> ret;
|
||||||
|
ret._internal = innerProductD2(lhs._internal,rhs._internal);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////
|
//////////////////////
|
||||||
// Keep same precison
|
// Keep same precison
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
@ -6,6 +6,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
||||||
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
Author: Michael Marshall <michael.marshall@ed.ac.au>
|
||||||
|
Author: Christoph Lehner <christoph@lhnr.de>
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
@ -37,6 +38,10 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iVector<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
template<class T, int N> struct isGridTensor<iMatrix<T, N>> : public std::true_type { static constexpr bool notvalue = false; };
|
||||||
|
|
||||||
|
// To store double-precision data in single-precision grids for precision promoted localInnerProductD
|
||||||
|
typedef iVector<vComplexD,2> vComplexD2;
|
||||||
|
typedef iVector<vRealD,2> vRealD2;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////
|
||||||
// Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
|
// Want to recurse: GridTypeMapper<Matrix<vComplexD> >::scalar_type == ComplexD.
|
||||||
// Use of a helper class like this allows us to template specialise and "dress"
|
// Use of a helper class like this allows us to template specialise and "dress"
|
||||||
@ -81,6 +86,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexF Complexified;
|
typedef ComplexF Complexified;
|
||||||
typedef RealF Realified;
|
typedef RealF Realified;
|
||||||
typedef RealD DoublePrecision;
|
typedef RealD DoublePrecision;
|
||||||
|
typedef RealD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<RealD> : public GridTypeMapper_Base {
|
||||||
typedef RealD scalar_type;
|
typedef RealD scalar_type;
|
||||||
@ -93,6 +99,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexD Complexified;
|
typedef ComplexD Complexified;
|
||||||
typedef RealD Realified;
|
typedef RealD Realified;
|
||||||
typedef RealD DoublePrecision;
|
typedef RealD DoublePrecision;
|
||||||
|
typedef RealD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<ComplexF> : public GridTypeMapper_Base {
|
||||||
typedef ComplexF scalar_type;
|
typedef ComplexF scalar_type;
|
||||||
@ -105,6 +112,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexF Complexified;
|
typedef ComplexF Complexified;
|
||||||
typedef RealF Realified;
|
typedef RealF Realified;
|
||||||
typedef ComplexD DoublePrecision;
|
typedef ComplexD DoublePrecision;
|
||||||
|
typedef ComplexD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<ComplexD> : public GridTypeMapper_Base {
|
||||||
typedef ComplexD scalar_type;
|
typedef ComplexD scalar_type;
|
||||||
@ -117,6 +125,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef ComplexD Complexified;
|
typedef ComplexD Complexified;
|
||||||
typedef RealD Realified;
|
typedef RealD Realified;
|
||||||
typedef ComplexD DoublePrecision;
|
typedef ComplexD DoublePrecision;
|
||||||
|
typedef ComplexD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<Integer> : public GridTypeMapper_Base {
|
||||||
typedef Integer scalar_type;
|
typedef Integer scalar_type;
|
||||||
@ -129,6 +138,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef void Complexified;
|
typedef void Complexified;
|
||||||
typedef void Realified;
|
typedef void Realified;
|
||||||
typedef void DoublePrecision;
|
typedef void DoublePrecision;
|
||||||
|
typedef void DoublePrecision2;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealF> : public GridTypeMapper_Base {
|
||||||
@ -142,6 +152,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexF Complexified;
|
typedef vComplexF Complexified;
|
||||||
typedef vRealF Realified;
|
typedef vRealF Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealD> : public GridTypeMapper_Base {
|
||||||
typedef RealD scalar_type;
|
typedef RealD scalar_type;
|
||||||
@ -154,6 +165,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexD Complexified;
|
typedef vComplexD Complexified;
|
||||||
typedef vRealD Realified;
|
typedef vRealD Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vRealH> : public GridTypeMapper_Base {
|
||||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||||
@ -167,6 +179,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexH Complexified;
|
typedef vComplexH Complexified;
|
||||||
typedef vRealH Realified;
|
typedef vRealH Realified;
|
||||||
typedef vRealD DoublePrecision;
|
typedef vRealD DoublePrecision;
|
||||||
|
typedef vRealD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexH> : public GridTypeMapper_Base {
|
||||||
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
// Fixme this is incomplete until Grid supports fp16 or bfp16 arithmetic types
|
||||||
@ -180,6 +193,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexH Complexified;
|
typedef vComplexH Complexified;
|
||||||
typedef vRealH Realified;
|
typedef vRealH Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexF> : public GridTypeMapper_Base {
|
||||||
typedef ComplexF scalar_type;
|
typedef ComplexF scalar_type;
|
||||||
@ -192,6 +206,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexF Complexified;
|
typedef vComplexF Complexified;
|
||||||
typedef vRealF Realified;
|
typedef vRealF Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD2 DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vComplexD> : public GridTypeMapper_Base {
|
||||||
typedef ComplexD scalar_type;
|
typedef ComplexD scalar_type;
|
||||||
@ -204,6 +219,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef vComplexD Complexified;
|
typedef vComplexD Complexified;
|
||||||
typedef vRealD Realified;
|
typedef vRealD Realified;
|
||||||
typedef vComplexD DoublePrecision;
|
typedef vComplexD DoublePrecision;
|
||||||
|
typedef vComplexD DoublePrecision2;
|
||||||
};
|
};
|
||||||
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
|
template<> struct GridTypeMapper<vInteger> : public GridTypeMapper_Base {
|
||||||
typedef Integer scalar_type;
|
typedef Integer scalar_type;
|
||||||
@ -216,6 +232,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef void Complexified;
|
typedef void Complexified;
|
||||||
typedef void Realified;
|
typedef void Realified;
|
||||||
typedef void DoublePrecision;
|
typedef void DoublePrecision;
|
||||||
|
typedef void DoublePrecision2;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define GridTypeMapper_RepeatedTypes \
|
#define GridTypeMapper_RepeatedTypes \
|
||||||
@ -234,6 +251,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iScalar<typename BaseTraits::Complexified>;
|
using Complexified = iScalar<typename BaseTraits::Complexified>;
|
||||||
using Realified = iScalar<typename BaseTraits::Realified>;
|
using Realified = iScalar<typename BaseTraits::Realified>;
|
||||||
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
|
using DoublePrecision = iScalar<typename BaseTraits::DoublePrecision>;
|
||||||
|
using DoublePrecision2= iScalar<typename BaseTraits::DoublePrecision2>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 1;
|
static constexpr int Rank = BaseTraits::Rank + 1;
|
||||||
static constexpr std::size_t count = BaseTraits::count;
|
static constexpr std::size_t count = BaseTraits::count;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
@ -248,6 +266,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iVector<typename BaseTraits::Complexified, N>;
|
using Complexified = iVector<typename BaseTraits::Complexified, N>;
|
||||||
using Realified = iVector<typename BaseTraits::Realified, N>;
|
using Realified = iVector<typename BaseTraits::Realified, N>;
|
||||||
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
|
using DoublePrecision = iVector<typename BaseTraits::DoublePrecision, N>;
|
||||||
|
using DoublePrecision2= iVector<typename BaseTraits::DoublePrecision2, N>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 1;
|
static constexpr int Rank = BaseTraits::Rank + 1;
|
||||||
static constexpr std::size_t count = BaseTraits::count * N;
|
static constexpr std::size_t count = BaseTraits::count * N;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
@ -262,6 +281,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
|
using Complexified = iMatrix<typename BaseTraits::Complexified, N>;
|
||||||
using Realified = iMatrix<typename BaseTraits::Realified, N>;
|
using Realified = iMatrix<typename BaseTraits::Realified, N>;
|
||||||
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
|
using DoublePrecision = iMatrix<typename BaseTraits::DoublePrecision, N>;
|
||||||
|
using DoublePrecision2= iMatrix<typename BaseTraits::DoublePrecision2, N>;
|
||||||
static constexpr int Rank = BaseTraits::Rank + 2;
|
static constexpr int Rank = BaseTraits::Rank + 2;
|
||||||
static constexpr std::size_t count = BaseTraits::count * N * N;
|
static constexpr std::size_t count = BaseTraits::count * N * N;
|
||||||
static constexpr int Dimension(int dim) {
|
static constexpr int Dimension(int dim) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user