1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-19 00:07:05 +01:00

Accelerator mark up of entire tensor space for offload

This commit is contained in:
paboyle
2018-01-24 13:27:30 +00:00
parent 69327db9a9
commit 8e99264f40
18 changed files with 614 additions and 661 deletions

View File

@ -41,14 +41,14 @@ NAMESPACE_BEGIN(Grid);
// Vector +/- Vector
// Matrix +/- Matrix
// Matrix +/- scalar
template<class vtype,class ltype,class rtype> strong_inline void sub(iScalar<vtype> * __restrict__ ret,
template<class vtype,class ltype,class rtype> accelerator_inline void sub(iScalar<vtype> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs)
{
sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
}
template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVector<vtype,N> * __restrict__ ret,
template<class vtype,class ltype,class rtype,int N> accelerator_inline void sub(iVector<vtype,N> * __restrict__ ret,
const iVector<ltype,N> * __restrict__ lhs,
const iVector<rtype,N> * __restrict__ rhs)
{
@ -57,7 +57,7 @@ template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVect
}
return;
}
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
@ -66,7 +66,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
}}
return;
}
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
@ -82,7 +82,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
}}
return;
}
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
@ -96,7 +96,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
}
// - operator for scalar, vector, matrix
template<class ltype,class rtype> strong_inline auto
template<class ltype,class rtype> accelerator_inline auto
operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
{
typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
@ -105,7 +105,7 @@ operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<dec
return ret;
}
template<class ltype,class rtype,int N>
strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
accelerator_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
{
typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
ret_t ret;
@ -113,7 +113,7 @@ strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N
return ret;
}
template<class ltype,class rtype,int N>
strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
ret_t ret;
@ -121,7 +121,7 @@ strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N
return ret;
}
template<class ltype,class rtype,int N>
strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
accelerator_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
{
typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
ret_t ret;
@ -129,7 +129,7 @@ strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>&
return ret;
}
template<class ltype,class rtype,int N>
strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
{
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
ret_t ret;