Accelerator mark up of entire tensor space for offload

2025-12-22 13:44:29 +00:00 · 2018-01-24 13:27:30 +00:00
parent 69327db9a9
commit 8e99264f40
18 changed files with 614 additions and 661 deletions
--- a/lib/tensors/Tensor_arith_sub.h
+++ b/lib/tensors/Tensor_arith_sub.h
@@ -41,14 +41,14 @@ NAMESPACE_BEGIN(Grid);
 // Vector +/- Vector
 // Matrix +/- Matrix
 // Matrix /- scalar
-template<class vtype,class ltype,class rtype> strong_inline void sub(iScalar<vtype> * __restrict__ ret,
+template<class vtype,class ltype,class rtype> accelerator_inline void sub(iScalar<vtype> * __restrict__ ret,
 								     const iScalar<ltype> * __restrict__ lhs,
 								     const iScalar<rtype> * __restrict__ rhs)
 {
  sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
 }

-template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVector<vtype,N> * __restrict__ ret,
+template<class vtype,class ltype,class rtype,int N> accelerator_inline void sub(iVector<vtype,N> * __restrict__ ret,
 									   const iVector<ltype,N> * __restrict__ lhs,
 									   const iVector<rtype,N> * __restrict__ rhs)
 {
@@ -57,7 +57,7 @@ template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVect
  }
  return;
 }
-template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
+template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
 									    const iMatrix<ltype,N> * __restrict__ lhs,
 									    const iMatrix<rtype,N> * __restrict__ rhs){
  for(int c2=0;c2<N;c2++){
@@ -66,7 +66,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
    }}
  return;
 }
-template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
+template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
 									    const iScalar<ltype> * __restrict__ lhs,
 									    const iMatrix<rtype,N> * __restrict__ rhs){
  for(int c2=0;c2<N;c2++){
@@ -82,7 +82,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
    }}
  return;
 }
-template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
+template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
 									    const iMatrix<ltype,N> * __restrict__ lhs,
 									    const iScalar<rtype> * __restrict__ rhs){
  for(int c2=0;c2<N;c2++){
@@ -96,7 +96,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
 }

 // - operator for scalar, vector, matrix
-template<class ltype,class rtype> strong_inline auto
+template<class ltype,class rtype> accelerator_inline auto
 operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
 {
  typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
@@ -105,7 +105,7 @@ operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<dec
  return ret;
 }
 template<class ltype,class rtype,int N>
-strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
+accelerator_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
 {
  typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
  ret_t ret;
@@ -113,7 +113,7 @@ strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N
  return ret;
 }
 template<class ltype,class rtype,int N>
-strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
+accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
 {
  typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
  ret_t ret;
@@ -121,7 +121,7 @@ strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N
  return ret;
 }
 template<class ltype,class rtype,int N>
-strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
+accelerator_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
 {
  typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
  ret_t ret;
@@ -129,7 +129,7 @@ strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>&
  return ret;
 }
 template<class ltype,class rtype,int N>
-strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
+accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
 {
  typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
  ret_t ret;