1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-10 06:00:45 +01:00

Accelerator markup of the entire tensor space for offload

This commit is contained in:
paboyle 2018-01-24 13:27:30 +00:00
parent 69327db9a9
commit 8e99264f40
18 changed files with 614 additions and 661 deletions

View File

@ -36,19 +36,19 @@ NAMESPACE_BEGIN(Grid);
// Ta function for scalar, vector, matrix // Ta function for scalar, vector, matrix
/////////////////////////////////////////////// ///////////////////////////////////////////////
/* /*
inline ComplexF Ta( const ComplexF &arg){ return arg;} accelerator_inline ComplexF Ta( const ComplexF &arg){ return arg;}
inline ComplexD Ta( const ComplexD &arg){ return arg;} accelerator_inline ComplexD Ta( const ComplexD &arg){ return arg;}
inline RealF Ta( const RealF &arg){ return arg;} accelerator_inline RealF Ta( const RealF &arg){ return arg;}
inline RealD Ta( const RealD &arg){ return arg;} accelerator_inline RealD Ta( const RealD &arg){ return arg;}
*/ */
template<class vtype> inline iScalar<vtype> Ta(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> Ta(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = Ta(r._internal); ret._internal = Ta(r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -56,7 +56,7 @@ template<class vtype,int N> inline iVector<vtype,N> Ta(const iVector<vtype,N>&r)
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg) template<class vtype,int N> accelerator_inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &arg)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
@ -73,13 +73,13 @@ template<class vtype,int N> inline iMatrix<vtype,N> Ta(const iMatrix<vtype,N> &a
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> ProjectOnGroup(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> ProjectOnGroup(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = ProjectOnGroup(r._internal); ret._internal = ProjectOnGroup(r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> ProjectOnGroup(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> ProjectOnGroup(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -88,7 +88,7 @@ template<class vtype,int N> inline iVector<vtype,N> ProjectOnGroup(const iVector
return ret; return ret;
} }
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg) accelerator_inline iMatrix<vtype,N> ProjectOnGroup(const iMatrix<vtype,N> &arg)
{ {
// need a check for the group type? // need a check for the group type?
iMatrix<vtype,N> ret(arg); iMatrix<vtype,N> ret(arg);

View File

@ -30,24 +30,22 @@ Author: neo <cossu@post.kek.jp>
#define GRID_MATH_ARITH_ADD_H #define GRID_MATH_ARITH_ADD_H
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////// ADD /////////////////////////////////////////// /////////////////////////////////////////// ADD ///////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// ADD is simple for now; cannot mix types and straightforward template // ADD is simple for now; cannot mix types and straightforward template
// Scalar +/- Scalar // Scalar +/- Scalar
// Vector +/- Vector // Vector +/- Vector
// Matrix +/- Matrix // Matrix +/- Matrix
template<class vtype,class ltype,class rtype> strong_inline void add(iScalar<vtype> * __restrict__ ret, template<class vtype,class ltype,class rtype> accelerator_inline void add(iScalar<vtype> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs, const iScalar<ltype> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs) const iScalar<rtype> * __restrict__ rhs)
{ {
add(&ret->_internal,&lhs->_internal,&rhs->_internal); add(&ret->_internal,&lhs->_internal,&rhs->_internal);
} }
template<class vtype,class ltype,class rtype,int N> strong_inline void add(iVector<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype,int N> accelerator_inline void add(iVector<vtype,N> * __restrict__ ret,
const iVector<ltype,N> * __restrict__ lhs, const iVector<ltype,N> * __restrict__ lhs,
const iVector<rtype,N> * __restrict__ rhs) const iVector<rtype,N> * __restrict__ rhs)
{ {
@ -57,7 +55,7 @@ template<class vtype,class ltype,class rtype,int N> strong_inline void add(iVect
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs, const iMatrix<ltype,N> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs) const iMatrix<rtype,N> * __restrict__ rhs)
{ {
@ -67,7 +65,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMa
}} }}
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs, const iScalar<ltype> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs) const iMatrix<rtype,N> * __restrict__ rhs)
{ {
@ -80,7 +78,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMa
}} }}
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void add(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs, const iMatrix<ltype,N> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs) const iScalar<rtype> * __restrict__ rhs)
{ {
@ -97,44 +95,43 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void add(iMa
// + operator for scalar, vector, matrix // + operator for scalar, vector, matrix
template<class ltype,class rtype> template<class ltype,class rtype>
//strong_inline auto operator + (iScalar<ltype>& lhs,iScalar<rtype>&& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)> accelerator_inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
strong_inline auto operator + (const iScalar<ltype>& lhs,const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal + rhs._internal)>
{ {
typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t; typedef iScalar<decltype(lhs._internal+rhs._internal)> ret_t;
ret_t ret; ret_t ret ;
add(&ret,&lhs,&rhs); add(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> accelerator_inline auto operator + (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]+rhs._internal[0]),N>
{ {
typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t; typedef iVector<decltype(lhs._internal[0]+rhs._internal[0]),N> ret_t;
ret_t ret; ret_t ret ;
add(&ret,&lhs,&rhs); add(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> accelerator_inline auto operator + (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N>
{ {
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t; typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal[0][0]),N> ret_t;
ret_t ret; ret_t ret ;
add(&ret,&lhs,&rhs); add(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> accelerator_inline auto operator + (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N>
{ {
typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t; typedef iMatrix<decltype(lhs._internal+rhs._internal[0][0]),N> ret_t;
ret_t ret; ret_t ret ;
add(&ret,&lhs,&rhs); add(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> accelerator_inline auto operator + (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N>
{ {
typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t; typedef iMatrix<decltype(lhs._internal[0][0]+rhs._internal),N> ret_t;
ret_t ret; ret_t ret ;
add(&ret,&lhs,&rhs); add(&ret,&lhs,&rhs);
return ret; return ret;
} }

View File

@ -47,12 +47,12 @@ NAMESPACE_BEGIN(Grid);
// scal x vec = vec // scal x vec = vec
/////////////////////////// ///////////////////////////
template<class rtype,class vtype,class mtype> template<class rtype,class vtype,class mtype>
strong_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> * __restrict__ lhs,const iScalar<mtype> * __restrict__ rhs) accelerator_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> * __restrict__ lhs,const iScalar<mtype> * __restrict__ rhs)
{ {
mac(&ret->_internal,&lhs->_internal,&rhs->_internal); mac(&ret->_internal,&lhs->_internal,&rhs->_internal);
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ accelerator_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c3=0;c3<N;c3++){ for(int c3=0;c3<N;c3++){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
@ -62,7 +62,7 @@ strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){ accelerator_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal); mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
@ -70,7 +70,7 @@ strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ accelerator_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]); mac(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
@ -78,7 +78,7 @@ strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype>
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs) accelerator_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
{ {
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
@ -87,7 +87,7 @@ strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iMatrix<ltype,
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs) accelerator_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iVector<rtype,N> * __restrict__ rhs)
{ {
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]); mac(&ret->_internal[c1],&lhs->_internal,&rhs->_internal[c1]);
@ -95,7 +95,7 @@ strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iScalar<ltype>
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs) accelerator_inline void mac(iVector<rrtype,N> * __restrict__ ret,const iVector<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs)
{ {
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal); mac(&ret->_internal[c1],&lhs->_internal[c1],&rhs->_internal);

View File

@ -35,12 +35,12 @@ NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
template<class rtype,class vtype,class mtype> template<class rtype,class vtype,class mtype>
strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){ accelerator_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> * __restrict__ lhs,const iScalar<vtype> * __restrict__ rhs){
mult(&ret->_internal,&lhs->_internal,&rhs->_internal); mult(&ret->_internal,&lhs->_internal,&rhs->_internal);
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]); mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
@ -57,7 +57,7 @@ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){ accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal); mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c2],&rhs->_internal);
@ -66,7 +66,7 @@ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype
} }
template<class rrtype,class ltype,class rtype, int N> template<class rrtype,class ltype,class rtype, int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ accelerator_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]); mult(&ret->_internal[c1][c2],&lhs->_internal,&rhs->_internal[c1][c2]);
@ -75,7 +75,7 @@ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iScalar<ltype
} }
// Matrix left multiplies vector // Matrix left multiplies vector
template<class rtype,class vtype,class mtype,int N> template<class rtype,class vtype,class mtype,int N>
strong_inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs) accelerator_inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,N> * __restrict__ lhs,const iVector<vtype,N> * __restrict__ rhs)
{ {
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]); mult(&ret->_internal[c1],&lhs->_internal[c1][0],&rhs->_internal[0]);
@ -86,7 +86,7 @@ strong_inline void mult(iVector<rtype,N> * __restrict__ ret,const iMatrix<mtype,
return; return;
} }
template<class rtype,class vtype,class mtype,int N> template<class rtype,class vtype,class mtype,int N>
strong_inline void mult(iVector<rtype,N> * __restrict__ ret, accelerator_inline void mult(iVector<rtype,N> * __restrict__ ret,
const iScalar<mtype> * __restrict__ lhs, const iScalar<mtype> * __restrict__ lhs,
const iVector<vtype,N> * __restrict__ rhs){ const iVector<vtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -94,7 +94,7 @@ strong_inline void mult(iVector<rtype,N> * __restrict__ ret,
} }
} }
template<class rtype,class vtype,class mtype,int N> template<class rtype,class vtype,class mtype,int N>
strong_inline void mult(iVector<rtype,N> * __restrict__ ret, accelerator_inline void mult(iVector<rtype,N> * __restrict__ ret,
const iVector<vtype,N> * __restrict__ rhs, const iVector<vtype,N> * __restrict__ rhs,
const iScalar<mtype> * __restrict__ lhs){ const iScalar<mtype> * __restrict__ lhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -104,7 +104,7 @@ strong_inline void mult(iVector<rtype,N> * __restrict__ ret,
template<class rtype,class vtype,class mtype,int N> strong_inline template<class rtype,class vtype,class mtype,int N> accelerator_inline
iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>& rhs) iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>& rhs)
{ {
iVector<rtype,N> ret; iVector<rtype,N> ret;
@ -112,7 +112,7 @@ iVector<rtype,N> operator * (const iMatrix<mtype,N>& lhs,const iVector<vtype,N>&
return ret; return ret;
} }
template<class rtype,class vtype,class mtype,int N> strong_inline template<class rtype,class vtype,class mtype,int N> accelerator_inline
iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& rhs) iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& rhs)
{ {
iVector<rtype,N> ret; iVector<rtype,N> ret;
@ -120,7 +120,7 @@ iVector<rtype,N> operator * (const iScalar<mtype>& lhs,const iVector<vtype,N>& r
return ret; return ret;
} }
template<class rtype,class vtype,class mtype,int N> strong_inline template<class rtype,class vtype,class mtype,int N> accelerator_inline
iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& rhs) iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& rhs)
{ {
iVector<rtype,N> ret; iVector<rtype,N> ret;
@ -131,14 +131,14 @@ iVector<rtype,N> operator * (const iVector<mtype,N>& lhs,const iScalar<vtype>& r
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// Divide by scalar // Divide by scalar
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
template<class rtype,class vtype> strong_inline template<class rtype,class vtype> accelerator_inline
iScalar<rtype> operator / (const iScalar<rtype>& lhs,const iScalar<vtype>& rhs) iScalar<rtype> operator / (const iScalar<rtype>& lhs,const iScalar<vtype>& rhs)
{ {
iScalar<rtype> ret; iScalar<rtype> ret;
ret._internal = lhs._internal/rhs._internal; ret._internal = lhs._internal/rhs._internal;
return ret; return ret;
} }
template<class rtype,class vtype,int N> strong_inline template<class rtype,class vtype,int N> accelerator_inline
iVector<rtype,N> operator / (const iVector<rtype,N>& lhs,const iScalar<vtype>& rhs) iVector<rtype,N> operator / (const iVector<rtype,N>& lhs,const iScalar<vtype>& rhs)
{ {
iVector<rtype,N> ret; iVector<rtype,N> ret;
@ -147,7 +147,7 @@ iVector<rtype,N> operator / (const iVector<rtype,N>& lhs,const iScalar<vtype>& r
} }
return ret; return ret;
} }
template<class rtype,class vtype,int N> strong_inline template<class rtype,class vtype,int N> accelerator_inline
iMatrix<rtype,N> operator / (const iMatrix<rtype,N>& lhs,const iScalar<vtype>& rhs) iMatrix<rtype,N> operator / (const iMatrix<rtype,N>& lhs,const iScalar<vtype>& rhs)
{ {
iMatrix<rtype,N> ret; iMatrix<rtype,N> ret;
@ -174,14 +174,14 @@ iMatrix<rtype,N> operator / (const iMatrix<rtype,N>& lhs,const iScalar<vtype>& r
// //
// We can special case scalar_type ?? // We can special case scalar_type ??
template<class l,class r> template<class l,class r>
strong_inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)> accelerator_inline auto operator * (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(lhs._internal * rhs._internal)>
{ {
typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t; typedef iScalar<decltype(lhs._internal*rhs._internal)> ret_t;
ret_t ret; ret_t ret;
mult(&ret,&lhs,&rhs); mult(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class l,class r,int N> strong_inline template<class l,class r,int N> accelerator_inline
auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N> auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal[0][0]),N>
{ {
typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t; typedef decltype(lhs._internal[0][0]*rhs._internal[0][0]) ret_t;
@ -189,7 +189,7 @@ auto operator * (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<dec
mult(&ret,&lhs,&rhs); mult(&ret,&lhs,&rhs);
return ret; return ret;
} }
template<class l,class r, int N> strong_inline template<class l,class r, int N> accelerator_inline
auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N> auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<decltype(lhs._internal[0][0]*rhs._internal),N>
{ {
typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t; typedef decltype(lhs._internal[0][0]*rhs._internal) ret_t;
@ -201,7 +201,7 @@ auto operator * (const iMatrix<r,N>& lhs,const iScalar<l>& rhs) -> iMatrix<declt
}} }}
return ret; return ret;
} }
template<class l,class r,int N> strong_inline template<class l,class r,int N> accelerator_inline
auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N> auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<decltype(lhs._internal*rhs._internal[0][0]),N>
{ {
typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t; typedef decltype(lhs._internal*rhs._internal[0][0]) ret_t;
@ -212,7 +212,7 @@ auto operator * (const iScalar<l>& lhs,const iMatrix<r,N>& rhs) -> iMatrix<declt
}} }}
return ret; return ret;
} }
template<class l,class r,int N> strong_inline template<class l,class r,int N> accelerator_inline
auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N> auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal[0][0]*rhs._internal[0]),N>
{ {
typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t; typedef decltype(lhs._internal[0][0]*rhs._internal[0]) ret_t;
@ -225,7 +225,7 @@ auto operator * (const iMatrix<l,N>& lhs,const iVector<r,N>& rhs) -> iVector<dec
} }
return ret; return ret;
} }
template<class l,class r,int N> strong_inline template<class l,class r,int N> accelerator_inline
auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N> auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<decltype(lhs._internal*rhs._internal[0]),N>
{ {
typedef decltype(lhs._internal*rhs._internal[0]) ret_t; typedef decltype(lhs._internal*rhs._internal[0]) ret_t;
@ -235,7 +235,7 @@ auto operator * (const iScalar<l>& lhs,const iVector<r,N>& rhs) -> iVector<declt
} }
return ret; return ret;
} }
template<class l,class r,int N> strong_inline template<class l,class r,int N> accelerator_inline
auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N> auto operator * (const iVector<l,N>& lhs,const iScalar<r>& rhs) -> iVector<decltype(lhs._internal[0]*rhs._internal),N>
{ {
typedef decltype(lhs._internal[0]*rhs._internal) ret_t; typedef decltype(lhs._internal[0]*rhs._internal) ret_t;

View File

@ -36,58 +36,58 @@ NAMESPACE_BEGIN(Grid);
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// multiplication by fundamental scalar type // multiplication by fundamental scalar type
template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs; srhs=rhs; typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l> strong_inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> accelerator_inline iScalar<l> operator * (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iVector<l,N>::tensor_reduced srhs; srhs=rhs; typename iVector<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iVector<l,N> operator * (const typename iScalar<l>::scalar_type lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type &rhs)
{ {
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs; typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const typename iScalar<l>::scalar_type & lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor // Double support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs) template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l> strong_inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> accelerator_inline iScalar<l> operator * (double lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs) template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iVector<l,N> operator * (double lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator * (double lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Complex support; cast to "scalar_type" through constructor // Complex support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs) template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
@ -95,131 +95,131 @@ template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,Com
return lhs*srhs; return lhs*srhs;
} }
template<class l> strong_inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> accelerator_inline iScalar<l> operator * (ComplexD lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs) template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iVector<l,N> operator * (ComplexD lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,ComplexD rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator * (ComplexD lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Integer support; cast to "scalar_type" through constructor // Integer support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs) template<class l> accelerator_inline iScalar<l> operator * (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t; typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l> strong_inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; } template<class l> accelerator_inline iScalar<l> operator * (Integer lhs,const iScalar<l>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs) template<class l,int N> accelerator_inline iVector<l,N> operator * (const iVector<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iVector<l,N> operator * (Integer lhs,const iVector<l,N>& rhs) { return rhs*lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator * (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs*srhs; return lhs*srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator * (Integer lhs,const iMatrix<l,N>& rhs) { return rhs*lhs; }
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
// addition by fundamental scalar type applies to matrix(down diag) and scalar // addition by fundamental scalar type applies to matrix(down diag) and scalar
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs; srhs=rhs; typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs; return lhs+srhs;
} }
template<class l> strong_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; } template<class l> accelerator_inline iScalar<l> operator + (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs; typename iMatrix<l,N>::tensor_reduced srhs; srhs=rhs;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor // Double support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs) template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t; typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l> strong_inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; } template<class l> accelerator_inline iScalar<l> operator + (double lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator + (double lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
// Integer support cast to scalar type through constructor // Integer support cast to scalar type through constructor
template<class l> strong_inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs) template<class l> accelerator_inline iScalar<l> operator + (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t; typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l> strong_inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rhs) { return rhs+lhs; } template<class l> accelerator_inline iScalar<l> operator + (Integer lhs,const iScalar<l>& rhs) { return rhs+lhs; }
template<class l,int N> strong_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator + (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs+srhs; return lhs+srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; } template<class l,int N> accelerator_inline iMatrix<l,N> operator + (Integer lhs,const iMatrix<l,N>& rhs) { return rhs+lhs; }
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
// subtraction of fundamental scalar type applies to matrix(down diag) and scalar // subtraction of fundamental scalar type applies to matrix(down diag) and scalar
/////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs; srhs=rhs; typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs; return lhs-srhs;
} }
template<class l> strong_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs) template<class l> accelerator_inline iScalar<l> operator - (const typename iScalar<l>::scalar_type lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::tensor_reduced slhs;slhs=lhs; typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,const typename iScalar<l>::scalar_type rhs)
{ {
typename iScalar<l>::tensor_reduced srhs; srhs=rhs; typename iScalar<l>::tensor_reduced srhs; srhs=rhs;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const typename iScalar<l>::scalar_type lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::tensor_reduced slhs;slhs=lhs; typename iScalar<l>::tensor_reduced slhs;slhs=lhs;
return slhs-rhs; return slhs-rhs;
@ -228,26 +228,26 @@ template<class l,int N> strong_inline iMatrix<l,N> operator - (const typename iS
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Double support; cast to "scalar_type" through constructor // Double support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs) template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t; typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l> strong_inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs) template<class l> accelerator_inline iScalar<l> operator - (double lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=t; typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,double rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (double lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::scalar_type t(lhs); typename iScalar<l>::scalar_type t(lhs);
typename iScalar<l>::tensor_reduced slhs;slhs=t; typename iScalar<l>::tensor_reduced slhs;slhs=t;
@ -257,25 +257,25 @@ template<class l,int N> strong_inline iMatrix<l,N> operator - (double lhs,const
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Integer support; cast to "scalar_type" through constructor // Integer support; cast to "scalar_type" through constructor
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template<class l> strong_inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs) template<class l> accelerator_inline iScalar<l> operator - (const iScalar<l>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t; t=rhs; typename iScalar<l>::scalar_type t; t=rhs;
typename iScalar<l>::tensor_reduced srhs; srhs=t; typename iScalar<l>::tensor_reduced srhs; srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l> strong_inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs) template<class l> accelerator_inline iScalar<l> operator - (Integer lhs,const iScalar<l>& rhs)
{ {
typename iScalar<l>::scalar_type t;t=lhs; typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t; typename iScalar<l>::tensor_reduced slhs;slhs=t;
return slhs-rhs; return slhs-rhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (const iMatrix<l,N>& lhs,Integer rhs)
{ {
typename iScalar<l>::scalar_type t;t=rhs; typename iScalar<l>::scalar_type t;t=rhs;
typename iScalar<l>::tensor_reduced srhs;srhs=t; typename iScalar<l>::tensor_reduced srhs;srhs=t;
return lhs-srhs; return lhs-srhs;
} }
template<class l,int N> strong_inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs) template<class l,int N> accelerator_inline iMatrix<l,N> operator - (Integer lhs,const iMatrix<l,N>& rhs)
{ {
typename iScalar<l>::scalar_type t;t=lhs; typename iScalar<l>::scalar_type t;t=lhs;
typename iScalar<l>::tensor_reduced slhs;slhs=t; typename iScalar<l>::tensor_reduced slhs;slhs=t;

View File

@ -41,14 +41,14 @@ NAMESPACE_BEGIN(Grid);
// Vector +/- Vector // Vector +/- Vector
// Matrix +/- Matrix // Matrix +/- Matrix
// Matrix /- scalar // Matrix /- scalar
template<class vtype,class ltype,class rtype> strong_inline void sub(iScalar<vtype> * __restrict__ ret, template<class vtype,class ltype,class rtype> accelerator_inline void sub(iScalar<vtype> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs, const iScalar<ltype> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs) const iScalar<rtype> * __restrict__ rhs)
{ {
sub(&ret->_internal,&lhs->_internal,&rhs->_internal); sub(&ret->_internal,&lhs->_internal,&rhs->_internal);
} }
template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVector<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype,int N> accelerator_inline void sub(iVector<vtype,N> * __restrict__ ret,
const iVector<ltype,N> * __restrict__ lhs, const iVector<ltype,N> * __restrict__ lhs,
const iVector<rtype,N> * __restrict__ rhs) const iVector<rtype,N> * __restrict__ rhs)
{ {
@ -57,7 +57,7 @@ template<class vtype,class ltype,class rtype,int N> strong_inline void sub(iVect
} }
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs, const iMatrix<ltype,N> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){ const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
@ -66,7 +66,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
}} }}
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iScalar<ltype> * __restrict__ lhs, const iScalar<ltype> * __restrict__ lhs,
const iMatrix<rtype,N> * __restrict__ rhs){ const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
@ -82,7 +82,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
}} }}
return; return;
} }
template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMatrix<vtype,N> * __restrict__ ret, template<class vtype,class ltype,class rtype, int N> accelerator_inline void sub(iMatrix<vtype,N> * __restrict__ ret,
const iMatrix<ltype,N> * __restrict__ lhs, const iMatrix<ltype,N> * __restrict__ lhs,
const iScalar<rtype> * __restrict__ rhs){ const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){
@ -96,7 +96,7 @@ template<class vtype,class ltype,class rtype, int N> strong_inline void sub(iMat
} }
// - operator for scalar, vector, matrix // - operator for scalar, vector, matrix
template<class ltype,class rtype> strong_inline auto template<class ltype,class rtype> accelerator_inline auto
operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)> operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<decltype(lhs._internal - rhs._internal)>
{ {
typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t; typedef iScalar<decltype(lhs._internal-rhs._internal)> ret_t;
@ -105,7 +105,7 @@ operator - (const iScalar<ltype>& lhs, const iScalar<rtype>& rhs) -> iScalar<dec
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> accelerator_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N>& rhs) ->iVector<decltype(lhs._internal[0]-rhs._internal[0]),N>
{ {
typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t; typedef iVector<decltype(lhs._internal[0]-rhs._internal[0]),N> ret_t;
ret_t ret; ret_t ret;
@ -113,7 +113,7 @@ strong_inline auto operator - (const iVector<ltype,N>& lhs,const iVector<rtype,N
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N>& rhs) ->iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N>
{ {
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t; typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal[0][0]),N> ret_t;
ret_t ret; ret_t ret;
@ -121,7 +121,7 @@ strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iMatrix<rtype,N
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> accelerator_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>& rhs)->iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N>
{ {
typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t; typedef iMatrix<decltype(lhs._internal-rhs._internal[0][0]),N> ret_t;
ret_t ret; ret_t ret;
@ -129,7 +129,7 @@ strong_inline auto operator - (const iScalar<ltype>& lhs,const iMatrix<rtype,N>&
return ret; return ret;
} }
template<class ltype,class rtype,int N> template<class ltype,class rtype,int N>
strong_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> accelerator_inline auto operator - (const iMatrix<ltype,N>& lhs,const iScalar<rtype>& rhs)->iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N>
{ {
typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t; typedef iMatrix<decltype(lhs._internal[0][0]-rhs._internal),N> ret_t;
ret_t ret; ret_t ret;

View File

@ -65,126 +65,111 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
// Scalar no action // Scalar no action
// template<int Level> using tensor_reduce_level = typename accelerator iScalar() = default;
// iScalar<GridTypeMapper<vtype>::tensor_reduce_level<Level> >;
iScalar() = default;
/*
iScalar(const iScalar<vtype> &copyme)=default;
iScalar(iScalar<vtype> &&copyme)=default;
iScalar<vtype> & operator= (const iScalar<vtype> &copyme) = default;
iScalar<vtype> & operator= (iScalar<vtype> &&copyme) = default;
*/
// template<int N=0> friend accelerator_inline void zeroit(iScalar<vtype> &that){
// iScalar(EnableIf<isSIMDvectorized<vector_type>, vector_type> s) : _internal(s){}; // recurse down and hit the constructor for vector_type
iScalar(scalar_type s) : _internal(s){}; // recurse down and hit the constructor for vector_type
iScalar(const Zero &z) { *this = zero; };
iScalar<vtype> &operator=(const Zero &hero) {
zeroit(*this);
return *this;
}
friend strong_inline void vstream(iScalar<vtype> &out,
const iScalar<vtype> &in) {
vstream(out._internal, in._internal);
}
friend strong_inline void vbroadcast(iScalar<vtype> &out,const iScalar<vtype> &in,int lane){
vbroadcast(out._internal,in._internal,lane);
}
friend strong_inline void zeroit(iScalar<vtype> &that){
zeroit(that._internal); zeroit(that._internal);
} }
friend strong_inline void prefetch(iScalar<vtype> &that) {
accelerator_inline iScalar(scalar_type s) : _internal(s){}; // recurse down and hit the constructor for vector_type
accelerator_inline iScalar(const Zero &z) { zeroit(*this); };
accelerator_inline iScalar<vtype> &operator=(const Zero &hero) {
zeroit(*this); return *this;
}
friend accelerator_inline void vstream(iScalar<vtype> &out, const iScalar<vtype> &in) {
vstream(out._internal, in._internal);
}
friend accelerator_inline void vbroadcast(iScalar<vtype> &out,const iScalar<vtype> &in,int lane){
vbroadcast(out._internal,in._internal,lane);
}
friend accelerator_inline void prefetch(iScalar<vtype> &that) {
prefetch(that._internal); prefetch(that._internal);
} }
friend strong_inline void permute(iScalar<vtype> &out, friend accelerator_inline void permute(iScalar<vtype> &out, const iScalar<vtype> &in, int permutetype) {
const iScalar<vtype> &in, int permutetype) {
permute(out._internal, in._internal, permutetype); permute(out._internal, in._internal, permutetype);
} }
friend strong_inline void rotate(iScalar<vtype> &out,const iScalar<vtype> &in,int rot){ friend accelerator_inline void rotate(iScalar<vtype> &out,const iScalar<vtype> &in,int rot){
rotate(out._internal,in._internal,rot); rotate(out._internal,in._internal,rot);
} }
friend strong_inline void exchange(iScalar<vtype> &out1,iScalar<vtype> &out2, friend accelerator_inline void exchange(iScalar<vtype> &out1,iScalar<vtype> &out2,
const iScalar<vtype> &in1,const iScalar<vtype> &in2,int type){ const iScalar<vtype> &in1,const iScalar<vtype> &in2,int type)
exchange(out1._internal,out2._internal, {
in1._internal, in2._internal,type); exchange(out1._internal,out2._internal,in1._internal, in2._internal,type);
} }
// Unary negation // Unary negation
friend strong_inline iScalar<vtype> operator-(const iScalar<vtype> &r) { friend accelerator_inline iScalar<vtype> operator-(const iScalar<vtype> &r) {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = -r._internal; ret._internal = -r._internal;
return ret; return ret;
} }
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iScalar<vtype> &operator*=(const iScalar<vtype> &r) { accelerator_inline iScalar<vtype> &operator*=(const iScalar<vtype> &r) {
*this = (*this) * r; *this = (*this) * r;
return *this; return *this;
} }
strong_inline iScalar<vtype> &operator-=(const iScalar<vtype> &r) { accelerator_inline iScalar<vtype> &operator-=(const iScalar<vtype> &r) {
*this = (*this) - r; *this = (*this) - r;
return *this; return *this;
} }
strong_inline iScalar<vtype> &operator+=(const iScalar<vtype> &r) { accelerator_inline iScalar<vtype> &operator+=(const iScalar<vtype> &r) {
*this = (*this) + r; *this = (*this) + r;
return *this; return *this;
} }
strong_inline vtype &operator()(void) { return _internal; } accelerator_inline vtype &operator()(void) { return _internal; }
strong_inline const vtype &operator()(void) const { return _internal; } accelerator_inline const vtype &operator()(void) const { return _internal; }
// Type casts meta programmed, must be pure scalar to match TensorRemove // Type casts meta programmed, must be pure scalar to match TensorRemove
template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> accelerator_inline
operator ComplexF() const { operator ComplexF() const {
return (TensorRemove(_internal)); return (TensorRemove(_internal));
}; }
template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> template <class U = vtype, class V = scalar_type, IfComplex<V> = 0, IfNotSimd<U> = 0> accelerator_inline
operator ComplexD() const { operator ComplexD() const {
return (TensorRemove(_internal)); return (TensorRemove(_internal));
}; }
// template<class U=vtype,class V=scalar_type,IfComplex<V> = 0,IfNotSimd<U> = template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0> accelerator_inline
// 0> operator RealD () const { return(real(TensorRemove(_internal))); }
template <class U = vtype, class V = scalar_type, IfReal<V> = 0,IfNotSimd<U> = 0>
operator RealD() const { operator RealD() const {
return TensorRemove(_internal); return TensorRemove(_internal);
} }
template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> template <class U = vtype, class V = scalar_type, IfInteger<V> = 0, IfNotSimd<U> = 0> accelerator_inline
operator Integer() const { operator Integer() const {
return Integer(TensorRemove(_internal)); return Integer(TensorRemove(_internal));
} }
// convert from a something to a scalar via constructor of something arg // convert from a something to a scalar via constructor of something arg
template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr> template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr>
strong_inline iScalar<vtype> operator=(T arg) { accelerator_inline iScalar<vtype> operator=(T arg) {
_internal = arg; _internal = arg;
return *this; return *this;
} }
// Convert elements // Convert elements
template <class ttype> template <class ttype>
strong_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) { accelerator_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) {
_internal = arg._internal; _internal = arg._internal;
return *this; return *this;
} }
// Host only
friend std::ostream &operator<<(std::ostream &stream,const iScalar<vtype> &o) { friend std::ostream &operator<<(std::ostream &stream,const iScalar<vtype> &o) {
stream << "S {" << o._internal << "}"; stream << "S {" << o._internal << "}";
return stream; return stream;
}; };
}; };
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// Allows to turn scalar<scalar<scalar<double>>>> back to double. // Allows to turn scalar<scalar<scalar<double>>>> back to double.
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
template <class T> template <class T>
strong_inline typename std::enable_if<!isGridTensor<T>::value, T>::type accelerator_inline typename std::enable_if<!isGridTensor<T>::value, T>::type
TensorRemove(T arg) { TensorRemove(T arg) {
return arg; return arg;
} }
template <class vtype> template <class vtype>
strong_inline auto TensorRemove(iScalar<vtype> arg) accelerator_inline auto TensorRemove(iScalar<vtype> arg)
-> decltype(TensorRemove(arg._internal)) { -> decltype(TensorRemove(arg._internal)) {
return TensorRemove(arg._internal); return TensorRemove(arg._internal);
} }
@ -210,88 +195,80 @@ public:
// get double precision version // get double precision version
typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision; typedef iVector<typename GridTypeMapper<vtype>::DoublePrecision, N> DoublePrecision;
template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr>
* = nullptr> accelerator_inline auto operator=(T arg) -> iVector<vtype, N> {
strong_inline auto operator=(T arg) -> iVector<vtype, N> {
zeroit(*this); zeroit(*this);
for (int i = 0; i < N; i++) _internal[i] = arg; for (int i = 0; i < N; i++) _internal[i] = arg;
return *this; return *this;
} }
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
iVector(const Zero &z) { *this = zero; }; accelerator_inline iVector(const Zero &z) { zeroit(*this); };
iVector() = default; accelerator iVector() = default;
/*
iVector(const iVector<vtype,N> &copyme)=default;
iVector(iVector<vtype,N> &&copyme)=default;
iVector<vtype,N> & operator= (const iVector<vtype,N> &copyme) = default;
iVector<vtype,N> & operator= (iVector<vtype,N> &&copyme) = default;
*/
iVector<vtype, N> &operator=(const Zero &hero) { accelerator_inline iVector<vtype, N> &operator=(const Zero &hero) {
zeroit(*this); zeroit(*this);
return *this; return *this;
} }
friend strong_inline void zeroit(iVector<vtype, N> &that) { friend accelerator_inline void zeroit(iVector<vtype, N> &that) {
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
zeroit(that._internal[i]); zeroit(that._internal[i]);
} }
} }
friend strong_inline void prefetch(iVector<vtype, N> &that) { friend accelerator_inline void prefetch(iVector<vtype, N> &that) {
for (int i = 0; i < N; i++) prefetch(that._internal[i]); for (int i = 0; i < N; i++) prefetch(that._internal[i]);
} }
friend strong_inline void vstream(iVector<vtype, N> &out, friend accelerator_inline void vstream(iVector<vtype, N> &out, const iVector<vtype, N> &in) {
const iVector<vtype, N> &in) {
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
vstream(out._internal[i], in._internal[i]); vstream(out._internal[i], in._internal[i]);
} }
} }
friend strong_inline void vbroadcast(iVector<vtype,N> &out,const iVector<vtype,N> &in,int lane){ friend accelerator_inline void vbroadcast(iVector<vtype,N> &out,const iVector<vtype,N> &in,int lane){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
vbroadcast(out._internal[i],in._internal[i],lane); vbroadcast(out._internal[i],in._internal[i],lane);
} }
} }
friend strong_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){ friend accelerator_inline void permute(iVector<vtype,N> &out,const iVector<vtype,N> &in,int permutetype){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
permute(out._internal[i],in._internal[i],permutetype); permute(out._internal[i],in._internal[i],permutetype);
} }
} }
friend strong_inline void rotate(iVector<vtype,N> &out,const iVector<vtype,N> &in,int rot){ friend accelerator_inline void rotate(iVector<vtype,N> &out,const iVector<vtype,N> &in,int rot){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
rotate(out._internal[i],in._internal[i],rot); rotate(out._internal[i],in._internal[i],rot);
} }
} }
friend strong_inline void exchange(iVector<vtype,N> &out1,iVector<vtype,N> &out2, friend accelerator_inline void exchange(iVector<vtype,N> &out1,iVector<vtype,N> &out2,
const iVector<vtype,N> &in1,const iVector<vtype,N> &in2,int type){ const iVector<vtype,N> &in1,const iVector<vtype,N> &in2,int type){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
exchange(out1._internal[i],out2._internal[i], exchange(out1._internal[i],out2._internal[i],in1._internal[i], in2._internal[i],type);
in1._internal[i], in2._internal[i],type);
} }
} }
// Unary negation // Unary negation
friend strong_inline iVector<vtype, N> operator-(const iVector<vtype, N> &r) { friend accelerator_inline iVector<vtype, N> operator-(const iVector<vtype, N> &r) {
iVector<vtype, N> ret; iVector<vtype, N> ret;
for (int i = 0; i < N; i++) ret._internal[i] = -r._internal[i]; for (int i = 0; i < N; i++) ret._internal[i] = -r._internal[i];
return ret; return ret;
} }
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
strong_inline iVector<vtype, N> &operator*=(const iScalar<vtype> &r) { accelerator_inline iVector<vtype, N> &operator*=(const iScalar<vtype> &r) {
*this = (*this) * r; *this = (*this) * r;
return *this; return *this;
} }
strong_inline iVector<vtype, N> &operator-=(const iVector<vtype, N> &r) { accelerator_inline iVector<vtype, N> &operator-=(const iVector<vtype, N> &r) {
*this = (*this) - r; *this = (*this) - r;
return *this; return *this;
} }
strong_inline iVector<vtype, N> &operator+=(const iVector<vtype, N> &r) { accelerator_inline iVector<vtype, N> &operator+=(const iVector<vtype, N> &r) {
*this = (*this) + r; *this = (*this) + r;
return *this; return *this;
} }
strong_inline vtype &operator()(int i) { return _internal[i]; } accelerator_inline vtype &operator()(int i) { return _internal[i]; }
strong_inline const vtype &operator()(int i) const { return _internal[i]; } accelerator_inline const vtype &operator()(int i) const { return _internal[i]; }
friend std::ostream &operator<<(std::ostream &stream,
const iVector<vtype, N> &o) { // Host
friend std::ostream &operator<<(std::ostream &stream, const iVector<vtype, N> &o) {
stream << "V<" << N << ">{"; stream << "V<" << N << ">{";
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
stream << o._internal[i]; stream << o._internal[i];
@ -300,9 +277,6 @@ public:
stream << "}"; stream << "}";
return stream; return stream;
}; };
// strong_inline vtype && operator ()(int i) {
// return _internal[i];
// }
}; };
template <class vtype, int N> template <class vtype, int N>
@ -330,147 +304,137 @@ public:
enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 }; enum { TensorLevel = GridTypeMapper<vtype>::TensorLevel + 1 };
iMatrix(const Zero &z) { *this = zero; }; accelerator_inline iMatrix(const Zero &z) { zeroit(*this); };
iMatrix() = default; accelerator iMatrix() = default;
iMatrix &operator=(const iMatrix &rhs) { accelerator_inline iMatrix &operator=(const iMatrix &rhs) {
for (int i = 0; i < N; i++) for (int i = 0; i < N; i++)
for (int j = 0; j < N; j++) vstream(_internal[i][j], rhs._internal[i][j]); for (int j = 0; j < N; j++)
vstream(_internal[i][j], rhs._internal[i][j]);
return *this; return *this;
}; };
iMatrix(scalar_type s) { accelerator_inline iMatrix(scalar_type s) {
(*this) = s; (*this) = s;
}; // recurse down and hit the constructor for vector_type }; // recurse down and hit the constructor for vector_type
/* accelerator_inline iMatrix<vtype, N> &operator=(const Zero &hero) {
iMatrix(const iMatrix<vtype,N> &copyme)=default;
iMatrix(iMatrix<vtype,N> &&copyme)=default;
iMatrix<vtype,N> & operator= (const iMatrix<vtype,N> &copyme) = default;
iMatrix<vtype,N> & operator= (iMatrix<vtype,N> &&copyme) = default;
*/
iMatrix<vtype, N> &operator=(const Zero &hero) {
zeroit(*this); zeroit(*this);
return *this; return *this;
} }
template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr>
* = nullptr> accelerator_inline auto operator=(T arg) -> iMatrix<vtype, N> {
strong_inline auto operator=(T arg) -> iMatrix<vtype, N> {
zeroit(*this); zeroit(*this);
for (int i = 0; i < N; i++) _internal[i][i] = arg; for (int i = 0; i < N; i++) _internal[i][i] = arg;
return *this; return *this;
} }
friend strong_inline void zeroit(iMatrix<vtype,N> &that){ friend accelerator_inline void zeroit(iMatrix<vtype,N> &that){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
zeroit(that._internal[i][j]); zeroit(that._internal[i][j]);
}} }}
} }
friend strong_inline void prefetch(iMatrix<vtype,N> &that){ friend accelerator_inline void prefetch(iMatrix<vtype,N> &that){
for(int i=0;i<N;i++) for(int i=0;i<N;i++) {
for(int j=0;j<N;j++) for(int j=0;j<N;j++) {
prefetch(that._internal[i][j]); prefetch(that._internal[i][j]);
}
friend strong_inline void vstream(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
vstream(out._internal[i][j],in._internal[i][j]);
}} }}
} }
friend strong_inline void vbroadcast(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int lane){ friend accelerator_inline void vstream(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
vbroadcast(out._internal[i][j],in._internal[i][j],lane); vstream(out._internal[i][j],in._internal[i][j]);
}} }}
} }
friend accelerator_inline void vbroadcast(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int lane){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
vbroadcast(out._internal[i][j],in._internal[i][j],lane);
}}
}
friend strong_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){ friend accelerator_inline void permute(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int permutetype){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
permute(out._internal[i][j],in._internal[i][j],permutetype); permute(out._internal[i][j],in._internal[i][j],permutetype);
}} }}
} }
friend strong_inline void rotate(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int rot){ friend accelerator_inline void rotate(iMatrix<vtype,N> &out,const iMatrix<vtype,N> &in,int rot){
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
rotate(out._internal[i][j],in._internal[i][j],rot); rotate(out._internal[i][j],in._internal[i][j],rot);
}} }}
}
friend strong_inline void exchange(iMatrix<vtype,N> &out1,iMatrix<vtype,N> &out2,
const iMatrix<vtype,N> &in1,const iMatrix<vtype,N> &in2,int type){
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
exchange(out1._internal[i][j],out2._internal[i][j],
in1._internal[i][j], in2._internal[i][j],type);
}}
}
// Unary negation
friend strong_inline iMatrix<vtype, N> operator-(const iMatrix<vtype, N> &r) {
iMatrix<vtype, N> ret;
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
ret._internal[i][j] = -r._internal[i][j];
}
} }
return ret; friend accelerator_inline void exchange(iMatrix<vtype,N> &out1,iMatrix<vtype,N> &out2,
} const iMatrix<vtype,N> &in1,const iMatrix<vtype,N> &in2,int type){
// *=,+=,-= operators inherit from corresponding "*,-,+" behaviour for(int i=0;i<N;i++){
template <class T> for(int j=0;j<N;j++){
strong_inline iMatrix<vtype, N> &operator*=(const T &r) { exchange(out1._internal[i][j],out2._internal[i][j],in1._internal[i][j], in2._internal[i][j],type);
*this = (*this) * r; }}
return *this; }
}
template <class T> // Unary negation
strong_inline iMatrix<vtype, N> &operator-=(const T &r) { friend accelerator_inline iMatrix<vtype, N> operator-(const iMatrix<vtype, N> &r) {
*this = (*this) - r; iMatrix<vtype, N> ret;
return *this; for (int i = 0; i < N; i++) {
} for (int j = 0; j < N; j++) {
template <class T> ret._internal[i][j] = -r._internal[i][j];
strong_inline iMatrix<vtype, N> &operator+=(const T &r) { }}
*this = (*this) + r; return ret;
return *this; }
} // *=,+=,-= operators inherit from corresponding "*,-,+" behaviour
template <class T>
accelerator_inline iMatrix<vtype, N> &operator*=(const T &r) {
*this = (*this) * r;
return *this;
}
template <class T>
accelerator_inline iMatrix<vtype, N> &operator-=(const T &r) {
*this = (*this) - r;
return *this;
}
template <class T>
accelerator_inline iMatrix<vtype, N> &operator+=(const T &r) {
*this = (*this) + r;
return *this;
}
// returns an lvalue reference // returns an lvalue reference
strong_inline vtype &operator()(int i, int j) { return _internal[i][j]; } accelerator_inline vtype &operator()(int i, int j) { return _internal[i][j]; }
strong_inline const vtype &operator()(int i, int j) const { accelerator_inline const vtype &operator()(int i, int j) const {
return _internal[i][j]; return _internal[i][j];
} }
friend std::ostream &operator<<(std::ostream &stream,
const iMatrix<vtype, N> &o) { // Host function only
stream << "M<" << N << ">{"; friend std::ostream &operator<<(std::ostream &stream, const iMatrix<vtype, N> &o) {
for (int i = 0; i < N; i++) { stream << "M<" << N << ">{";
stream << "{"; for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) { stream << "{";
stream << o._internal[i][j]; for (int j = 0; j < N; j++) {
if (i < N - 1) stream << ","; stream << o._internal[i][j];
if (i < N - 1) stream << ",";
}
stream << "}";
if (i != N - 1) stream << "\n\t\t";
} }
stream << "}"; stream << "}";
if (i != N - 1) stream << "\n\t\t"; return stream;
} };
stream << "}";
return stream;
}; };
// strong_inline vtype && operator ()(int i,int j) { template <class v> accelerator_inline
// return _internal[i][j];
// }
};
template <class v>
void vprefetch(const iScalar<v> &vv) { void vprefetch(const iScalar<v> &vv) {
vprefetch(vv._internal); vprefetch(vv._internal);
} }
template <class v, int N> template <class v, int N> accelerator_inline
void vprefetch(const iVector<v, N> &vv) { void vprefetch(const iVector<v, N> &vv) {
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
vprefetch(vv._internal[i]); vprefetch(vv._internal[i]);
} }
} }
template <class v, int N> template <class v, int N> accelerator_inline
void vprefetch(const iMatrix<v, N> &vv) { void vprefetch(const iMatrix<v, N> &vv) {
for (int i = 0; i < N; i++) { for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) { for (int j = 0; j < N; j++) {
@ -482,6 +446,3 @@ void vprefetch(const iMatrix<v, N> &vv) {
NAMESPACE_END(Grid); NAMESPACE_END(Grid);
#endif #endif

View File

@ -33,12 +33,12 @@ NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Determinant function for scalar, vector, matrix // Determinant function for scalar, vector, matrix
/////////////////////////////////////////////// ///////////////////////////////////////////////
inline ComplexF Determinant( const ComplexF &arg){ return arg;} accelerator_inline ComplexF Determinant( const ComplexF &arg){ return arg;}
inline ComplexD Determinant( const ComplexD &arg){ return arg;} accelerator_inline ComplexD Determinant( const ComplexD &arg){ return arg;}
inline RealF Determinant( const RealF &arg){ return arg;} accelerator_inline RealF Determinant( const RealF &arg){ return arg;}
inline RealD Determinant( const RealD &arg){ return arg;} accelerator_inline RealD Determinant( const RealD &arg){ return arg;}
template<class vtype> inline auto Determinant(const iScalar<vtype>&r) -> iScalar<decltype(Determinant(r._internal))> template<class vtype> accelerator_inline auto Determinant(const iScalar<vtype>&r) -> iScalar<decltype(Determinant(r._internal))>
{ {
iScalar<decltype(Determinant(r._internal))> ret; iScalar<decltype(Determinant(r._internal))> ret;
ret._internal = Determinant(r._internal); ret._internal = Determinant(r._internal);
@ -46,7 +46,7 @@ template<class vtype> inline auto Determinant(const iScalar<vtype>&r) -> iScalar
} }
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg) accelerator_inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
{ {
iMatrix<vtype,N> ret(arg); iMatrix<vtype,N> ret(arg);
iScalar<vtype> det = vtype(1.0); iScalar<vtype> det = vtype(1.0);

View File

@ -37,14 +37,14 @@ NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP) template<class vtype> accelerator_inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = Exponentiate(r._internal, alpha, Nexp); ret._internal = Exponentiate(r._internal, alpha, Nexp);
return ret; return ret;
} }
template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP) template<class vtype, int N> accelerator_inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP)
{ {
iVector<vtype, N> ret; iVector<vtype, N> ret;
for (int i = 0; i < N; i++) for (int i = 0; i < N; i++)
@ -56,7 +56,7 @@ template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector
// Specialisation: Cayley-Hamilton exponential for SU(3) // Specialisation: Cayley-Hamilton exponential for SU(3)
template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr> template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>
inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) accelerator_inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{ {
// for SU(3) 2x faster than the std implementation using Nexp=12 // for SU(3) 2x faster than the std implementation using Nexp=12
// notice that it actually computes // notice that it actually computes
@ -120,7 +120,7 @@ inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha ,
// General exponential // General exponential
template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>
inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) accelerator_inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP )
{ {
// notice that it actually computes // notice that it actually computes
// exp ( input matrix ) // exp ( input matrix )

View File

@ -40,10 +40,9 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
// Extract/merge a fundamental vector type, to pointer array with offset // Extract/merge a fundamental vector type, to pointer array with offset
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
template<class vsimd,class scalar> template<class vsimd,class scalar>
inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type * y, strong_inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type * y,
std::vector<scalar *> &extracted,int offset){ std::vector<scalar *> &extracted,int offset){
// FIXME: bounce off memory is painful // FIXME: bounce off memory is painful
static const int Nsimd=sizeof(vsimd)/sizeof(scalar); static const int Nsimd=sizeof(vsimd)/sizeof(scalar);
int Nextr=extracted.size(); int Nextr=extracted.size();
@ -58,8 +57,8 @@ namespace Grid{
// Merge simd vector from array of scalars to pointer array with offset // Merge simd vector from array of scalars to pointer array with offset
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vsimd,class scalar> template<class vsimd,class scalar>
inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type * y, strong_inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type * y,
std::vector<scalar *> &extracted,int offset){ std::vector<scalar *> &extracted,int offset){
static const int Nsimd=sizeof(vsimd)/sizeof(scalar); static const int Nsimd=sizeof(vsimd)/sizeof(scalar);
@ -79,7 +78,7 @@ namespace Grid{
// Extract a fundamental vector type to scalar array // Extract a fundamental vector type to scalar array
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
template<class vsimd,class scalar> template<class vsimd,class scalar>
inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type &y,std::vector<scalar> &extracted){ strong_inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const vsimd >::type &y,std::vector<scalar> &extracted){
int Nextr=extracted.size(); int Nextr=extracted.size();
int Nsimd=vsimd::Nsimd(); int Nsimd=vsimd::Nsimd();
@ -109,7 +108,7 @@ namespace Grid{
// Merge simd vector from array of scalars // Merge simd vector from array of scalars
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vsimd,class scalar> template<class vsimd,class scalar>
inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type &y,std::vector<scalar> &extracted){ strong_inline void merge(typename std::enable_if<!isGridTensor<vsimd>::value, vsimd >::type &y,std::vector<scalar> &extracted){
int Nextr=extracted.size(); int Nextr=extracted.size();
static const int Nsimd=vsimd::Nsimd(); static const int Nsimd=vsimd::Nsimd();
int s=Nsimd/Nextr; int s=Nsimd/Nextr;
@ -125,15 +124,13 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Extract to contiguous array scalar object // Extract to contiguous array scalar object
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vobj> inline void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted) template<class vobj> strong_inline void extract(const vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;
static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
static const int words=sizeof(vobj)/sizeof(vector_type); static const int words=sizeof(vobj)/sizeof(vector_type);
int Nextr=extracted.size(); int Nextr=extracted.size();
int s=Nsimd/Nextr;
std::vector<scalar_type *> pointers(Nextr); std::vector<scalar_type *> pointers(Nextr);
for(int i=0;i<Nextr;i++) for(int i=0;i<Nextr;i++)
@ -147,7 +144,7 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Extract to a bunch of scalar object pointers, with offset // Extract to a bunch of scalar object pointers, with offset
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vobj> inline template<class vobj> strong_inline
void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extracted, int offset) void extract(const vobj &vec,std::vector<typename vobj::scalar_object *> &extracted, int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
@ -171,7 +168,7 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change // Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vobj, class sobj> inline template<class vobj, class sobj> strong_inline
void extract1(const vobj &vec,std::vector<sobj*> &extracted, int offset) void extract1(const vobj &vec,std::vector<sobj*> &extracted, int offset)
{ {
typedef typename vobj::scalar_type vobj_scalar_type ; typedef typename vobj::scalar_type vobj_scalar_type ;
@ -198,17 +195,15 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Merge a contiguous array of scalar objects // Merge a contiguous array of scalar objects
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vobj> inline template<class vobj> strong_inline
void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted) void merge(vobj &vec,std::vector<typename vobj::scalar_object> &extracted)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;
static const int Nsimd=sizeof(vector_type)/sizeof(scalar_type);
static const int words=sizeof(vobj)/sizeof(vector_type); static const int words=sizeof(vobj)/sizeof(vector_type);
int Nextr = extracted.size(); int Nextr = extracted.size();
int splat=Nsimd/Nextr;
std::vector<scalar_type *> pointers(Nextr); std::vector<scalar_type *> pointers(Nextr);
for(int i=0;i<Nextr;i++) for(int i=0;i<Nextr;i++)
@ -223,7 +218,7 @@ namespace Grid{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Merge a bunch of different scalar object pointers, with offset // Merge a bunch of different scalar object pointers, with offset
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class vobj> inline template<class vobj> strong_inline
void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset) void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
@ -250,7 +245,7 @@ namespace Grid{
} }
} }
template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset) template<class vobj> strong_inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;
@ -268,7 +263,7 @@ namespace Grid{
}} }}
} }
template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset) template<class vobj> strong_inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;

View File

@ -38,10 +38,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
/* Needed? /* Needed?
template<int Level> inline ComplexF traceIndex(const ComplexF arg) { return arg;} template<int Level> accelerator_inline ComplexF traceIndex(const ComplexF arg) { return arg;}
template<int Level> inline ComplexD traceIndex(const ComplexD arg) { return arg;} template<int Level> accelerator_inline ComplexD traceIndex(const ComplexD arg) { return arg;}
template<int Level> inline RealF traceIndex(const RealF arg) { return arg;} template<int Level> accelerator_inline RealF traceIndex(const RealF arg) { return arg;}
template<int Level> inline RealD traceIndex(const RealD arg) { return arg;} template<int Level> accelerator_inline RealD traceIndex(const RealD arg) { return arg;}
*/ */
template<int Level> template<int Level>
class TensorIndexRecursion { class TensorIndexRecursion {
@ -51,42 +51,42 @@ public:
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Type Queries // Type Queries
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
template<class vtype> static inline int indexRank(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::indexRank(tmp._internal); } template<class vtype> static accelerator_inline int indexRank(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::indexRank(tmp._internal); }
template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0]); } template<class vtype,int N> static accelerator_inline int indexRank(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0]); }
template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0][0]); } template<class vtype,int N> static accelerator_inline int indexRank(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0][0]); }
template<class vtype> static inline int isScalar(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isScalar(tmp._internal); } template<class vtype> static accelerator_inline int isScalar(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isScalar(tmp._internal); }
template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0]); } template<class vtype,int N> static accelerator_inline int isScalar(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0]); }
template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0][0]); } template<class vtype,int N> static accelerator_inline int isScalar(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0][0]); }
template<class vtype> static inline int isVector(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isVector(tmp._internal); } template<class vtype> static accelerator_inline int isVector(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isVector(tmp._internal); }
template<class vtype,int N> static inline int isVector(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0]); } template<class vtype,int N> static accelerator_inline int isVector(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0]); }
template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0][0]); } template<class vtype,int N> static accelerator_inline int isVector(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0][0]); }
template<class vtype> static inline int isMatrix(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal); } template<class vtype> static accelerator_inline int isMatrix(const iScalar<vtype> tmp) { return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal); }
template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0]); } template<class vtype,int N> static accelerator_inline int isMatrix(const iVector<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0]); }
template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0][0]); } template<class vtype,int N> static accelerator_inline int isMatrix(const iMatrix<vtype,N> tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0][0]); }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Trace // Trace
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
template<class vtype> template<class vtype>
static auto traceIndex(const iScalar<vtype> arg) -> iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))> static accelerator_inline auto traceIndex(const iScalar<vtype> arg) -> iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))>
{ {
iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))> ret; iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))> ret;
ret._internal = TensorIndexRecursion<Level-1>::traceIndex(arg._internal); ret._internal = TensorIndexRecursion<Level-1>::traceIndex(arg._internal);
return ret; return ret;
} }
template<class vtype,int N>
static auto traceIndex(const iVector<vtype,N> arg) -> iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N>
{
iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i]);
}
return ret;
}
template<class vtype,int N> template<class vtype,int N>
static auto traceIndex(const iMatrix<vtype,N> arg) -> iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N> static accelerator_inline auto traceIndex(const iVector<vtype,N> arg) -> iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N>
{
iVector<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0])),N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::traceIndex(arg._internal[i]);
}
return ret;
}
template<class vtype,int N>
static accelerator_inline auto traceIndex(const iMatrix<vtype,N> arg) -> iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N>
{ {
iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N> ret; iMatrix<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal[0][0])),N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -95,139 +95,141 @@ static auto traceIndex(const iVector<vtype,N> arg) -> iVector<decltype(TensorIn
}} }}
return ret; return ret;
} }
//////////////////////////////////////////// ////////////////////////////////////////////
// Recursion for peeking a specific index // Recursion for peeking a specific index
//////////////////////////////////////////// ////////////////////////////////////////////
template<class vtype> template<class vtype>
static auto peekIndex(const iScalar<vtype> arg,int i) -> iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> static accelerator_inline auto peekIndex(const iScalar<vtype> arg,int i) -> iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))>
{ {
iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> ret; iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0))> ret;
ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i); ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i);
return ret; return ret;
}
template<class vtype>
static auto peekIndex(const iScalar<vtype> arg,int i,int j) -> iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))>
{
iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> ret;
ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i,j);
return ret;
}
template<class vtype,int N>
static auto peekIndex(const iVector<vtype,N> arg,int ii) -> iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N>
{
iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii);
} }
return ret; template<class vtype>
} static accelerator_inline auto peekIndex(const iScalar<vtype> arg,int i,int j) -> iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))>
template<class vtype,int N> {
static auto peekIndex(const iVector<vtype,N> arg,int ii,int jj) -> iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal,0,0))> ret;
{ ret._internal = TensorIndexRecursion<Level-1>::peekIndex(arg._internal,i,j);
iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> ret; return ret;
for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii,jj);
} }
return ret;
}
template<class vtype,int N> template<class vtype,int N>
static auto peekIndex(const iMatrix<vtype,N> arg,int ii) -> iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> static accelerator_inline auto peekIndex(const iVector<vtype,N> arg,int ii) -> iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N>
{ {
iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> ret; iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0)),N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii);
ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii); }
}} return ret;
return ret;
}
template<class vtype,int N>
static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) -> iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N>
{
iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii,jj);
}}
return ret;
}
////////////////////////////////////////////
// Recursion for poking a specific index
////////////////////////////////////////////
template<class vtype> inline static
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0))> &arg, int i)
{
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i);
}
template<class vtype> inline static
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0,0))> &arg, int i,int j)
{
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i,j);
}
template<class vtype,int N> inline static
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i)
{
for(int ii=0;ii<N;ii++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i);
} }
} template<class vtype,int N>
template<class vtype,int N> inline static static accelerator_inline auto peekIndex(const iVector<vtype,N> arg,int ii,int jj)
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j) -> iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N>
{ {
for(int ii=0;ii<N;ii++){ iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0],0,0)),N> ret;
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j); for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i],ii,jj);
}
return ret;
} }
}
template<class vtype,int N>
template<class vtype,int N> inline static static accelerator_inline auto peekIndex(const iMatrix<vtype,N> arg,int ii) -> iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N>
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i) {
{ iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0)),N> ret;
for(int ii=0;ii<N;ii++){ for(int i=0;i<N;i++){
for(int jj=0;jj<N;jj++){ for(int j=0;j<N;j++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i); ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii);
}} }}
} return ret;
template<class vtype,int N> inline static }
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j) template<class vtype,int N>
{ static accelerator_inline auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj)
for(int ii=0;ii<N;ii++){ -> iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N>
for(int jj=0;jj<N;jj++){ {
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i,j); iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(arg._internal[0][0],0,0)),N> ret;
}} for(int i=0;i<N;i++){
} for(int j=0;j<N;j++){
ret._internal[i][j] = TensorIndexRecursion<Level-1>::peekIndex(arg._internal[i][j],ii,jj);
//////////////////////////////////////////// }}
// Recursion for transposing a specific index return ret;
//////////////////////////////////////////// }
template<class vtype> ////////////////////////////////////////////
static auto transposeIndex(const iScalar<vtype> arg) -> iScalar<vtype> // Recursion for poking a specific index
{ ////////////////////////////////////////////
iScalar<vtype> ret;
ret._internal = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal); template<class vtype> accelerator_inline static
return ret; void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0))> &arg, int i)
} {
template<class vtype,int N> TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i);
static auto transposeIndex(const iVector<vtype,N> arg) -> iVector<vtype,N> }
{ template<class vtype> accelerator_inline static
iVector<vtype,N> ret; void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal,0,0))> &arg, int i,int j)
for(int i=0;i<N;i++){ {
ret._internal[i] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i]); TensorIndexRecursion<Level-1>::pokeIndex(ret._internal,arg._internal,i,j);
}
template<class vtype,int N> accelerator_inline static
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0)),N> &arg, int i)
{
for(int ii=0;ii<N;ii++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i);
}
}
template<class vtype,int N> accelerator_inline static
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0],0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii],arg._internal[ii],i,j);
}
}
template<class vtype,int N> accelerator_inline static
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0)),N> &arg, int i)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i);
}}
}
template<class vtype,int N> accelerator_inline static
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(TensorIndexRecursion<Level-1>::peekIndex(ret._internal[0][0],0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
TensorIndexRecursion<Level-1>::pokeIndex(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
}}
}
////////////////////////////////////////////
// Recursion for transposing a specific index
////////////////////////////////////////////
template<class vtype> accelerator_inline
static auto transposeIndex(const iScalar<vtype> arg) -> iScalar<vtype>
{
iScalar<vtype> ret;
ret._internal = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal);
return ret;
}
template<class vtype,int N> accelerator_inline
static auto transposeIndex(const iVector<vtype,N> arg) -> iVector<vtype,N>
{
iVector<vtype,N> ret;
for(int i=0;i<N;i++){
ret._internal[i] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i]);
}
return ret;
}
template<class vtype,int N> accelerator_inline
static auto transposeIndex(const iMatrix<vtype,N> arg) -> iMatrix<vtype,N>
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i][j]);
}}
return ret;
} }
return ret;
}
template<class vtype,int N>
static auto transposeIndex(const iMatrix<vtype,N> arg) -> iMatrix<vtype,N>
{
iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
ret._internal[i][j] = TensorIndexRecursion<Level-1>::transposeIndex(arg._internal[i][j]);
}}
return ret;
}
}; };
//////////////////////////// ////////////////////////////
@ -240,162 +242,162 @@ public:
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Type Queries // Type Queries
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
template<class vtype> static inline int indexRank(const iScalar<vtype> tmp) { return 1; } template<class vtype> static accelerator_inline int indexRank(const iScalar<vtype> tmp) { return 1; }
template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> tmp){ return N; } template<class vtype,int N> static accelerator_inline int indexRank(const iVector<vtype,N> tmp){ return N; }
template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> tmp){ return N; } template<class vtype,int N> static accelerator_inline int indexRank(const iMatrix<vtype,N> tmp){ return N; }
template<class vtype> static inline int isScalar(const iScalar<vtype> tmp) { return true;} template<class vtype> static accelerator_inline int isScalar(const iScalar<vtype> tmp) { return true;}
template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> tmp){ return false;} template<class vtype,int N> static accelerator_inline int isScalar(const iVector<vtype,N> tmp){ return false;}
template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> tmp){ return false;} template<class vtype,int N> static accelerator_inline int isScalar(const iMatrix<vtype,N> tmp){ return false;}
template<class vtype> static inline int isVector(const iScalar<vtype> tmp) { return false;} template<class vtype> static accelerator_inline int isVector(const iScalar<vtype> tmp) { return false;}
template<class vtype,int N> static inline int isVector(const iVector<vtype,N> tmp){ return true;} template<class vtype,int N> static accelerator_inline int isVector(const iVector<vtype,N> tmp){ return true;}
template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> tmp){ return false;} template<class vtype,int N> static accelerator_inline int isVector(const iMatrix<vtype,N> tmp){ return false;}
template<class vtype> static inline int isMatrix(const iScalar<vtype> tmp) { return false;} template<class vtype> static accelerator_inline int isMatrix(const iScalar<vtype> tmp) { return false;}
template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> tmp){ return false;} template<class vtype,int N> static accelerator_inline int isMatrix(const iVector<vtype,N> tmp){ return false;}
template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> tmp){ return true;} template<class vtype,int N> static accelerator_inline int isMatrix(const iMatrix<vtype,N> tmp){ return true;}
///////////////////////////////////////// /////////////////////////////////////////
// Ends recursion for trace (scalar/vector/matrix) // Ends recursion for trace (scalar/vector/matrix)
///////////////////////////////////////// /////////////////////////////////////////
template<class vtype> template<class vtype> accelerator_inline
static auto traceIndex(const iScalar<vtype> arg) -> iScalar<RemoveCRV(arg._internal)> static auto traceIndex(const iScalar<vtype> arg) -> iScalar<RemoveCRV(arg._internal)>
{ {
iScalar<RemoveCRV(arg._internal)> ret; iScalar<RemoveCRV(arg._internal)> ret;
ret._internal = arg._internal; ret._internal = arg._internal;
return ret; return ret;
} }
template<class vtype,int N> template<class vtype,int N> accelerator_inline
static auto traceIndex(const iVector<vtype,N> arg) -> iScalar<RemoveCRV(arg._internal[0])> static auto traceIndex(const iVector<vtype,N> arg) -> iScalar<RemoveCRV(arg._internal[0])>
{ {
iScalar<RemoveCRV(arg._internal[0])> ret; iScalar<RemoveCRV(arg._internal[0])> ret;
ret._internal=zero; ret._internal=zero;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
ret._internal = ret._internal+ arg._internal[i]; ret._internal = ret._internal+ arg._internal[i];
}
return ret;
} }
return ret; template<class vtype,int N> accelerator_inline
}
template<class vtype,int N>
static auto traceIndex(const iMatrix<vtype,N> arg) -> iScalar<RemoveCRV(arg._internal[0][0])> static auto traceIndex(const iMatrix<vtype,N> arg) -> iScalar<RemoveCRV(arg._internal[0][0])>
{ {
iScalar<RemoveCRV(arg._internal[0][0])> ret; iScalar<RemoveCRV(arg._internal[0][0])> ret;
ret=zero; zeroit(ret);
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
ret._internal = ret._internal+arg._internal[i][i]; ret._internal = ret._internal+arg._internal[i][i];
} }
return ret; return ret;
} }
///////////////////////////////////////// /////////////////////////////////////////
// Ends recursion for transpose scalar/matrix ; no way to terminate on vector // Ends recursion for transpose scalar/matrix ; no way to terminate on vector
///////////////////////////////////////// /////////////////////////////////////////
template<class vtype> template<class vtype> accelerator_inline
static auto transposeIndex(const iScalar<vtype> arg) -> iScalar<vtype> static auto transposeIndex(const iScalar<vtype> arg) -> iScalar<vtype>
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = arg._internal; ret._internal = arg._internal;
return ret; return ret;
} }
template<class vtype,int N> template<class vtype,int N> accelerator_inline
static auto transposeIndex(const iMatrix<vtype,N> arg) -> iMatrix<vtype,N> static auto transposeIndex(const iMatrix<vtype,N> arg) -> iMatrix<vtype,N>
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
ret=zero; ret=zero;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
ret._internal[i][j] = ret._internal[i][j]+arg._internal[i][j]; ret._internal[i][j] = ret._internal[i][j]+arg._internal[i][j];
}} }}
return ret; return ret;
} }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// End recursion for peeking a specific index; single index on vector, double index on matrix // End recursion for peeking a specific index; single index on vector, double index on matrix
//////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vtype,int N> template<class vtype,int N> accelerator_inline
static auto peekIndex(const iVector<vtype,N> arg,int ii) -> iScalar<vtype> static auto peekIndex(const iVector<vtype,N> arg,int ii) -> iScalar<vtype>
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = arg._internal[ii]; ret._internal = arg._internal[ii];
return ret; return ret;
} }
template<class vtype,int N> template<class vtype,int N> accelerator_inline
static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) -> iScalar<vtype> static auto peekIndex(const iMatrix<vtype,N> arg,int ii,int jj) -> iScalar<vtype>
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = arg._internal[ii][jj]; ret._internal = arg._internal[ii][jj];
return ret; return ret;
} }
// Vector poke, one index // Vector poke, one index
template<class vtype,int N> inline static template<class vtype,int N> accelerator_inline static
void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i) void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
{ {
ret._internal[i] = arg._internal; ret._internal[i] = arg._internal;
} }
// Matrix poke two indices // Matrix poke two indices
template<class vtype,int N> inline static template<class vtype,int N> accelerator_inline static
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j) void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
{ {
ret._internal[i][j] = arg._internal; ret._internal[i][j] = arg._internal;
} }
}; };
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
// External wrappers // External wrappers
//////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Level,class vtype> inline int indexRank(void) template<int Level,class vtype> accelerator_inline int indexRank(void)
{ {
vtype tmp; vtype tmp;
return TensorIndexRecursion<Level>::indexRank(tmp); return TensorIndexRecursion<Level>::indexRank(tmp);
} }
template<int Level,class vtype> inline int isScalar(void) template<int Level,class vtype> accelerator_inline int isScalar(void)
{ {
vtype tmp; vtype tmp;
return TensorIndexRecursion<Level>::isScalar(tmp); return TensorIndexRecursion<Level>::isScalar(tmp);
} }
template<int Level,class vtype> inline int isVector(void) template<int Level,class vtype> accelerator_inline int isVector(void)
{ {
vtype tmp; vtype tmp;
return TensorIndexRecursion<Level>::isVector(tmp); return TensorIndexRecursion<Level>::isVector(tmp);
} }
template<int Level,class vtype> inline int isMatrix(void) template<int Level,class vtype> accelerator_inline int isMatrix(void)
{ {
vtype tmp; vtype tmp;
return TensorIndexRecursion<Level>::isMatrix(tmp); return TensorIndexRecursion<Level>::isMatrix(tmp);
} }
template<int Level,class vtype> inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg)) template<int Level,class vtype> accelerator_inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg))
{ {
RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg)) ret; RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg)) ret;
ret=TensorIndexRecursion<Level>::traceIndex(arg); ret=TensorIndexRecursion<Level>::traceIndex(arg);
return ret; return ret;
} }
template<int Level,class vtype> inline auto transposeIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::transposeIndex(arg)) template<int Level,class vtype> accelerator_inline auto transposeIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::transposeIndex(arg))
{ {
RemoveCRV(TensorIndexRecursion<Level>::transposeIndex(arg)) ret; RemoveCRV(TensorIndexRecursion<Level>::transposeIndex(arg)) ret;
ret=TensorIndexRecursion<Level>::transposeIndex(arg); ret=TensorIndexRecursion<Level>::transposeIndex(arg);
return ret; return ret;
} }
template<int Level,class vtype> inline auto peekIndex (const vtype &arg,int i) -> RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0)) template<int Level,class vtype> accelerator_inline auto peekIndex (const vtype &arg,int i) -> RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0))
{ {
RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0)) ret; RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0)) ret;
ret=TensorIndexRecursion<Level>::peekIndex(arg,i); ret=TensorIndexRecursion<Level>::peekIndex(arg,i);
return ret; return ret;
} }
template<int Level,class vtype> inline auto peekIndex (const vtype &arg,int i,int j) -> RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0,0)) template<int Level,class vtype> accelerator_inline auto peekIndex (const vtype &arg,int i,int j) -> RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0,0))
{ {
RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0,0)) ret; RemoveCRV(TensorIndexRecursion<Level>::peekIndex(arg,0,0)) ret;
ret=TensorIndexRecursion<Level>::peekIndex(arg,i,j); ret=TensorIndexRecursion<Level>::peekIndex(arg,i,j);
return ret; return ret;
} }
template<int Level,class vtype> inline template<int Level,class vtype> accelerator_inline
void pokeIndex (vtype &ret,const decltype(TensorIndexRecursion<Level>::peekIndex(ret,0)) &arg,int i) void pokeIndex (vtype &ret,const decltype(TensorIndexRecursion<Level>::peekIndex(ret,0)) &arg,int i)
{ {
TensorIndexRecursion<Level>::pokeIndex(ret,arg,i); TensorIndexRecursion<Level>::pokeIndex(ret,arg,i);
} }
template<int Level,class vtype> inline template<int Level,class vtype> accelerator_inline
void pokeIndex (vtype &ret,const decltype(TensorIndexRecursion<Level>::peekIndex(ret,0,0)) &arg,int i,int j) void pokeIndex (vtype &ret,const decltype(TensorIndexRecursion<Level>::peekIndex(ret,0,0)) &arg,int i,int j)
{ {
TensorIndexRecursion<Level>::pokeIndex(ret,arg,i,j); TensorIndexRecursion<Level>::pokeIndex(ret,arg,i,j);

View File

@ -36,7 +36,7 @@ NAMESPACE_BEGIN(Grid);
// innerProduct Vector x Vector -> Scalar // innerProduct Vector x Vector -> Scalar
// innerProduct Matrix x Matrix -> Scalar // innerProduct Matrix x Matrix -> Scalar
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
template<class sobj> inline RealD norm2(const sobj &arg){ template<class sobj> accelerator_inline RealD norm2(const sobj &arg){
auto nrm = innerProductD(arg,arg); auto nrm = innerProductD(arg,arg);
RealD ret = real(nrm); RealD ret = real(nrm);
return ret; return ret;
@ -45,21 +45,21 @@ template<class sobj> inline RealD norm2(const sobj &arg){
// If single promote to double and sum 2x // If single promote to double and sum 2x
////////////////////////////////////// //////////////////////////////////////
inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); } accelerator_inline ComplexD innerProductD(const ComplexF &l,const ComplexF &r){ return innerProduct(l,r); }
inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); } accelerator_inline ComplexD innerProductD(const ComplexD &l,const ComplexD &r){ return innerProduct(l,r); }
inline RealD innerProductD(const RealD &l,const RealD &r){ return innerProduct(l,r); } accelerator_inline RealD innerProductD(const RealD &l,const RealD &r){ return innerProduct(l,r); }
inline RealD innerProductD(const RealF &l,const RealF &r){ return innerProduct(l,r); } accelerator_inline RealD innerProductD(const RealF &l,const RealF &r){ return innerProduct(l,r); }
inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); } accelerator_inline vComplexD innerProductD(const vComplexD &l,const vComplexD &r){ return innerProduct(l,r); }
inline vRealD innerProductD(const vRealD &l,const vRealD &r){ return innerProduct(l,r); } accelerator_inline vRealD innerProductD(const vRealD &l,const vRealD &r){ return innerProduct(l,r); }
inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){ accelerator_inline vComplexD innerProductD(const vComplexF &l,const vComplexF &r){
vComplexD la,lb; vComplexD la,lb;
vComplexD ra,rb; vComplexD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v); Optimization::PrecisionChange::StoD(r.v,ra.v,rb.v);
return innerProduct(la,ra) + innerProduct(lb,rb); return innerProduct(la,ra) + innerProduct(lb,rb);
} }
inline vRealD innerProductD(const vRealF &l,const vRealF &r){ accelerator_inline vRealD innerProductD(const vRealF &l,const vRealF &r){
vRealD la,lb; vRealD la,lb;
vRealD ra,rb; vRealD ra,rb;
Optimization::PrecisionChange::StoD(l.v,la.v,lb.v); Optimization::PrecisionChange::StoD(l.v,la.v,lb.v);
@ -67,18 +67,18 @@ inline vRealD innerProductD(const vRealF &l,const vRealF &r){
return innerProduct(la,ra) + innerProduct(lb,rb); return innerProduct(la,ra) + innerProduct(lb,rb);
} }
template<class l,class r,int N> inline template<class l,class r,int N> accelerator_inline
auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))> auto innerProductD (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0],rhs._internal[0]))>
{ {
typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t; typedef decltype(innerProductD(lhs._internal[0],rhs._internal[0])) ret_t;
iScalar<ret_t> ret; iScalar<ret_t> ret;
ret=zero; zeroit(ret);
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]); ret._internal += innerProductD(lhs._internal[c1],rhs._internal[c1]);
} }
return ret; return ret;
} }
template<class l,class r,int N> inline template<class l,class r,int N> accelerator_inline
auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))> auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0]))>
{ {
typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t; typedef decltype(innerProductD(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
@ -91,7 +91,7 @@ auto innerProductD (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<
}} }}
return ret; return ret;
} }
template<class l,class r> inline template<class l,class r> accelerator_inline
auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))> auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProductD(lhs._internal,rhs._internal))>
{ {
typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t; typedef decltype(innerProductD(lhs._internal,rhs._internal)) ret_t;
@ -102,7 +102,7 @@ auto innerProductD (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decl
////////////////////// //////////////////////
// Keep same precison // Keep same precison
////////////////////// //////////////////////
template<class l,class r,int N> inline template<class l,class r,int N> accelerator_inline
auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))> auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0],rhs._internal[0]))>
{ {
typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t; typedef decltype(innerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
@ -113,7 +113,7 @@ auto innerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iScalar<d
} }
return ret; return ret;
} }
template<class l,class r,int N> inline template<class l,class r,int N> accelerator_inline
auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))> auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0]))>
{ {
typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t; typedef decltype(innerProduct(lhs._internal[0][0],rhs._internal[0][0])) ret_t;
@ -126,7 +126,7 @@ auto innerProduct (const iMatrix<l,N>& lhs,const iMatrix<r,N>& rhs) -> iScalar<d
}} }}
return ret; return ret;
} }
template<class l,class r> inline template<class l,class r> accelerator_inline
auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))> auto innerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(innerProduct(lhs._internal,rhs._internal))>
{ {
typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t; typedef decltype(innerProduct(lhs._internal,rhs._internal)) ret_t;

View File

@ -31,19 +31,19 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#define LOGICAL_BINOP(Op) \ #define LOGICAL_BINOP(Op) \
template<class v> strong_inline iScalar<v> operator Op (const iScalar<v>& lhs,const iScalar<v>& rhs) \ template<class v> accelerator_inline iScalar<v> operator Op (const iScalar<v>& lhs,const iScalar<v>& rhs) \
{ \ { \
iScalar<v> ret; \ iScalar<v> ret; \
ret._internal = lhs._internal Op rhs._internal ; \ ret._internal = lhs._internal Op rhs._internal ; \
return ret; \ return ret; \
} \ } \
template<class l> strong_inline iScalar<l> operator Op (const iScalar<l>& lhs,Integer rhs) \ template<class l> accelerator_inline iScalar<l> operator Op (const iScalar<l>& lhs,Integer rhs) \
{ \ { \
typename iScalar<l>::scalar_type t; t=rhs; \ typename iScalar<l>::scalar_type t; t=rhs; \
typename iScalar<l>::tensor_reduced srhs; srhs=t; \ typename iScalar<l>::tensor_reduced srhs; srhs=t; \
return lhs Op srhs; \ return lhs Op srhs; \
} \ } \
template<class l> strong_inline iScalar<l> operator Op (Integer lhs,const iScalar<l>& rhs) \ template<class l> accelerator_inline iScalar<l> operator Op (Integer lhs,const iScalar<l>& rhs) \
{ \ { \
typename iScalar<l>::scalar_type t;t=lhs; \ typename iScalar<l>::scalar_type t;t=lhs; \
typename iScalar<l>::tensor_reduced slhs;slhs=t; \ typename iScalar<l>::tensor_reduced slhs;slhs=t; \

View File

@ -35,7 +35,7 @@ NAMESPACE_BEGIN(Grid);
// Vector x Vector -> Matrix // Vector x Vector -> Matrix
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
template<class l,class r,int N> inline template<class l,class r,int N> accelerator_inline
auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N> auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<decltype(outerProduct(lhs._internal[0],rhs._internal[0])),N>
{ {
typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t; typedef decltype(outerProduct(lhs._internal[0],rhs._internal[0])) ret_t;
@ -48,7 +48,7 @@ auto outerProduct (const iVector<l,N>& lhs,const iVector<r,N>& rhs) -> iMatrix<d
} }
template<class l,class r> inline template<class l,class r> accelerator_inline
auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(outerProduct(lhs._internal,rhs._internal))> auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<decltype(outerProduct(lhs._internal,rhs._internal))>
{ {
typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t; typedef decltype(outerProduct(lhs._internal,rhs._internal)) ret_t;
@ -58,21 +58,19 @@ auto outerProduct (const iScalar<l>& lhs,const iScalar<r>& rhs) -> iScalar<declt
} }
inline ComplexF outerProduct(const ComplexF &l, const ComplexF& r) accelerator_inline ComplexF outerProduct(const ComplexF &l, const ComplexF& r)
{ {
std::cout << "outer product taking conj "<<r<<" "<<conj(r)<<std::endl;
return l*conj(r); return l*conj(r);
} }
inline ComplexD outerProduct(const ComplexD &l, const ComplexD& r) accelerator_inline ComplexD outerProduct(const ComplexD &l, const ComplexD& r)
{ {
std::cout << "outer product taking conj "<<r<<" "<<conj(r)<<std::endl;
return l*conj(r); return l*conj(r);
} }
inline RealF outerProduct(const RealF &l, const RealF& r) accelerator_inline RealF outerProduct(const RealF &l, const RealF& r)
{ {
return l*r; return l*r;
} }
inline RealD outerProduct(const RealD &l, const RealD& r) accelerator_inline RealD outerProduct(const RealD &l, const RealD& r)
{ {
return l*r; return l*r;
} }

View File

@ -34,13 +34,13 @@ NAMESPACE_BEGIN(Grid);
/////////////////////////////////////////////// ///////////////////////////////////////////////
// multiply by I; make recursive. // multiply by I; make recursive.
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> timesI(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> timesI(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
timesI(ret._internal,r._internal); timesI(ret._internal,r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> timesI(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> timesI(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -48,7 +48,7 @@ template<class vtype,int N> inline iVector<vtype,N> timesI(const iVector<vtype,N
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> timesI(const iMatrix<vtype,N>&r) template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesI(const iMatrix<vtype,N>&r)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -58,17 +58,17 @@ template<class vtype,int N> inline iMatrix<vtype,N> timesI(const iMatrix<vtype,N
return ret; return ret;
} }
template<class vtype> inline void timesI(iScalar<vtype> &ret,const iScalar<vtype>&r) template<class vtype> accelerator_inline void timesI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{ {
timesI(ret._internal,r._internal); timesI(ret._internal,r._internal);
} }
template<class vtype,int N> inline void timesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline void timesI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{ {
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
timesI(ret._internal[i],r._internal[i]); timesI(ret._internal[i],r._internal[i]);
} }
} }
template<class vtype,int N> inline void timesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r) template<class vtype,int N> accelerator_inline void timesI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r)
{ {
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
@ -77,13 +77,13 @@ template<class vtype,int N> inline void timesI(iMatrix<vtype,N> &ret,const iMat
} }
template<class vtype> inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> timesMinusI(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
timesMinusI(ret._internal,r._internal); timesMinusI(ret._internal,r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> timesMinusI(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -91,7 +91,7 @@ template<class vtype,int N> inline iVector<vtype,N> timesMinusI(const iVector<vt
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> timesMinusI(const iMatrix<vtype,N>&r) template<class vtype,int N> accelerator_inline iMatrix<vtype,N> timesMinusI(const iMatrix<vtype,N>&r)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -101,17 +101,17 @@ template<class vtype,int N> inline iMatrix<vtype,N> timesMinusI(const iMatrix<vt
return ret; return ret;
} }
template<class vtype> inline void timesMinusI(iScalar<vtype> &ret,const iScalar<vtype>&r) template<class vtype> accelerator_inline void timesMinusI(iScalar<vtype> &ret,const iScalar<vtype>&r)
{ {
timesMinusI(ret._internal,r._internal); timesMinusI(ret._internal,r._internal);
} }
template<class vtype,int N> inline void timesMinusI(iVector<vtype,N> &ret,const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline void timesMinusI(iVector<vtype,N> &ret,const iVector<vtype,N>&r)
{ {
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
timesMinusI(ret._internal[i],r._internal[i]); timesMinusI(ret._internal[i],r._internal[i]);
} }
} }
template<class vtype,int N> inline void timesMinusI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r) template<class vtype,int N> accelerator_inline void timesMinusI(iMatrix<vtype,N> &ret,const iMatrix<vtype,N>&r)
{ {
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
for(int j=0;j<N;j++){ for(int j=0;j<N;j++){
@ -123,13 +123,13 @@ template<class vtype,int N> inline void timesMinusI(iMatrix<vtype,N> &ret,const
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Conj function for scalar, vector, matrix // Conj function for scalar, vector, matrix
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> conjugate(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> conjugate(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = conjugate(r._internal); ret._internal = conjugate(r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> conjugate(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> conjugate(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -137,7 +137,7 @@ template<class vtype,int N> inline iVector<vtype,N> conjugate(const iVector<vtyp
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtype,N>&r) template<class vtype,int N> accelerator_inline iMatrix<vtype,N> conjugate(const iMatrix<vtype,N>&r)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -150,13 +150,13 @@ template<class vtype,int N> inline iMatrix<vtype,N> conjugate(const iMatrix<vtyp
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Adj function for scalar, vector, matrix // Adj function for scalar, vector, matrix
/////////////////////////////////////////////// ///////////////////////////////////////////////
template<class vtype> inline iScalar<vtype> adj(const iScalar<vtype>&r) template<class vtype> accelerator_inline iScalar<vtype> adj(const iScalar<vtype>&r)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
ret._internal = adj(r._internal); ret._internal = adj(r._internal);
return ret; return ret;
} }
template<class vtype,int N> inline iVector<vtype,N> adj(const iVector<vtype,N>&r) template<class vtype,int N> accelerator_inline iVector<vtype,N> adj(const iVector<vtype,N>&r)
{ {
iVector<vtype,N> ret; iVector<vtype,N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){
@ -164,7 +164,7 @@ template<class vtype,int N> inline iVector<vtype,N> adj(const iVector<vtype,N>&r
} }
return ret; return ret;
} }
template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &arg) template<class vtype,int N> accelerator_inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &arg)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -183,13 +183,13 @@ template<class vtype,int N> inline iMatrix<vtype,N> adj(const iMatrix<vtype,N> &
// Can only take the real/imag part of scalar objects, since // Can only take the real/imag part of scalar objects, since
// lattice objects of different complex nature are non-conformable. // lattice objects of different complex nature are non-conformable.
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
template<class itype> inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))> template<class itype> accelerator_inline auto real(const iScalar<itype> &z) -> iScalar<decltype(real(z._internal))>
{ {
iScalar<decltype(real(z._internal))> ret; iScalar<decltype(real(z._internal))> ret;
ret._internal = real(z._internal); ret._internal = real(z._internal);
return ret; return ret;
} }
template<class itype,int N> inline auto real(const iMatrix<itype,N> &z) -> iMatrix<decltype(real(z._internal[0][0])),N> template<class itype,int N> accelerator_inline auto real(const iMatrix<itype,N> &z) -> iMatrix<decltype(real(z._internal[0][0])),N>
{ {
iMatrix<decltype(real(z._internal[0][0])),N> ret; iMatrix<decltype(real(z._internal[0][0])),N> ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -198,7 +198,7 @@ template<class itype,int N> inline auto real(const iMatrix<itype,N> &z) -> iMatr
}} }}
return ret; return ret;
} }
template<class itype,int N> inline auto real(const iVector<itype,N> &z) -> iVector<decltype(real(z._internal[0])),N> template<class itype,int N> accelerator_inline auto real(const iVector<itype,N> &z) -> iVector<decltype(real(z._internal[0])),N>
{ {
iVector<decltype(real(z._internal[0])),N> ret; iVector<decltype(real(z._internal[0])),N> ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -207,13 +207,13 @@ template<class itype,int N> inline auto real(const iVector<itype,N> &z) -> iVect
return ret; return ret;
} }
template<class itype> inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))> template<class itype> accelerator_inline auto imag(const iScalar<itype> &z) -> iScalar<decltype(imag(z._internal))>
{ {
iScalar<decltype(imag(z._internal))> ret; iScalar<decltype(imag(z._internal))> ret;
ret._internal = imag(z._internal); ret._internal = imag(z._internal);
return ret; return ret;
} }
template<class itype,int N> inline auto imag(const iMatrix<itype,N> &z) -> iMatrix<decltype(imag(z._internal[0][0])),N> template<class itype,int N> accelerator_inline auto imag(const iMatrix<itype,N> &z) -> iMatrix<decltype(imag(z._internal[0][0])),N>
{ {
iMatrix<decltype(imag(z._internal[0][0])),N> ret; iMatrix<decltype(imag(z._internal[0][0])),N> ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -222,7 +222,7 @@ template<class itype,int N> inline auto imag(const iMatrix<itype,N> &z) -> iMatr
}} }}
return ret; return ret;
} }
template<class itype,int N> inline auto imag(const iVector<itype,N> &z) -> iVector<decltype(imag(z._internal[0])),N> template<class itype,int N> accelerator_inline auto imag(const iVector<itype,N> &z) -> iVector<decltype(imag(z._internal[0])),N>
{ {
iVector<decltype(imag(z._internal[0])),N> ret; iVector<decltype(imag(z._internal[0])),N> ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){

View File

@ -36,13 +36,13 @@ NAMESPACE_BEGIN(Grid);
// either scalar or matrix // either scalar or matrix
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
inline ComplexF trace( const ComplexF &arg){ return arg;} accelerator_inline ComplexF trace( const ComplexF &arg){ return arg;}
inline ComplexD trace( const ComplexD &arg){ return arg;} accelerator_inline ComplexD trace( const ComplexD &arg){ return arg;}
inline RealF trace( const RealF &arg){ return arg;} accelerator_inline RealF trace( const RealF &arg){ return arg;}
inline RealD trace( const RealD &arg){ return arg;} accelerator_inline RealD trace( const RealD &arg){ return arg;}
template<class vtype,int N> template<class vtype,int N>
inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._internal[0][0]))> accelerator_inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._internal[0][0]))>
{ {
iScalar<decltype( trace(arg._internal[0][0] )) > ret; iScalar<decltype( trace(arg._internal[0][0] )) > ret;
zeroit(ret._internal); zeroit(ret._internal);
@ -53,7 +53,7 @@ inline auto trace(const iMatrix<vtype,N> &arg) -> iScalar<decltype(trace(arg._in
} }
template<class vtype> template<class vtype>
inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._internal))> accelerator_inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._internal))>
{ {
iScalar<decltype(trace(arg._internal))> ret; iScalar<decltype(trace(arg._internal))> ret;
ret._internal=trace(arg._internal); ret._internal=trace(arg._internal);
@ -61,7 +61,7 @@ inline auto trace(const iScalar<vtype> &arg) -> iScalar<decltype(trace(arg._inte
} }
template<class vtype,int N> template<class vtype,int N>
inline auto trace(const iVector<vtype,N> &arg) -> iVector<decltype(trace(arg._internal[0])),N> accelerator_inline auto trace(const iVector<vtype,N> &arg) -> iVector<decltype(trace(arg._internal[0])),N>
{ {
iVector<decltype(trace(arg._internal[0])),N> ret; iVector<decltype(trace(arg._internal[0])),N> ret;
for(int i=0;i<N;i++){ for(int i=0;i<N;i++){

View File

@ -34,13 +34,13 @@ NAMESPACE_BEGIN(Grid);
// Transpose all indices // Transpose all indices
///////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////
inline ComplexD transpose(ComplexD &rhs){ return rhs;} accelerator_inline ComplexD transpose(ComplexD &rhs){ return rhs;}
inline ComplexF transpose(ComplexF &rhs){ return rhs;} accelerator_inline ComplexF transpose(ComplexF &rhs){ return rhs;}
inline RealD transpose(RealD &rhs){ return rhs;} accelerator_inline RealD transpose(RealD &rhs){ return rhs;}
inline RealF transpose(RealF &rhs){ return rhs;} accelerator_inline RealF transpose(RealF &rhs){ return rhs;}
template<class vtype,int N> template<class vtype,int N>
inline typename std::enable_if<isGridTensor<vtype>::value, iMatrix<vtype,N> >::type accelerator_inline typename std::enable_if<isGridTensor<vtype>::value, iMatrix<vtype,N> >::type
transpose(iMatrix<vtype,N> arg) transpose(iMatrix<vtype,N> arg)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
@ -51,7 +51,7 @@ transpose(iMatrix<vtype,N> arg)
return ret; return ret;
} }
template<class vtype,int N> template<class vtype,int N>
inline typename std::enable_if<isGridTensor<vtype>::notvalue, iMatrix<vtype,N> >::type accelerator_inline typename std::enable_if<isGridTensor<vtype>::notvalue, iMatrix<vtype,N> >::type
transpose(iMatrix<vtype,N> arg) transpose(iMatrix<vtype,N> arg)
{ {
iMatrix<vtype,N> ret; iMatrix<vtype,N> ret;
@ -63,7 +63,7 @@ transpose(iMatrix<vtype,N> arg)
} }
template<class vtype> template<class vtype>
inline typename std::enable_if<isGridTensor<vtype>::value, iScalar<vtype> >::type accelerator_inline typename std::enable_if<isGridTensor<vtype>::value, iScalar<vtype> >::type
transpose(iScalar<vtype> arg) transpose(iScalar<vtype> arg)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
@ -72,7 +72,7 @@ transpose(iScalar<vtype> arg)
} }
template<class vtype> template<class vtype>
inline typename std::enable_if<isGridTensor<vtype>::notvalue, iScalar<vtype> >::type accelerator_inline typename std::enable_if<isGridTensor<vtype>::notvalue, iScalar<vtype> >::type
transpose(iScalar<vtype> arg) transpose(iScalar<vtype> arg)
{ {
iScalar<vtype> ret; iScalar<vtype> ret;
@ -86,7 +86,7 @@ transpose(iScalar<vtype> arg)
// to that of adj; which is easiers? // to that of adj; which is easiers?
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
#if 0 #if 0
template<int Level,class vtype,int N> inline template<int Level,class vtype,int N> accelerator_inline
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::value, iMatrix<vtype,N> >::type typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::value, iMatrix<vtype,N> >::type
transposeIndex (const iMatrix<vtype,N> &arg) transposeIndex (const iMatrix<vtype,N> &arg)
{ {
@ -98,7 +98,7 @@ transposeIndex (const iMatrix<vtype,N> &arg)
return ret; return ret;
} }
// or not // or not
template<int Level,class vtype,int N> inline template<int Level,class vtype,int N> accelerator_inline
typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::notvalue, iMatrix<vtype,N> >::type typename std::enable_if<matchGridTensorIndex<iMatrix<vtype,N>,Level>::notvalue, iMatrix<vtype,N> >::type
transposeIndex (const iMatrix<vtype,N> &arg) transposeIndex (const iMatrix<vtype,N> &arg)
{ {
@ -109,7 +109,7 @@ transposeIndex (const iMatrix<vtype,N> &arg)
}} }}
return ret; return ret;
} }
template<int Level,class vtype> inline template<int Level,class vtype> accelerator_inline
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, iScalar<vtype> >::type typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::notvalue, iScalar<vtype> >::type
transposeIndex (const iScalar<vtype> &arg) transposeIndex (const iScalar<vtype> &arg)
{ {
@ -117,7 +117,7 @@ transposeIndex (const iScalar<vtype> &arg)
ret._internal=transposeIndex<Level>(arg._internal); ret._internal=transposeIndex<Level>(arg._internal);
return ret; return ret;
} }
template<int Level,class vtype> inline template<int Level,class vtype> accelerator_inline
typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::value, iScalar<vtype> >::type typename std::enable_if<matchGridTensorIndex<iScalar<vtype>,Level>::value, iScalar<vtype> >::type
transposeIndex (const iScalar<vtype> &arg) transposeIndex (const iScalar<vtype> &arg)
{ {

View File

@ -33,13 +33,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#define UNARY(func) \ #define UNARY(func) \
template<class obj> inline auto func(const iScalar<obj> &z) -> iScalar<obj> \ template<class obj> accelerator_inline auto func(const iScalar<obj> &z) -> iScalar<obj> \
{ \ { \
iScalar<obj> ret; \ iScalar<obj> ret; \
ret._internal = func( (z._internal)); \ ret._internal = func( (z._internal)); \
return ret; \ return ret; \
} \ } \
template<class obj,int N> inline auto func(const iVector<obj,N> &z) -> iVector<obj,N> \ template<class obj,int N> accelerator_inline auto func(const iVector<obj,N> &z) -> iVector<obj,N> \
{ \ { \
iVector<obj,N> ret; \ iVector<obj,N> ret; \
for(int c1=0;c1<N;c1++){ \ for(int c1=0;c1<N;c1++){ \
@ -47,7 +47,7 @@ NAMESPACE_BEGIN(Grid);
} \ } \
return ret; \ return ret; \
} \ } \
template<class obj,int N> inline auto func(const iMatrix<obj,N> &z) -> iMatrix<obj,N> \ template<class obj,int N> accelerator_inline auto func(const iMatrix<obj,N> &z) -> iMatrix<obj,N> \
{ \ { \
iMatrix<obj,N> ret; \ iMatrix<obj,N> ret; \
for(int c1=0;c1<N;c1++){ \ for(int c1=0;c1<N;c1++){ \
@ -59,13 +59,13 @@ NAMESPACE_BEGIN(Grid);
#define BINARY_RSCALAR(func,scal) \ #define BINARY_RSCALAR(func,scal) \
template<class obj> inline iScalar<obj> func(const iScalar<obj> &z,scal y) \ template<class obj> accelerator_inline iScalar<obj> func(const iScalar<obj> &z,scal y) \
{ \ { \
iScalar<obj> ret; \ iScalar<obj> ret; \
ret._internal = func(z._internal,y); \ ret._internal = func(z._internal,y); \
return ret; \ return ret; \
} \ } \
template<class obj,int N> inline iVector<obj,N> func(const iVector<obj,N> &z,scal y) \ template<class obj,int N> accelerator_inline iVector<obj,N> func(const iVector<obj,N> &z,scal y) \
{ \ { \
iVector<obj,N> ret; \ iVector<obj,N> ret; \
for(int c1=0;c1<N;c1++){ \ for(int c1=0;c1<N;c1++){ \
@ -73,7 +73,7 @@ NAMESPACE_BEGIN(Grid);
} \ } \
return ret; \ return ret; \
} \ } \
template<class obj,int N> inline iMatrix<obj,N> func(const iMatrix<obj,N> &z, scal y) \ template<class obj,int N> accelerator_inline iMatrix<obj,N> func(const iMatrix<obj,N> &z, scal y) \
{ \ { \
iMatrix<obj,N> ret; \ iMatrix<obj,N> ret; \
for(int c1=0;c1<N;c1++){ \ for(int c1=0;c1<N;c1++){ \
@ -95,13 +95,13 @@ UNARY(abs);
UNARY(Not); UNARY(Not);
template<class obj> inline auto toReal(const iScalar<obj> &z) -> typename iScalar<obj>::Realified template<class obj> accelerator_inline auto toReal(const iScalar<obj> &z) -> typename iScalar<obj>::Realified
{ {
typename iScalar<obj>::Realified ret; typename iScalar<obj>::Realified ret;
ret._internal = toReal(z._internal); ret._internal = toReal(z._internal);
return ret; return ret;
} }
template<class obj,int N> inline auto toReal(const iVector<obj,N> &z) -> typename iVector<obj,N>::Realified template<class obj,int N> accelerator_inline auto toReal(const iVector<obj,N> &z) -> typename iVector<obj,N>::Realified
{ {
typename iVector<obj,N>::Realified ret; typename iVector<obj,N>::Realified ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -109,7 +109,7 @@ template<class obj,int N> inline auto toReal(const iVector<obj,N> &z) -> typenam
} }
return ret; return ret;
} }
template<class obj,int N> inline auto toReal(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Realified template<class obj,int N> accelerator_inline auto toReal(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Realified
{ {
typename iMatrix<obj,N>::Realified ret; typename iMatrix<obj,N>::Realified ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -119,13 +119,13 @@ template<class obj,int N> inline auto toReal(const iMatrix<obj,N> &z) -> typenam
return ret; return ret;
} }
template<class obj> inline auto toComplex(const iScalar<obj> &z) -> typename iScalar<obj>::Complexified template<class obj> accelerator_inline auto toComplex(const iScalar<obj> &z) -> typename iScalar<obj>::Complexified
{ {
typename iScalar<obj>::Complexified ret; typename iScalar<obj>::Complexified ret;
ret._internal = toComplex(z._internal); ret._internal = toComplex(z._internal);
return ret; return ret;
} }
template<class obj,int N> inline auto toComplex(const iVector<obj,N> &z) -> typename iVector<obj,N>::Complexified template<class obj,int N> accelerator_inline auto toComplex(const iVector<obj,N> &z) -> typename iVector<obj,N>::Complexified
{ {
typename iVector<obj,N>::Complexified ret; typename iVector<obj,N>::Complexified ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
@ -133,7 +133,7 @@ template<class obj,int N> inline auto toComplex(const iVector<obj,N> &z) -> type
} }
return ret; return ret;
} }
template<class obj,int N> inline auto toComplex(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Complexified template<class obj,int N> accelerator_inline auto toComplex(const iMatrix<obj,N> &z) -> typename iMatrix<obj,N>::Complexified
{ {
typename iMatrix<obj,N>::Complexified ret; typename iMatrix<obj,N>::Complexified ret;
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){