1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 20:57:06 +01:00

Optimisation and syntax pretty

This commit is contained in:
Peter Boyle
2015-05-16 04:36:22 +01:00
parent 25bfa7e830
commit 5f8b82b90c
4 changed files with 26 additions and 18 deletions

View File

@ -27,13 +27,14 @@ strong_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> *
}
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
for(int c3=0;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){

View File

@ -15,15 +15,22 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){
int c3=0;
for(int c2=0;c2<N;c2++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}
for(int c3=1;c3<N;c3++){
for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
for(int c3=1;c3<N;c3++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}}
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}
}
return;
}
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){