1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 15:55:37 +00:00

Optimisation and syntax pretty

This commit is contained in:
Peter Boyle 2015-05-16 04:36:22 +01:00
parent 56667e9d32
commit 9f0e990b40
4 changed files with 26 additions and 18 deletions

View File

@ -62,7 +62,7 @@ public:
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Expression Template closure support // Expression Template closure support
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr) template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{ {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
@ -71,7 +71,7 @@ PARALLEL_FOR_LOOP
} }
return *this; return *this;
} }
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr) template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{ {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
@ -80,7 +80,7 @@ PARALLEL_FOR_LOOP
} }
return *this; return *this;
} }
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr) template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{ {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
@ -132,14 +132,14 @@ PARALLEL_FOR_LOOP
checkerboard=0; checkerboard=0;
} }
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){ template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r; this->_odata[ss]=r;
} }
return *this; return *this;
} }
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){ template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
conformable(*this,r); conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl; std::cout<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
@ -150,21 +150,21 @@ PARALLEL_FOR_LOOP
} }
// *=,+=,-= operators inherit behaviour from the corresponding */+/- operation // *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
template<class T> inline Lattice<vobj> &operator *=(const T &r) { template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r; *this = (*this)*r;
return *this; return *this;
} }
template<class T> inline Lattice<vobj> &operator -=(const T &r) { template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
*this = (*this)-r; *this = (*this)-r;
return *this; return *this;
} }
template<class T> inline Lattice<vobj> &operator +=(const T &r) { template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
*this = (*this)+r; *this = (*this)+r;
return *this; return *this;
} }
inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){ strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs); conformable(lhs,rhs);
Lattice<vobj> ret(lhs._grid); Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP
}; // class Lattice }; // class Lattice
template<class vobj> inline std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
std::vector<int> gcoor; std::vector<int> gcoor;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
sobj ss; sobj ss;

View File

@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
// Trace Index level dependent operation // Trace Index level dependent operation
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj> template<int Index,class vobj>
inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> inline auto latTraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{ {
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid); Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP

View File

@ -27,13 +27,14 @@ strong_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> *
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
for(int c3=0;c3<N;c3++){ for(int c3=0;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}}} }}}
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){ strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){

View File

@ -15,15 +15,22 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){ for(int c1=0;c1<N;c1++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]); int c3=0;
for(int c2=0;c2<N;c2++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}
for(int c3=1;c3<N;c3++){ for(int c3=1;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]); mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
} }
}} }
}
return; return;
} }
template<class rrtype,class ltype,class rtype,int N> template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){ strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){ for(int c2=0;c2<N;c2++){