1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 17:25:37 +01:00

Optimisation and syntax pretty

This commit is contained in:
Peter Boyle 2015-05-16 04:36:22 +01:00
parent 56667e9d32
commit 9f0e990b40
4 changed files with 26 additions and 18 deletions

View File

@ -62,7 +62,7 @@ public:
////////////////////////////////////////////////////////////////////////////////
// Expression Template closure support
////////////////////////////////////////////////////////////////////////////////
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
@ -71,7 +71,7 @@ PARALLEL_FOR_LOOP
}
return *this;
}
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
@ -80,7 +80,7 @@ PARALLEL_FOR_LOOP
}
return *this;
}
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
{
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
@ -132,14 +132,14 @@ PARALLEL_FOR_LOOP
checkerboard=0;
}
// Broadcast assignment: stores r into this->_odata[ss] for every ss in
// [0, _grid->oSites()) and returns *this.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the two signature lines below differ only in `inline` vs `strong_inline`
// and are presumably the removed and the added version of a single line --
// confirm against the repository before treating this as compilable code.
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
// PARALLEL_FOR_LOOP is a macro defined outside this view; presumably it
// parallelises the loop that follows -- TODO confirm.
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r;
}
return *this;
}
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
conformable(*this,r);
std::cout<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP
@ -150,21 +150,21 @@ PARALLEL_FOR_LOOP
}
// *=,+=,-= operators inherit behaviour from the corresponding */+/- operation
// Compound multiply: evaluates (*this)*r, assigns the result back to *this
// and returns *this. The meaning of the product is whatever the operator*
// overload selected for (Lattice<vobj>, T) provides; it is outside this view.
// NOTE(review): diff view without +/- markers -- the two signature lines
// below differ only in `inline` vs `strong_inline` and are presumably the
// removed and the added version of one line; confirm against the repository.
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r;
return *this;
}
// Compound subtract: evaluates (*this)-r, assigns the result back to *this
// and returns *this. The meaning of the difference is whatever the operator-
// overload selected for (Lattice<vobj>, T) provides; it is outside this view.
// NOTE(review): diff view without +/- markers -- the two signature lines
// below differ only in `inline` vs `strong_inline` and are presumably the
// removed and the added version of one line; confirm against the repository.
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
*this = (*this)-r;
return *this;
}
// Compound add: evaluates (*this)+r, assigns the result back to *this and
// returns *this. The meaning of the sum is whatever the operator+ overload
// selected for (Lattice<vobj>, T) provides; it is outside this view.
// NOTE(review): diff view without +/- markers -- the two signature lines
// below differ only in `inline` vs `strong_inline` and are presumably the
// removed and the added version of one line; confirm against the repository.
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
*this = (*this)+r;
return *this;
}
inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
conformable(lhs,rhs);
Lattice<vobj> ret(lhs._grid);
PARALLEL_FOR_LOOP
@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP
}; // class Lattice
template<class vobj> inline std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
std::vector<int> gcoor;
typedef typename vobj::scalar_object sobj;
sobj ss;

View File

@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
// Trace Index level dependent operation
////////////////////////////////////////////////////////////////////////////////////////////////////
template<int Index,class vobj>
inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
inline auto latTraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
{
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
PARALLEL_FOR_LOOP

View File

@ -27,13 +27,14 @@ strong_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> *
}
// mac() overload for N x N iMatrix operands: for each index triple
// (c1,c2,c3) it forwards to the element-level mac() with
// ret->_internal[c1][c2], lhs->_internal[c1][c3] and rhs->_internal[c3][c2].
// What the element-level mac() computes is defined by the overloads for the
// contained types (outside this view); presumably it accumulates the product
// of the last two arguments into the first -- TODO confirm.
// NOTE(review): this is a diff view without +/- markers. Five loop headers
// are listed but only three are closed by `}}}`, so the removed and the added
// loop orderings appear to be interleaved; confirm the final nesting order
// against the repository.
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
for(int c3=0;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
// c1/c2 address the element of ret; c3 is the index shared by lhs and rhs.
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}}}
return;
}
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c1=0;c1<N;c1++){

View File

@ -15,15 +15,22 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
// mult() overload for N x N iMatrix operands, writing into *ret.
// The visible statements show the shared-index term c3 = 0 being handled by
// the element-level mult() on ret->_internal[c1][c2], and the terms
// c3 = 1 .. N-1 being handled by the element-level mac() on the same element.
// Both element-level functions are overloads defined outside this view.
// NOTE(review): this is a diff view without +/- markers. The removed and the
// added body appear to be interleaved (two forms of the c3 = 0 call are
// listed and the braces do not balance as shown); confirm the final loop
// structure against the repository before treating this as compilable code.
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){
for(int c1=0;c1<N;c1++){
// Form of the c3 = 0 term with the index written as the literal 0.
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
// Form of the c3 = 0 term with the index held in a named variable.
int c3=0;
for(int c2=0;c2<N;c2++){
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}
// Remaining shared-index terms (c3 >= 1) go through mac() on the same element.
for(int c3=1;c3<N;c3++){
for(int c1=0;c1<N;c1++){
for(int c2=0;c2<N;c2++){
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
}
}}
}
}
return;
}
template<class rrtype,class ltype,class rtype,int N>
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
for(int c2=0;c2<N;c2++){