mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
Optimisation and syntax pretty
This commit is contained in:
parent
56667e9d32
commit
9f0e990b40
@ -62,7 +62,7 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Expression Template closure support
|
// Expression Template closure support
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
template <typename Op, typename T1> strong_inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
@ -71,7 +71,7 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
template <typename Op, typename T1,typename T2> strong_inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
@ -80,7 +80,7 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
template <typename Op, typename T1,typename T2,typename T3> strong_inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||||
{
|
{
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
@ -132,14 +132,14 @@ PARALLEL_FOR_LOOP
|
|||||||
checkerboard=0;
|
checkerboard=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class sobj> inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
this->_odata[ss]=r;
|
this->_odata[ss]=r;
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
template<class robj> inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
|
||||||
conformable(*this,r);
|
conformable(*this,r);
|
||||||
std::cout<<"Lattice operator ="<<std::endl;
|
std::cout<<"Lattice operator ="<<std::endl;
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
@ -150,21 +150,21 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
|
|
||||||
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
// *=,+=,-= operators inherit behvour from correspond */+/- operation
|
||||||
template<class T> inline Lattice<vobj> &operator *=(const T &r) {
|
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
|
||||||
*this = (*this)*r;
|
*this = (*this)*r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T> inline Lattice<vobj> &operator -=(const T &r) {
|
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
|
||||||
*this = (*this)-r;
|
*this = (*this)-r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
template<class T> inline Lattice<vobj> &operator +=(const T &r) {
|
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
|
||||||
*this = (*this)+r;
|
*this = (*this)+r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
strong_inline friend Lattice<vobj> operator / (const Lattice<vobj> &lhs,const Lattice<vobj> &rhs){
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
Lattice<vobj> ret(lhs._grid);
|
Lattice<vobj> ret(lhs._grid);
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
@ -176,7 +176,7 @@ PARALLEL_FOR_LOOP
|
|||||||
|
|
||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
|
|
||||||
template<class vobj> inline std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
std::vector<int> gcoor;
|
std::vector<int> gcoor;
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
sobj ss;
|
sobj ss;
|
||||||
|
@ -26,7 +26,7 @@ PARALLEL_FOR_LOOP
|
|||||||
// Trace Index level dependent operation
|
// Trace Index level dependent operation
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<int Index,class vobj>
|
template<int Index,class vobj>
|
||||||
inline auto traceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
inline auto latTraceIndex(const Lattice<vobj> &lhs) -> Lattice<decltype(traceIndex<Index>(lhs._odata[0]))>
|
||||||
{
|
{
|
||||||
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
Lattice<decltype(traceIndex<Index>(lhs._odata[0]))> ret(lhs._grid);
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
|
@ -27,13 +27,14 @@ strong_inline void mac(iScalar<rtype> * __restrict__ ret,const iScalar<vtype> *
|
|||||||
}
|
}
|
||||||
template<class rrtype,class ltype,class rtype,int N>
|
template<class rrtype,class ltype,class rtype,int N>
|
||||||
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||||
for(int c2=0;c2<N;c2++){
|
|
||||||
for(int c1=0;c1<N;c1++){
|
|
||||||
for(int c3=0;c3<N;c3++){
|
for(int c3=0;c3<N;c3++){
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||||
}}}
|
}}}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class rrtype,class ltype,class rtype,int N>
|
template<class rrtype,class ltype,class rtype,int N>
|
||||||
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
strong_inline void mac(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
|
@ -15,15 +15,22 @@ strong_inline void mult(iScalar<rtype> * __restrict__ ret,const iScalar<mtype> *
|
|||||||
|
|
||||||
template<class rrtype,class ltype,class rtype,int N>
|
template<class rrtype,class ltype,class rtype,int N>
|
||||||
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iMatrix<rtype,N> * __restrict__ rhs){
|
||||||
for(int c2=0;c2<N;c2++){
|
|
||||||
for(int c1=0;c1<N;c1++){
|
for(int c1=0;c1<N;c1++){
|
||||||
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][0],&rhs->_internal[0][c2]);
|
int c3=0;
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
|
mult(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
for(int c3=1;c3<N;c3++){
|
for(int c3=1;c3<N;c3++){
|
||||||
|
for(int c1=0;c1<N;c1++){
|
||||||
|
for(int c2=0;c2<N;c2++){
|
||||||
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
mac(&ret->_internal[c1][c2],&lhs->_internal[c1][c3],&rhs->_internal[c3][c2]);
|
||||||
}
|
}
|
||||||
}}
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class rrtype,class ltype,class rtype,int N>
|
template<class rrtype,class ltype,class rtype,int N>
|
||||||
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
strong_inline void mult(iMatrix<rrtype,N> * __restrict__ ret,const iMatrix<ltype,N> * __restrict__ lhs,const iScalar<rtype> * __restrict__ rhs){
|
||||||
for(int c2=0;c2<N;c2++){
|
for(int c2=0;c2<N;c2++){
|
||||||
|
Loading…
Reference in New Issue
Block a user