#ifndef GRID_LATTICE_ARITH_H #define GRID_LATTICE_ARITH_H namespace Grid { ////////////////////////////////////////////////////////////////////////////////////////////////////// // unary negation ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline Lattice operator -(const Lattice &r) { Lattice ret(r._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss]= -r._odata[ss]; } return ret; } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); } } template void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); } } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ sub(&ret._odata[ss],&lhs._odata[ss],&rhs); } } template void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ add(&ret._odata[ss],&lhs._odata[ss],&rhs); } } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ sub(&ret._odata[ss],&lhs,&rhs._odata[ss]); } } template void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ add(&ret._odata[ss],&lhs,&rhs._odata[ss]); } } ///////////////////////////////////////////////////////////////////////////////////// // Lattice BinOp Lattice, ///////////////////////////////////////////////////////////////////////////////////// template inline auto operator * (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); mult(ret,lhs,rhs); return ret; } template inline auto operator + (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); add(ret,lhs,rhs); return ret; } template inline auto operator - (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); sub(ret,lhs,rhs); return ret; } // Scalar BinOp Lattice ;generate return type template inline auto operator * (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs*rhs._odata[ss]; } return ret; } template inline auto operator + (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs+rhs._odata[ss]; } return ret; } template inline auto operator - (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs-rhs._odata[ss]; } return ret; } template inline auto operator * (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]*rhs; } return ret; } template inline auto operator + (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]+rhs; } return ret; } template inline auto operator - (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]-rhs; } return ret; } template inline void axpy(Lattice &ret,sobj a,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); vobj tmp; #pragma omp parallel for for(int ss=0;ssoSites();ss++){ tmp = a*lhs._odata[ss]; ret._odata[ss]= tmp+rhs._odata[ss]; } } } #endif