#ifndef GRID_LATTICE_ARITH_H #define GRID_LATTICE_ARITH_H namespace Grid { ////////////////////////////////////////////////////////////////////////////////////////////////////// // unary negation ////////////////////////////////////////////////////////////////////////////////////////////////////// template inline Lattice operator -(const Lattice &r) { Lattice ret(r._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ vstream(ret._odata[ss], -r._odata[ss]); } return ret; } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mult(&tmp,&lhs._odata[ss],&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void mac(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mac(&tmp,&lhs._odata[ss],&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; sub(&tmp,&lhs._odata[ss],&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; add(&tmp,&lhs._odata[ss],&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mult(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); } } template void mac(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mac(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); } } template void sub(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; sub(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); } } template void add(Lattice &ret,const Lattice &lhs,const obj3 &rhs){ conformable(lhs,ret); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; add(&tmp,&lhs._odata[ss],&rhs); vstream(ret._odata[ss],tmp); } } ////////////////////////////////////////////////////////////////////////////////////////////////////// // avoid copy back routines for mult, mac, sub, add ////////////////////////////////////////////////////////////////////////////////////////////////////// template void mult(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mult(&tmp,&lhs,&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void mac(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; mac(&tmp,&lhs,&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void sub(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; sub(&tmp,&lhs,&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } template void add(Lattice &ret,const obj2 &lhs,const Lattice &rhs){ conformable(ret,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ obj1 tmp; add(&tmp,&lhs,&rhs._odata[ss]); vstream(ret._odata[ss],tmp); } } ///////////////////////////////////////////////////////////////////////////////////// // Lattice BinOp Lattice, //NB mult performs conformable check. Do not reapply here for performance. ///////////////////////////////////////////////////////////////////////////////////// template inline auto operator * (const Lattice &lhs,const Lattice &rhs)-> Lattice { Lattice ret(rhs._grid); mult(ret,lhs,rhs); return ret; } template inline auto operator + (const Lattice &lhs,const Lattice &rhs)-> Lattice { Lattice ret(rhs._grid); add(ret,lhs,rhs); return ret; } template inline auto operator - (const Lattice &lhs,const Lattice &rhs)-> Lattice { Lattice ret(rhs._grid); sub(ret,lhs,rhs); return ret; } // Scalar BinOp Lattice ;generate return type template inline auto operator * (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs*rhs._odata[0]) tmp=lhs*rhs._odata[ss]; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs*rhs._odata[ss]; } return ret; } template inline auto operator + (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs+rhs._odata[0]) tmp =lhs-rhs._odata[ss]; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs+rhs._odata[ss]; } return ret; } template inline auto operator - (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs-rhs._odata[0]) tmp=lhs-rhs._odata[ss]; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs-rhs._odata[ss]; } return ret; } template inline auto operator * (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]*rhs) tmp =lhs._odata[ss]*rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]*rhs; } return ret; } template inline auto operator + (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]+rhs) tmp=lhs._odata[ss]+rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]+rhs; } return ret; } template inline auto operator - (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ decltype(lhs._odata[0]-rhs) tmp=lhs._odata[ss]-rhs; vstream(ret._odata[ss],tmp); // ret._odata[ss]=lhs._odata[ss]-rhs; } return ret; } template inline void axpy(Lattice &ret,sobj a,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ vobj tmp = a*lhs._odata[ss]; vstream(ret._odata[ss],tmp+rhs._odata[ss]); } } } #endif