#ifndef GRID_LATTICE_H #define GRID_LATTICE_H #include "Grid.h" namespace Grid { extern int GridCshiftPermuteMap[4][16]; template class Lattice { public: GridBase *_grid; int checkerboard; std::vector > _odata; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; public: Lattice(GridBase *grid) : _grid(grid) { _odata.reserve(_grid->oSites()); assert((((uint64_t)&_odata[0])&0xF) ==0); checkerboard=0; } #include template friend void conformable(const Lattice &lhs,const Lattice &rhs); // FIXME Performance difference between operator * and mult is troubling. // Auto move constructor seems to lose surprisingly much. // Site wise binary operations // We eliminate a temporary object assignment if use the mult,add,sub routines. // For the operator versions we rely on move constructor to eliminate the // vector copy back. template friend void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs); template friend void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs); template friend void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs); friend void axpy(Lattice &ret,double a,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]); } } friend void axpy(Lattice &ret,std::complex a,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ axpy(&ret._odata[ss],a,&lhs._odata[ss],&rhs._odata[ss]); } } inline friend Lattice operator / (const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss] = lhs._odata[ss]/rhs._odata[ss]; } return ret; }; template inline Lattice & operator = (const sobj & r){ #pragma omp parallel for for(int ss=0;ss<_grid->oSites();ss++){ this->_odata[ss]=r; } return *this; } // Poke a scalar object into the SIMD array template friend void pokeSite(const sobj &s,Lattice &l,std::vector &site){ GridBase *grid=l._grid; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); assert( l.checkerboard== l._grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); int rank,odx,idx; grid->GlobalCoorToRankIndex(rank,odx,idx,site); // Optional to broadcast from node 0. grid->Broadcast(0,s); std::vector buf(Nsimd); std::vector pointers(Nsimd); for(int i=0;i friend void peekSite(sobj &s,Lattice &l,std::vector &site){ GridBase *grid=l._grid; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; int Nsimd = grid->Nsimd(); assert( l.checkerboard== l._grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); int rank,odx,idx; grid->GlobalCoorToRankIndex(rank,odx,idx,site); std::vector buf(Nsimd); std::vector pointers(Nsimd); for(int i=0;iBroadcast(rank,s); return; }; // FIXME Randomise; deprecate this friend void random(Lattice &l){ Real *v_ptr = (Real *)&l._odata[0]; size_t v_len = l._grid->oSites()*sizeof(vobj); size_t d_len = v_len/sizeof(Real); for(int i=0;i &l){ Real *v_ptr = (Real *)&l._odata[0]; size_t o_len = l._grid->oSites(); size_t v_len = sizeof(vobj)/sizeof(vRealF); size_t vec_len = vRealF::Nsimd(); for(int i=0;i &l){ // Zero mean, unit variance. std::normal_distribution distribution(0.0,1.0); Real *v_ptr = (Real *)&l._odata[0]; size_t v_len = l._grid->oSites()*sizeof(vobj); size_t d_len = v_len/sizeof(Real); for(int i=0;i operator -(const Lattice &r) { Lattice ret(r._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss]= -r._odata[ss]; } return ret; } // *=,+=,-= operators template inline Lattice &operator *=(const T &r) { *this = (*this)*r; return *this; } template inline Lattice &operator -=(const T &r) { *this = (*this)-r; return *this; } template inline Lattice &operator +=(const T &r) { *this = (*this)+r; return *this; } inline friend Lattice > _trace(const Lattice &lhs){ Lattice > ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss] = trace(lhs._odata[ss]); } return ret; }; inline friend Lattice > > trace(const Lattice &lhs){ Lattice > > ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss] = trace(lhs._odata[ss]); } return ret; }; inline friend Lattice adj(const Lattice &lhs){ Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss] = adj(lhs._odata[ss]); } return ret; }; inline friend Lattice conj(const Lattice &lhs){ Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ ret._odata[ss] = conj(lhs._odata[ss]); } return ret; }; // remove and insert a half checkerboard friend void pickCheckerboard(int cb,Lattice &half,const Lattice &full){ half.checkerboard = cb; int ssh=0; #pragma omp parallel for for(int ss=0;ssoSites();ss++){ std::vector coor; int cbos; full._grid->oCoorFromOindex(coor,ss); cbos=half._grid->CheckerBoard(coor); if (cbos==cb) { half._odata[ssh] = full._odata[ss]; ssh++; } } } friend void setCheckerboard(Lattice &full,const Lattice &half){ int cb = half.checkerboard; int ssh=0; #pragma omp parallel for for(int ss=0;ssoSites();ss++){ std::vector coor; int cbos; full._grid->oCoorFromOindex(coor,ss); cbos=half._grid->CheckerBoard(coor); if (cbos==cb) { full._odata[ss]=half._odata[ssh]; ssh++; } } } }; // class Lattice template void conformable(const Lattice &lhs,const Lattice &rhs) { assert(lhs._grid == rhs._grid); assert(lhs.checkerboard == rhs.checkerboard); } template void mult(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss void sub(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ sub(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); } } template void add(Lattice &ret,const Lattice &lhs,const Lattice &rhs){ conformable(lhs,rhs); #pragma omp parallel for for(int ss=0;ssoSites();ss++){ add(&ret._odata[ss],&lhs._odata[ss],&rhs._odata[ss]); } } // Lattice BinOp Lattice, template inline auto operator * (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); mult(ret,lhs,rhs); return ret; } template inline auto operator + (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); add(ret,lhs,rhs); return ret; } template inline auto operator - (const Lattice &lhs,const Lattice &rhs)-> Lattice { //NB mult performs conformable check. Do not reapply here for performance. Lattice ret(rhs._grid); sub(ret,lhs,rhs); return ret; } // Scalar BinOp Lattice ;generate return type template inline auto operator * (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs*rhs._odata[ss]; } return ret; } template inline auto operator + (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs+rhs._odata[ss]; } return ret; } template inline auto operator - (const left &lhs,const Lattice &rhs) -> Lattice { Lattice ret(rhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs-rhs._odata[ss]; } return ret; } template inline auto operator * (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]*rhs; } return ret; } template inline auto operator + (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]+rhs; } return ret; } template inline auto operator - (const Lattice &lhs,const right &rhs) -> Lattice { Lattice ret(lhs._grid); #pragma omp parallel for for(int ss=0;ssoSites(); ss++){ ret._odata[ss]=lhs._odata[ss]-rhs; } return ret; } } #endif