From f574c2011851a178af597fcf36f528515ca433e4 Mon Sep 17 00:00:00 2001 From: paboyle Date: Sat, 27 Jan 2018 23:50:17 +0000 Subject: [PATCH] Zero changes, __VA_ARGS__ and swap --- lib/lattice/Lattice_base.h | 11 +++++++++++ lib/lattice/Lattice_reduction.h | 18 +++++++++--------- lib/lattice/Lattice_transfer.h | 8 ++++---- lib/lattice/Lattice_where.h | 4 ++-- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 89e0bbcb..26eabde4 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -76,6 +76,7 @@ public: accelerator_inline uint64_t size(void) const { return _odata_size; }; accelerator_inline vobj & operator[](size_t i) { return _odata[i]; }; accelerator_inline const vobj & operator[](size_t i) const { return _odata[i]; }; + }; class LatticeExpressionBase {}; @@ -398,6 +399,16 @@ public: *this = (*this)+r; return *this; } + + friend inline void swap(Lattice &l, Lattice &r) { + conformable(l,r); + LatticeAccelerator tmp; + LatticeAccelerator *lp = (LatticeAccelerator *)&l; + LatticeAccelerator *rp = (LatticeAccelerator *)&r; + tmp = *lp; *lp=*rp; *rp=tmp; + } + + }; // class Lattice template std::ostream& operator<< (std::ostream& stream, const Lattice &o){ diff --git a/lib/lattice/Lattice_reduction.h b/lib/lattice/Lattice_reduction.h index 86d27aa6..b39f43fb 100644 --- a/lib/lattice/Lattice_reduction.h +++ b/lib/lattice/Lattice_reduction.h @@ -52,14 +52,14 @@ inline ComplexD innerProduct(const Lattice &left,const Lattice &righ int mywork, myoff; GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff); - decltype(innerProductD(left[0],right[0])) vnrm=zero; // private to thread; sub summation + decltype(innerProductD(left[0],right[0])) vnrm=Zero(); // private to thread; sub summation for(int ss=myoff;ssSumArraySize();i++){ vvnrm = vvnrm+sumarray[i]; } @@ -101,21 +101,21 @@ inline typename vobj::scalar_object sum(const Lattice &arg) std::vector > sumarray(grid->SumArraySize()); for(int i=0;iSumArraySize();i++){ - sumarray[i]=zero; + sumarray[i]=Zero(); } parallel_for(int thr=0;thrSumArraySize();thr++){ int mywork, myoff; GridThread::GetWork(grid->oSites(),thr,mywork,myoff); - vobj vvsum=zero; + vobj vvsum=Zero(); for(int ss=myoff;ssSumArraySize();i++){ vsum = vsum+sumarray[i]; } @@ -159,12 +159,12 @@ template inline void sliceSum(const Lattice &Data,std::vector< int rd=grid->_rdimensions[orthogdim]; std::vector > lvSum(rd); // will locally sum vectors first - std::vector lsSum(ld,zero); // sum across these down to scalars + std::vector lsSum(ld,Zero()); // sum across these down to scalars std::vector extracted(Nsimd); // splitting the SIMD result.resize(fd); // And then global sum to return the same vector to every node for(int r=0;r_slice_nblock[orthogdim]; @@ -211,7 +211,7 @@ template inline void sliceSum(const Lattice &Data,std::vector< if ( pt == grid->_processor_coor[orthogdim] ) { gsum=lsSum[lt]; } else { - gsum=zero; + gsum=Zero(); } grid->GlobalSum(gsum); @@ -245,7 +245,7 @@ static void sliceInnerProductVector( std::vector & result, const Latti result.resize(fd); // And then global sum to return the same vector to every node for IO to file for(int r=0;r_slice_nblock[orthogdim]; diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index ccb5e7e9..1ee91a52 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -103,7 +103,7 @@ inline void blockProject(Lattice > &coarseData, assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]); } - coarseData=zero; + coarseData=Zero(); // Loop over coars parallel, and then loop over fine associated with coarse. thread_loop( (int sf=0;sfoSites();sf++),{ @@ -192,7 +192,7 @@ template inline void blockNormalise(Lattice &ip,Lattice &fineX) { GridBase *coarse = ip.Grid(); - Lattice zz(fineX.Grid()); zz=zero; zz.Checkerboard()=fineX.Checkerboard(); + Lattice zz(fineX.Grid()); zz=Zero(); zz.Checkerboard()=fineX.Checkerboard(); blockInnerProduct(ip,fineX,fineX); ip = pow(ip,-0.5); blockZAXPY(fineX,ip,fineX,zz); @@ -217,7 +217,7 @@ inline void blockSum(Lattice &coarseData,const Lattice &fineData) // Turn this around to loop threaded over sc and interior loop // over sf would thread better - coarseData=zero; + coarseData=Zero(); thread_region { int sc; @@ -247,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice &unpicked,Lattice zz(fine); zz.Checkerboard() = unpicked.Checkerboard(); Lattice > fcoor(fine); - zz = zero; + zz = Zero(); picked = unpicked; for(int d=0;d_ndimension;d++){ diff --git a/lib/lattice/Lattice_where.h b/lib/lattice/Lattice_where.h index ad524cc6..3aa9098c 100644 --- a/lib/lattice/Lattice_where.h +++ b/lib/lattice/Lattice_where.h @@ -58,7 +58,7 @@ inline void whereWolf(Lattice &ret,const Lattice &predicate,Lattice< std::vector truevals (Nsimd); std::vector falsevals(Nsimd); - thread_loop( (int ss=iftrue.begin(); ss &ret,const Lattice &predicate,Lattice< } merge(ret[ss],falsevals); - }) + } ); }