1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

Zero changes, __VA_ARGS__ and swap

This commit is contained in:
paboyle 2018-01-27 23:50:17 +00:00
parent f102897385
commit f574c20118
4 changed files with 26 additions and 15 deletions

View File

@ -76,6 +76,7 @@ public:
accelerator_inline uint64_t size(void) const { return _odata_size; };
accelerator_inline vobj & operator[](size_t i) { return _odata[i]; };
accelerator_inline const vobj & operator[](size_t i) const { return _odata[i]; };
};
// Empty class: declares no data members and no member functions.
// NOTE(review): presumably a tag/marker base used to recognise lattice
// expression types at compile time -- confirm against the types that derive from it.
class LatticeExpressionBase {};
@ -398,6 +399,16 @@ public:
*this = (*this)+r;
return *this;
}
// Exchange the LatticeAccelerator<vobj> parts of two lattices.
//
// conformable(l,r) is called first. Both arguments are then addressed
// through LatticeAccelerator<vobj> pointers and their contents are exchanged
// with three assignments via a default-constructed temporary, so only what
// LatticeAccelerator<vobj>'s assignment operator copies is swapped.
// NOTE(review): the pointer casts presumably rely on Lattice deriving from
// LatticeAccelerator<vobj> -- confirm against the class declaration.
friend inline void swap(Lattice &l, Lattice &r) {
  typedef LatticeAccelerator<vobj> Accel;

  conformable(l,r);

  Accel *left_part  = (Accel *)&l;
  Accel *right_part = (Accel *)&r;

  // Default-construct, then assign: keeps the same constructor/assignment
  // sequence rather than copy-constructing the temporary.
  Accel scratch;
  scratch     = *left_part;
  *left_part  = *right_part;
  *right_part = scratch;
}
}; // class Lattice
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){

View File

@ -52,14 +52,14 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
int mywork, myoff;
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
decltype(innerProductD(left[0],right[0])) vnrm=zero; // private to thread; sub summation
decltype(innerProductD(left[0],right[0])) vnrm=Zero(); // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left[ss],right[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
vector_type vvnrm; vvnrm=Zero(); // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
@ -101,21 +101,21 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
sumarray[i]=Zero();
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero;
vobj vvsum=Zero();
for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
vobj vsum=Zero(); // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
@ -159,12 +159,12 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int rd=grid->_rdimensions[orthogdim];
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
std::vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
lvSum[r]=zero;
lvSum[r]=Zero();
}
int e1= grid->_slice_nblock[orthogdim];
@ -211,7 +211,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt];
} else {
gsum=zero;
gsum=Zero();
}
grid->GlobalSum(gsum);
@ -245,7 +245,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
lvSum[r]=Zero();
}
int e1= grid->_slice_nblock[orthogdim];

View File

@ -103,7 +103,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
}
coarseData=zero;
coarseData=Zero();
// Loop over coarse parallel, and then loop over fine associated with coarse.
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
@ -192,7 +192,7 @@ template<class vobj,class CComplex>
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
{
GridBase *coarse = ip.Grid();
Lattice<vobj> zz(fineX.Grid()); zz=zero; zz.Checkerboard()=fineX.Checkerboard();
Lattice<vobj> zz(fineX.Grid()); zz=Zero(); zz.Checkerboard()=fineX.Checkerboard();
blockInnerProduct(ip,fineX,fineX);
ip = pow(ip,-0.5);
blockZAXPY(fineX,ip,fineX,zz);
@ -217,7 +217,7 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
// Turn this around to loop threaded over sc and interior loop
// over sf would thread better
coarseData=zero;
coarseData=Zero();
thread_region {
int sc;
@ -247,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
Lattice<vobj> zz(fine); zz.Checkerboard() = unpicked.Checkerboard();
Lattice<iScalar<vInteger> > fcoor(fine);
zz = zero;
zz = Zero();
picked = unpicked;
for(int d=0;d<fine->_ndimension;d++){

View File

@ -58,7 +58,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
std::vector<scalar_object> truevals (Nsimd);
std::vector<scalar_object> falsevals(Nsimd);
thread_loop( (int ss=iftrue.begin(); ss<iftrue.end();ss++) , COMMA_SAFE({
thread_loop( (int ss=iftrue.begin(); ss<iftrue.end();ss++) , {
extract(iftrue[ss] ,truevals);
extract(iffalse[ss] ,falsevals);
@ -69,7 +69,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
}
merge(ret[ss],falsevals);
})
}
);
}