mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Zero changes, __VA_ARGS__ and swap
This commit is contained in:
parent
f102897385
commit
f574c20118
@ -76,6 +76,7 @@ public:
|
|||||||
accelerator_inline uint64_t size(void) const { return _odata_size; };
|
accelerator_inline uint64_t size(void) const { return _odata_size; };
|
||||||
accelerator_inline vobj & operator[](size_t i) { return _odata[i]; };
|
accelerator_inline vobj & operator[](size_t i) { return _odata[i]; };
|
||||||
accelerator_inline const vobj & operator[](size_t i) const { return _odata[i]; };
|
accelerator_inline const vobj & operator[](size_t i) const { return _odata[i]; };
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class LatticeExpressionBase {};
|
class LatticeExpressionBase {};
|
||||||
@ -398,6 +399,16 @@ public:
|
|||||||
*this = (*this)+r;
|
*this = (*this)+r;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Friend swap for two Lattice objects: exchanges what l and r hold when each
// is viewed as a LatticeAccelerator<vobj>, using a temporary of that type.
// After the call l carries what r carried before, and vice versa.
friend inline void swap(Lattice &l, Lattice &r) {
|
||||||
|
// conformable() is defined elsewhere; presumably it verifies that the two
// lattices are compatible before they are exchanged -- TODO confirm.
conformable(l,r);
|
||||||
|
// Scratch holder for the three-way exchange below.
LatticeAccelerator<vobj> tmp;
|
||||||
|
// View each argument through a LatticeAccelerator<vobj> pointer (C-style cast).
// NOTE(review): assumes Lattice derives from LatticeAccelerator<vobj>; the
// class header is not visible here -- confirm.
LatticeAccelerator<vobj> *lp = (LatticeAccelerator<vobj> *)&l;
|
||||||
|
LatticeAccelerator<vobj> *rp = (LatticeAccelerator<vobj> *)&r;
|
||||||
|
// Classic exchange through tmp. Exactly what is copied is determined by
// LatticeAccelerator's assignment operator, which is not shown in this view.
tmp = *lp; *lp=*rp; *rp=tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}; // class Lattice
|
}; // class Lattice
|
||||||
|
|
||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
|
||||||
|
@ -52,14 +52,14 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
int mywork, myoff;
|
int mywork, myoff;
|
||||||
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
|
GridThread::GetWork(left.Grid()->oSites(),thr,mywork,myoff);
|
||||||
|
|
||||||
decltype(innerProductD(left[0],right[0])) vnrm=zero; // private to thread; sub summation
|
decltype(innerProductD(left[0],right[0])) vnrm=Zero(); // private to thread; sub summation
|
||||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||||
vnrm = vnrm + innerProductD(left[ss],right[ss]);
|
vnrm = vnrm + innerProductD(left[ss],right[ss]);
|
||||||
}
|
}
|
||||||
sumarray[thr]=TensorRemove(vnrm) ;
|
sumarray[thr]=TensorRemove(vnrm) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector_type vvnrm; vvnrm=zero; // sum across threads
|
vector_type vvnrm; vvnrm=Zero(); // sum across threads
|
||||||
for(int i=0;i<grid->SumArraySize();i++){
|
for(int i=0;i<grid->SumArraySize();i++){
|
||||||
vvnrm = vvnrm+sumarray[i];
|
vvnrm = vvnrm+sumarray[i];
|
||||||
}
|
}
|
||||||
@ -101,21 +101,21 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
|
|
||||||
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
|
||||||
for(int i=0;i<grid->SumArraySize();i++){
|
for(int i=0;i<grid->SumArraySize();i++){
|
||||||
sumarray[i]=zero;
|
sumarray[i]=Zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||||
int mywork, myoff;
|
int mywork, myoff;
|
||||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||||
|
|
||||||
vobj vvsum=zero;
|
vobj vvsum=Zero();
|
||||||
for(int ss=myoff;ss<mywork+myoff; ss++){
|
for(int ss=myoff;ss<mywork+myoff; ss++){
|
||||||
vvsum = vvsum + arg[ss];
|
vvsum = vvsum + arg[ss];
|
||||||
}
|
}
|
||||||
sumarray[thr]=vvsum;
|
sumarray[thr]=vvsum;
|
||||||
}
|
}
|
||||||
|
|
||||||
vobj vsum=zero; // sum across threads
|
vobj vsum=Zero(); // sum across threads
|
||||||
for(int i=0;i<grid->SumArraySize();i++){
|
for(int i=0;i<grid->SumArraySize();i++){
|
||||||
vsum = vsum+sumarray[i];
|
vsum = vsum+sumarray[i];
|
||||||
}
|
}
|
||||||
@ -159,12 +159,12 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
int rd=grid->_rdimensions[orthogdim];
|
int rd=grid->_rdimensions[orthogdim];
|
||||||
|
|
||||||
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
|
||||||
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
|
std::vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
|
||||||
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
|
||||||
|
|
||||||
result.resize(fd); // And then global sum to return the same vector to every node
|
result.resize(fd); // And then global sum to return the same vector to every node
|
||||||
for(int r=0;r<rd;r++){
|
for(int r=0;r<rd;r++){
|
||||||
lvSum[r]=zero;
|
lvSum[r]=Zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
int e1= grid->_slice_nblock[orthogdim];
|
int e1= grid->_slice_nblock[orthogdim];
|
||||||
@ -211,7 +211,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
if ( pt == grid->_processor_coor[orthogdim] ) {
|
if ( pt == grid->_processor_coor[orthogdim] ) {
|
||||||
gsum=lsSum[lt];
|
gsum=lsSum[lt];
|
||||||
} else {
|
} else {
|
||||||
gsum=zero;
|
gsum=Zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
grid->GlobalSum(gsum);
|
grid->GlobalSum(gsum);
|
||||||
@ -245,7 +245,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
|
|||||||
|
|
||||||
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
|
||||||
for(int r=0;r<rd;r++){
|
for(int r=0;r<rd;r++){
|
||||||
lvSum[r]=zero;
|
lvSum[r]=Zero();
|
||||||
}
|
}
|
||||||
|
|
||||||
int e1= grid->_slice_nblock[orthogdim];
|
int e1= grid->_slice_nblock[orthogdim];
|
||||||
|
@ -103,7 +103,7 @@ inline void blockProject(Lattice<iVector<CComplex,nbasis > > &coarseData,
|
|||||||
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
assert(block_r[d]*coarse->_rdimensions[d] == fine->_rdimensions[d]);
|
||||||
}
|
}
|
||||||
|
|
||||||
coarseData=zero;
|
coarseData=Zero();
|
||||||
|
|
||||||
// Loop over coarse parallel, and then loop over fine associated with coarse.
|
// Loop over coarse parallel, and then loop over fine associated with coarse.
|
||||||
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
thread_loop( (int sf=0;sf<fine->oSites();sf++),{
|
||||||
@ -192,7 +192,7 @@ template<class vobj,class CComplex>
|
|||||||
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
inline void blockNormalise(Lattice<CComplex> &ip,Lattice<vobj> &fineX)
|
||||||
{
|
{
|
||||||
GridBase *coarse = ip.Grid();
|
GridBase *coarse = ip.Grid();
|
||||||
Lattice<vobj> zz(fineX.Grid()); zz=zero; zz.Checkerboard()=fineX.Checkerboard();
|
Lattice<vobj> zz(fineX.Grid()); zz=Zero(); zz.Checkerboard()=fineX.Checkerboard();
|
||||||
blockInnerProduct(ip,fineX,fineX);
|
blockInnerProduct(ip,fineX,fineX);
|
||||||
ip = pow(ip,-0.5);
|
ip = pow(ip,-0.5);
|
||||||
blockZAXPY(fineX,ip,fineX,zz);
|
blockZAXPY(fineX,ip,fineX,zz);
|
||||||
@ -217,7 +217,7 @@ inline void blockSum(Lattice<vobj> &coarseData,const Lattice<vobj> &fineData)
|
|||||||
|
|
||||||
// Turn this around to loop threaded over sc and interior loop
|
// Turn this around to loop threaded over sc and interior loop
|
||||||
// over sf would thread better
|
// over sf would thread better
|
||||||
coarseData=zero;
|
coarseData=Zero();
|
||||||
thread_region {
|
thread_region {
|
||||||
|
|
||||||
int sc;
|
int sc;
|
||||||
@ -247,7 +247,7 @@ inline void blockPick(GridBase *coarse,const Lattice<vobj> &unpicked,Lattice<vob
|
|||||||
Lattice<vobj> zz(fine); zz.Checkerboard() = unpicked.Checkerboard();
|
Lattice<vobj> zz(fine); zz.Checkerboard() = unpicked.Checkerboard();
|
||||||
Lattice<iScalar<vInteger> > fcoor(fine);
|
Lattice<iScalar<vInteger> > fcoor(fine);
|
||||||
|
|
||||||
zz = zero;
|
zz = Zero();
|
||||||
|
|
||||||
picked = unpicked;
|
picked = unpicked;
|
||||||
for(int d=0;d<fine->_ndimension;d++){
|
for(int d=0;d<fine->_ndimension;d++){
|
||||||
|
@ -58,7 +58,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
|||||||
std::vector<scalar_object> truevals (Nsimd);
|
std::vector<scalar_object> truevals (Nsimd);
|
||||||
std::vector<scalar_object> falsevals(Nsimd);
|
std::vector<scalar_object> falsevals(Nsimd);
|
||||||
|
|
||||||
thread_loop( (int ss=iftrue.begin(); ss<iftrue.end();ss++) , COMMA_SAFE({
|
thread_loop( (int ss=iftrue.begin(); ss<iftrue.end();ss++) , {
|
||||||
|
|
||||||
extract(iftrue[ss] ,truevals);
|
extract(iftrue[ss] ,truevals);
|
||||||
extract(iffalse[ss] ,falsevals);
|
extract(iffalse[ss] ,falsevals);
|
||||||
@ -69,7 +69,7 @@ inline void whereWolf(Lattice<vobj> &ret,const Lattice<iobj> &predicate,Lattice<
|
|||||||
}
|
}
|
||||||
|
|
||||||
merge(ret[ss],falsevals);
|
merge(ret[ss],falsevals);
|
||||||
})
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user