1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-12-22 05:34:30 +00:00

Coordinate handling GPU friendly + some GPU merge/extract improvements

This commit is contained in:
paboyle
2018-02-24 22:26:10 +00:00
parent ff7b19a71b
commit c1fc947bb8
8 changed files with 80 additions and 80 deletions

View File

@@ -123,7 +123,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
typedef typename vobj::scalar_object sobj;
sobj ssum; zeroit(ssum);
std::vector<sobj> buf(Nsimd);
ExtractBuffer<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
@@ -160,7 +160,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,Zero()); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD
ExtractBuffer<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){
@@ -185,7 +185,7 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
});
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
Coordinate icoor(Nd);
for(int rt=0;rt<rd;rt++){
@@ -240,7 +240,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
ExtractBuffer<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
@@ -265,7 +265,7 @@ static void sliceInnerProductVector( std::vector<ComplexD> & result, const Latti
});
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
Coordinate icoor(Nd);
for(int rt=0;rt<rd;rt++){
iScalar<vector_type> temp;
@@ -341,7 +341,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
int e2 =grid->_slice_block [orthogdim];
int stride =grid->_slice_stride[orthogdim];
std::vector<int> icoor;
Coordinate icoor;
for(int r=0;r<rd;r++){