mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-09 21:50:45 +01:00
Still using parallel_for -- don't know how to implement reduction on GPU yet. Look at some sample code is best.
This commit is contained in:
parent
0bfa5bb213
commit
733f8ff0b2
@ -22,8 +22,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#ifndef GRID_LATTICE_REDUCTION_H
|
#ifndef GRID_LATTICE_REDUCTION_H
|
||||||
#define GRID_LATTICE_REDUCTION_H
|
#define GRID_LATTICE_REDUCTION_H
|
||||||
|
|
||||||
#include <Grid/Grid_Eigen_Dense.h>
|
|
||||||
|
|
||||||
#ifdef GRID_WARN_SUBOPTIMAL
|
#ifdef GRID_WARN_SUBOPTIMAL
|
||||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||||
#endif
|
#endif
|
||||||
@ -35,7 +33,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
|
||||||
ComplexD nrm = innerProduct(arg,arg);
|
ComplexD nrm = innerProduct(arg,arg);
|
||||||
return std::real(nrm);
|
return real(nrm);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Double inner product
|
// Double inner product
|
||||||
@ -51,7 +49,7 @@ inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &righ
|
|||||||
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
|
||||||
|
|
||||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||||
int nwork, mywork, myoff;
|
int mywork, myoff;
|
||||||
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
|
||||||
|
|
||||||
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
|
||||||
@ -107,7 +105,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
|
||||||
int nwork, mywork, myoff;
|
int mywork, myoff;
|
||||||
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
|
||||||
|
|
||||||
vobj vvsum=zero;
|
vobj vvsum=zero;
|
||||||
@ -123,7 +121,7 @@ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
typedef typename vobj::scalar_object sobj;
|
||||||
sobj ssum=zero;
|
sobj ssum; zeroit(ssum);
|
||||||
|
|
||||||
std::vector<sobj> buf(Nsimd);
|
std::vector<sobj> buf(Nsimd);
|
||||||
extract(vsum,buf);
|
extract(vsum,buf);
|
||||||
@ -370,201 +368,6 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
|
||||||
{
|
|
||||||
int NN = BlockSolverGrid->_ndimension;
|
|
||||||
int nsimd = BlockSolverGrid->Nsimd();
|
|
||||||
|
|
||||||
std::vector<int> latt_phys(0);
|
|
||||||
std::vector<int> simd_phys(0);
|
|
||||||
std::vector<int> mpi_phys(0);
|
|
||||||
|
|
||||||
for(int d=0;d<NN;d++){
|
|
||||||
if( d!=Orthog ) {
|
|
||||||
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
|
|
||||||
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
|
|
||||||
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
template<class vobj>
|
|
||||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
|
||||||
{
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
|
||||||
|
|
||||||
GridBase *FullGrid = X._grid;
|
|
||||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
|
||||||
|
|
||||||
// Lattice<vobj> Xslice(SliceGrid);
|
|
||||||
// Lattice<vobj> Rslice(SliceGrid);
|
|
||||||
|
|
||||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
|
||||||
int nh = FullGrid->_ndimension;
|
|
||||||
// int nl = SliceGrid->_ndimension;
|
|
||||||
int nl = nh-1;
|
|
||||||
|
|
||||||
//FIXME package in a convenient iterator
|
|
||||||
//Should loop over a plane orthogonal to direction "Orthog"
|
|
||||||
int stride=FullGrid->_slice_stride[Orthog];
|
|
||||||
int block =FullGrid->_slice_block [Orthog];
|
|
||||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
|
||||||
int ostride=FullGrid->_ostride[Orthog];
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
std::vector<vobj> s_x(Nblock);
|
|
||||||
|
|
||||||
#pragma omp for collapse(2)
|
|
||||||
for(int n=0;n<nblock;n++){
|
|
||||||
for(int b=0;b<block;b++){
|
|
||||||
int o = n*stride + b;
|
|
||||||
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
s_x[i] = X[o+i*ostride];
|
|
||||||
}
|
|
||||||
|
|
||||||
vobj dot;
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
dot = Y[o+i*ostride];
|
|
||||||
for(int j=0;j<Nblock;j++){
|
|
||||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
|
||||||
}
|
|
||||||
R[o+i*ostride]=dot;
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class vobj>
|
|
||||||
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
|
|
||||||
{
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
|
||||||
|
|
||||||
GridBase *FullGrid = X._grid;
|
|
||||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
|
||||||
// Lattice<vobj> Xslice(SliceGrid);
|
|
||||||
// Lattice<vobj> Rslice(SliceGrid);
|
|
||||||
|
|
||||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
|
||||||
int nh = FullGrid->_ndimension;
|
|
||||||
// int nl = SliceGrid->_ndimension;
|
|
||||||
int nl=1;
|
|
||||||
|
|
||||||
//FIXME package in a convenient iterator
|
|
||||||
//Should loop over a plane orthogonal to direction "Orthog"
|
|
||||||
int stride=FullGrid->_slice_stride[Orthog];
|
|
||||||
int block =FullGrid->_slice_block [Orthog];
|
|
||||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
|
||||||
int ostride=FullGrid->_ostride[Orthog];
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
std::vector<vobj> s_x(Nblock);
|
|
||||||
|
|
||||||
#pragma omp for collapse(2)
|
|
||||||
for(int n=0;n<nblock;n++){
|
|
||||||
for(int b=0;b<block;b++){
|
|
||||||
int o = n*stride + b;
|
|
||||||
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
s_x[i] = X[o+i*ostride];
|
|
||||||
}
|
|
||||||
|
|
||||||
vobj dot;
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
dot = s_x[0]*(scale*aa(0,i));
|
|
||||||
for(int j=1;j<Nblock;j++){
|
|
||||||
dot = dot + s_x[j]*(scale*aa(j,i));
|
|
||||||
}
|
|
||||||
R[o+i*ostride]=dot;
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template<class vobj>
|
|
||||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
|
||||||
{
|
|
||||||
typedef typename vobj::scalar_object sobj;
|
|
||||||
typedef typename vobj::scalar_type scalar_type;
|
|
||||||
typedef typename vobj::vector_type vector_type;
|
|
||||||
|
|
||||||
GridBase *FullGrid = lhs._grid;
|
|
||||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
|
||||||
|
|
||||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
|
||||||
|
|
||||||
// Lattice<vobj> Lslice(SliceGrid);
|
|
||||||
// Lattice<vobj> Rslice(SliceGrid);
|
|
||||||
|
|
||||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
||||||
|
|
||||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
|
||||||
int nh = FullGrid->_ndimension;
|
|
||||||
// int nl = SliceGrid->_ndimension;
|
|
||||||
int nl = nh-1;
|
|
||||||
|
|
||||||
//FIXME package in a convenient iterator
|
|
||||||
//Should loop over a plane orthogonal to direction "Orthog"
|
|
||||||
int stride=FullGrid->_slice_stride[Orthog];
|
|
||||||
int block =FullGrid->_slice_block [Orthog];
|
|
||||||
int nblock=FullGrid->_slice_nblock[Orthog];
|
|
||||||
int ostride=FullGrid->_ostride[Orthog];
|
|
||||||
|
|
||||||
typedef typename vobj::vector_typeD vector_typeD;
|
|
||||||
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
std::vector<vobj> Left(Nblock);
|
|
||||||
std::vector<vobj> Right(Nblock);
|
|
||||||
Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
|
||||||
|
|
||||||
#pragma omp for collapse(2)
|
|
||||||
for(int n=0;n<nblock;n++){
|
|
||||||
for(int b=0;b<block;b++){
|
|
||||||
|
|
||||||
int o = n*stride + b;
|
|
||||||
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
Left [i] = lhs[o+i*ostride];
|
|
||||||
Right[i] = rhs[o+i*ostride];
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
for(int j=0;j<Nblock;j++){
|
|
||||||
auto tmp = innerProduct(Left[i],Right[j]);
|
|
||||||
auto rtmp = TensorRemove(tmp);
|
|
||||||
mat_thread(i,j) += Reduce(rtmp);
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
#pragma omp critical
|
|
||||||
{
|
|
||||||
mat += mat_thread;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int i=0;i<Nblock;i++){
|
|
||||||
for(int j=0;j<Nblock;j++){
|
|
||||||
ComplexD sum = mat(i,j);
|
|
||||||
FullGrid->GlobalSum(sum);
|
|
||||||
mat(i,j)=sum;
|
|
||||||
}}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user