mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-09 23:45:36 +00:00
Non compile of tests fixed
This commit is contained in:
parent
bf516c3b81
commit
7ede696126
17
TODO
17
TODO
@ -3,19 +3,20 @@ TODO:
|
||||
|
||||
Peter's work list:
|
||||
|
||||
-- Merge high precision reduction into develop
|
||||
-- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
|
||||
-- Merge high precision reduction into develop <-- done
|
||||
-- Precision conversion and sort out localConvert <--
|
||||
-- Physical propagator interface
|
||||
-- Precision conversion and sort out localConvert
|
||||
-- slice* linalg routines for multiRHS, BlockCG
|
||||
|
||||
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
|
||||
-- slice* linalg routines for multiRHS, BlockCG <-- started
|
||||
|
||||
-- Profile CG, BlockCG, etc... Flop count/rate
|
||||
-- Binary I/O speed up & x-strips
|
||||
-- Half-precision comms
|
||||
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
|
||||
-- Half-precision comms <-- started
|
||||
-- GaugeFix into central location
|
||||
-- Help Julia with NPR code
|
||||
-- Switch to measurements
|
||||
-- FFTfix in sensible place
|
||||
-- Multigrid Wilson and DWF, compare to other Multigrid implementations
|
||||
-- Remove DenseVector, DenseMatrix; Use Eigen instead.
|
||||
-- quaternions -- Might not need
|
||||
|
||||
|
||||
|
@ -30,210 +30,9 @@ directory
|
||||
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
|
||||
|
||||
#include <Grid/Eigen/Dense>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Build the grid for a single slice of BlockSolverGrid.
//
// Every dimension d != Orthog contributes its _fdimensions[d],
// _simd_layout[d] and _processors[d] entry, in order; dimension Orthog is
// left out.  The three resulting vectors are handed to the GridCartesian
// constructor.
//
// The returned object is allocated with new and no owner is recorded here,
// so releasing it is the caller's responsibility.
//
// Marked inline: this is a non-template function defined in a header, and
// without inline each translation unit that includes the header would emit
// its own external definition.
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
  const int NN = BlockSolverGrid->_ndimension;

  std::vector<int> latt_phys;
  std::vector<int> simd_phys;
  std::vector<int> mpi_phys;

  for(int d=0;d<NN;d++){
    if( d==Orthog ) continue; // the orthogonal direction is not part of a slice

    latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
    simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
    mpi_phys.push_back(BlockSolverGrid->_processors[d]);
  }

  return static_cast<GridBase *>(new GridCartesian(latt_phys,simd_phys,mpi_phys));
}
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Need to move sliceInnerProduct, sliceAxpy, sliceNorm etc... into lattice sector along with sliceSum
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// FIXME: Implementation is slow
|
||||
// If we based this on Cshift it would work for spread out
|
||||
// but it would be even slower
|
||||
//
|
||||
// Repeated extract slice is inefficient
|
||||
//
|
||||
// Best base the linear combination by constructing a
|
||||
// set of vectors of size grid->_rdimensions[Orthog].
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Rslice,Y,i,Orthog);
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ExtractSlice(Xslice,X,j,Orthog);
|
||||
Rslice = Rslice + Xslice*(scale*aa(j,i));
|
||||
}
|
||||
InsertSlice(Rslice,R,i,Orthog);
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
static void sliceMaddVector (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int Orthog,RealD scale=1.0)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Best base the linear combination by constructing a
|
||||
// set of vectors of size grid->_rdimensions[Orthog].
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// If we based this on Cshift it would work for spread out
|
||||
// but it would be even slower
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Rslice,Y,i,Orthog);
|
||||
ExtractSlice(Xslice,X,i,Orthog);
|
||||
Rslice = Rslice + Xslice*(scale*a[i]);
|
||||
InsertSlice(Rslice,R,i,Orthog);
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Not sure of best solution.. think about it
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
Lattice<vobj> Lslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Lslice,lhs,i,Orthog);
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ExtractSlice(Rslice,rhs,j,Orthog);
|
||||
mat(i,j) = innerProduct(Lslice,Rslice);
|
||||
}
|
||||
}
|
||||
#undef FORCE_DIAG
|
||||
#ifdef FORCE_DIAG
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
if ( i != j ) mat(i,j)=0.0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Look at localInnerProduct implementation,
|
||||
// and do inside a site loop with block strided iterators
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced scalar;
|
||||
typedef typename scalar::scalar_object scomplex;
|
||||
|
||||
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
vec.resize(Nblock);
|
||||
std::vector<scomplex> sip(Nblock);
|
||||
Lattice<scalar> IP(lhs._grid);
|
||||
|
||||
IP=localInnerProduct(lhs,rhs);
|
||||
sliceSum(IP,sip,Orthog);
|
||||
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
vec[ss] = TensorRemove(sip[ss]);
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog) {
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
/*
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrixOld( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced scalar;
|
||||
typedef typename scalar::scalar_object scomplex;
|
||||
|
||||
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
std::cout << " sliceInnerProductMatrix Dim "<<Orthog<<" Nblock " << Nblock<<std::endl;
|
||||
|
||||
Lattice<scalar> IP(lhs._grid);
|
||||
std::vector<scomplex> sip(Nblock);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
Lattice<vobj> tmp = rhs;
|
||||
|
||||
for(int s1=0;s1<Nblock;s1++){
|
||||
|
||||
IP=localInnerProduct(lhs,tmp);
|
||||
sliceSum(IP,sip,Orthog);
|
||||
|
||||
std::cout << "InnerProductMatrix ["<<s1<<"] = ";
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
std::cout << TensorRemove(sip[ss])<<" ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
mat(ss,(s1+ss)%Nblock) = TensorRemove(sip[ss]);
|
||||
}
|
||||
if ( s1!=(Nblock-1) ) {
|
||||
tmp = Cshift(tmp,Orthog,1);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Block conjugate gradient. Dimension zero should be the block direction
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef GRID_LATTICE_REDUCTION_H
|
||||
#define GRID_LATTICE_REDUCTION_H
|
||||
|
||||
#include <Grid/Eigen/Dense>
|
||||
|
||||
namespace Grid {
|
||||
#ifdef GRID_WARN_SUBOPTIMAL
|
||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
|
||||
@ -215,6 +217,163 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
}
|
||||
}
|
||||
|
||||
// Build the grid for a single slice of BlockSolverGrid.
//
// Every dimension d != Orthog contributes its _fdimensions[d],
// _simd_layout[d] and _processors[d] entry, in order; dimension Orthog is
// left out.  The three resulting vectors are handed to the GridCartesian
// constructor.
//
// The returned object is allocated with new and no owner is recorded here,
// so releasing it is the caller's responsibility.
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
  const int NN = BlockSolverGrid->_ndimension;

  std::vector<int> latt_phys;
  std::vector<int> simd_phys;
  std::vector<int> mpi_phys;

  for(int d=0;d<NN;d++){
    if( d==Orthog ) continue; // the orthogonal direction is not part of a slice

    latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
    simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
    mpi_phys.push_back(BlockSolverGrid->_processors[d]);
  }

  return static_cast<GridBase *>(new GridCartesian(latt_phys,simd_phys,mpi_phys));
}
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Need to move sliceInnerProduct, sliceAxpy, sliceNorm etc... into lattice sector along with sliceSum
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
{
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// FIXME: Implementation is slow
|
||||
// If we based this on Cshift it would work for spread out
|
||||
// but it would be even slower
|
||||
//
|
||||
// Repeated extract slice is inefficient
|
||||
//
|
||||
// Best base the linear combination by constructing a
|
||||
// set of vectors of size grid->_rdimensions[Orthog].
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Rslice,Y,i,Orthog);
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ExtractSlice(Xslice,X,j,Orthog);
|
||||
Rslice = Rslice + Xslice*(scale*aa(j,i));
|
||||
}
|
||||
InsertSlice(Rslice,R,i,Orthog);
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
static void sliceMaddVector (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
|
||||
int Orthog,RealD scale=1.0)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Best base the linear combination by constructing a
|
||||
// set of vectors of size grid->_rdimensions[Orthog].
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// If we based this on Cshift it would work for spread out
|
||||
// but it would be even slower
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Rslice,Y,i,Orthog);
|
||||
ExtractSlice(Xslice,X,i,Orthog);
|
||||
Rslice = Rslice + Xslice*(scale*a[i]);
|
||||
InsertSlice(Rslice,R,i,Orthog);
|
||||
}
|
||||
};
|
||||
template<class vobj>
|
||||
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Not sure of best solution.. think about it
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
Lattice<vobj> Lslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
ExtractSlice(Lslice,lhs,i,Orthog);
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ExtractSlice(Rslice,rhs,j,Orthog);
|
||||
mat(i,j) = innerProduct(Lslice,Rslice);
|
||||
}
|
||||
}
|
||||
#undef FORCE_DIAG
|
||||
#ifdef FORCE_DIAG
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
if ( i != j ) mat(i,j)=0.0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
|
||||
{
|
||||
// FIXME: Implementation is slow
|
||||
// Look at localInnerProduct implementation,
|
||||
// and do inside a site loop with block strided iterators
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
typedef typename vobj::tensor_reduced scalar;
|
||||
typedef typename scalar::scalar_object scomplex;
|
||||
|
||||
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
vec.resize(Nblock);
|
||||
std::vector<scomplex> sip(Nblock);
|
||||
Lattice<scalar> IP(lhs._grid);
|
||||
|
||||
IP=localInnerProduct(lhs,rhs);
|
||||
sliceSum(IP,sip,Orthog);
|
||||
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
vec[ss] = TensorRemove(sip[ss]);
|
||||
}
|
||||
}
|
||||
template<class vobj>
|
||||
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog) {
|
||||
|
||||
typedef typename vobj::scalar_object sobj;
|
||||
typedef typename vobj::scalar_type scalar_type;
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
int Nblock = rhs._grid->GlobalDimensions()[Orthog];
|
||||
std::vector<ComplexD> ip(Nblock);
|
||||
sn.resize(Nblock);
|
||||
|
||||
sliceInnerProductVector(ip,rhs,rhs,Orthog);
|
||||
for(int ss=0;ss<Nblock;ss++){
|
||||
sn[ss] = real(ip[ss]);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/simd/Grid_vector_types.h
|
||||
Source file: ./lib/simd/Grid_vector_type.h
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
|
@ -112,6 +112,7 @@ namespace Grid {
|
||||
public:
|
||||
typedef Integer scalar_type;
|
||||
typedef Integer vector_type;
|
||||
typedef Integer vector_typeD;
|
||||
typedef Integer tensor_reduced;
|
||||
typedef Integer scalar_object;
|
||||
typedef void Complexified;
|
||||
|
Loading…
Reference in New Issue
Block a user