mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 07:55:35 +00:00
Merge branch 'develop' into feature/multi-communicator
This commit is contained in:
commit
80c5bce5bb
68
.travis.yml
68
.travis.yml
@ -9,68 +9,6 @@ matrix:
|
||||
- os: osx
|
||||
osx_image: xcode8.3
|
||||
compiler: clang
|
||||
- compiler: gcc
|
||||
dist: trusty
|
||||
sudo: required
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.9
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-4.9
|
||||
- compiler: gcc
|
||||
dist: trusty
|
||||
sudo: required
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-5
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: VERSION=-5
|
||||
- compiler: clang
|
||||
dist: trusty
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
- compiler: clang
|
||||
dist: trusty
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
packages:
|
||||
- g++-4.8
|
||||
- libmpfr-dev
|
||||
- libgmp-dev
|
||||
- libmpc-dev
|
||||
- libopenmpi-dev
|
||||
- openmpi-bin
|
||||
- binutils-dev
|
||||
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
|
||||
|
||||
before_install:
|
||||
- export GRIDDIR=`pwd`
|
||||
@ -106,9 +44,3 @@ script:
|
||||
- make -j4
|
||||
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
|
||||
- make check
|
||||
- echo make clean
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
|
||||
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
|
||||
|
||||
|
||||
|
16
README.md
16
README.md
@ -1,18 +1,4 @@
|
||||
# Grid
|
||||
<table>
|
||||
<tr>
|
||||
<td>Last stable release</td>
|
||||
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Development branch</td>
|
||||
<td><a href="https://travis-ci.org/paboyle/Grid">
|
||||
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
|
||||
|
||||
**Data parallel C++ mathematical object library.**
|
||||
|
||||
|
@ -181,7 +181,6 @@ public:
|
||||
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp atomic
|
||||
#endif
|
||||
ncomm++;
|
||||
|
||||
#ifdef GRID_OMP
|
||||
|
@ -199,7 +199,12 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
|
||||
Linop.HermOp(X, AD);
|
||||
tmp = B - AD;
|
||||
//std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
|
||||
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
|
||||
//std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
|
||||
//std::cout << GridLogMessage << " m_rr " << m_rr<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_C " << m_C<<std::endl;
|
||||
//std::cout << GridLogMessage << " m_Cinv " << m_Cinv<<std::endl;
|
||||
D=Q;
|
||||
|
||||
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
|
||||
@ -221,12 +226,14 @@ void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
|
||||
MatrixTimer.Start();
|
||||
Linop.HermOp(D, Z);
|
||||
MatrixTimer.Stop();
|
||||
//std::cout << GridLogMessage << " norm2 Z " <<norm2(Z)<<std::endl;
|
||||
|
||||
//4. M = [D^dag Z]^{-1}
|
||||
sliceInnerTimer.Start();
|
||||
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
|
||||
sliceInnerTimer.Stop();
|
||||
m_M = m_DZ.inverse();
|
||||
//std::cout << GridLogMessage << " m_DZ " <<m_DZ<<std::endl;
|
||||
|
||||
//5. X = X + D MC
|
||||
m_tmp = m_M * m_C;
|
||||
|
@ -369,6 +369,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
|
||||
{
|
||||
int NN = BlockSolverGrid->_ndimension;
|
||||
@ -387,6 +388,7 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or
|
||||
}
|
||||
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
|
||||
}
|
||||
*/
|
||||
|
||||
template<class vobj>
|
||||
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
|
||||
@ -398,14 +400,15 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
int nl = SliceGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
@ -448,14 +451,14 @@ static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<
|
||||
int Nblock = X._grid->GlobalDimensions()[Orthog];
|
||||
|
||||
GridBase *FullGrid = X._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
Lattice<vobj> Xslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// Lattice<vobj> Xslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
int nl = SliceGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl=1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
@ -498,18 +501,19 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
typedef typename vobj::vector_type vector_type;
|
||||
|
||||
GridBase *FullGrid = lhs._grid;
|
||||
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
// GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
|
||||
|
||||
int Nblock = FullGrid->GlobalDimensions()[Orthog];
|
||||
|
||||
Lattice<vobj> Lslice(SliceGrid);
|
||||
Lattice<vobj> Rslice(SliceGrid);
|
||||
// Lattice<vobj> Lslice(SliceGrid);
|
||||
// Lattice<vobj> Rslice(SliceGrid);
|
||||
|
||||
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
|
||||
|
||||
assert( FullGrid->_simd_layout[Orthog]==1);
|
||||
int nh = FullGrid->_ndimension;
|
||||
int nl = SliceGrid->_ndimension;
|
||||
// int nl = SliceGrid->_ndimension;
|
||||
int nl = nh-1;
|
||||
|
||||
//FIXME package in a convenient iterator
|
||||
//Should loop over a plane orthogonal to direction "Orthog"
|
||||
@ -550,6 +554,14 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
|
||||
mat += mat_thread;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0;i<Nblock;i++){
|
||||
for(int j=0;j<Nblock;j++){
|
||||
ComplexD sum = mat(i,j);
|
||||
FullGrid->GlobalSum(sum);
|
||||
mat(i,j)=sum;
|
||||
}}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -230,8 +230,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
||||
{
|
||||
Compressor compressor;
|
||||
int LLs = in._grid->_rdimensions[0];
|
||||
st.HaloExchange(in,compressor);
|
||||
|
||||
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
DhopCommTime -= usecond();
|
||||
st.HaloExchange(in,compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
if (dag == DaggerYes) {
|
||||
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
@ -244,12 +251,15 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOr
|
||||
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
|
||||
}
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
@ -261,6 +271,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionFie
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
@ -272,6 +283,7 @@ void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionFie
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
@ -280,6 +292,54 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
|
||||
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Report(void)
|
||||
{
|
||||
std::vector<int> latt = GridDefaultLatt();
|
||||
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _FourDimGrid->_Nprocessors;
|
||||
RealD NN = _FourDimGrid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_FourDimGrid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Implement the general interface. Here we use SAME mass on all slices
|
||||
|
@ -55,6 +55,16 @@ namespace QCD {
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -26,6 +26,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
//#include <Grid/Grid.h>
|
||||
|
||||
#ifndef GRID_QCD_GAUGE_FIX_H
|
||||
#define GRID_QCD_GAUGE_FIX_H
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
@ -188,3 +190,4 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -28,6 +28,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
using namespace Grid::QCD;
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
std::vector<int> seeds({1,2,3,4});
|
||||
@ -82,6 +85,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
Uorg = Uorg - Umu;
|
||||
std::cout << " Norm Difference "<< norm2(Uorg) << std::endl;
|
||||
std::cout << " Norm "<< norm2(Umu) << std::endl;
|
||||
|
||||
|
||||
std::cout<< "*****************************************************************" <<std::endl;
|
||||
|
@ -99,21 +99,27 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage << " Calling 5d CG for "<<Ls <<" right hand sides" <<std::endl;
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
result=zero;
|
||||
Ds.ZeroCounters();
|
||||
CG(HermOp,src,result);
|
||||
Ds.Report();
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
std::cout << GridLogMessage << " Calling multiRHS CG for "<<Ls <<" right hand sides" <<std::endl;
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
result=zero;
|
||||
Ds.ZeroCounters();
|
||||
mCG(HermOp,src,result);
|
||||
Ds.Report();
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl;
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
result=zero;
|
||||
Ds.ZeroCounters();
|
||||
BCGrQ(HermOp,src,result);
|
||||
Ds.Report();
|
||||
std::cout << GridLogMessage << "************************************************************************ "<<std::endl;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user