diff --git a/.travis.yml b/.travis.yml
index 64dae823..7d8203ce 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,68 +9,6 @@ matrix:
- os: osx
osx_image: xcode8.3
compiler: clang
- - compiler: gcc
- dist: trusty
- sudo: required
- addons:
- apt:
- sources:
- - ubuntu-toolchain-r-test
- packages:
- - g++-4.9
- - libmpfr-dev
- - libgmp-dev
- - libmpc-dev
- - libopenmpi-dev
- - openmpi-bin
- - binutils-dev
- env: VERSION=-4.9
- - compiler: gcc
- dist: trusty
- sudo: required
- addons:
- apt:
- sources:
- - ubuntu-toolchain-r-test
- packages:
- - g++-5
- - libmpfr-dev
- - libgmp-dev
- - libmpc-dev
- - libopenmpi-dev
- - openmpi-bin
- - binutils-dev
- env: VERSION=-5
- - compiler: clang
- dist: trusty
- addons:
- apt:
- sources:
- - ubuntu-toolchain-r-test
- packages:
- - g++-4.8
- - libmpfr-dev
- - libgmp-dev
- - libmpc-dev
- - libopenmpi-dev
- - openmpi-bin
- - binutils-dev
- env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- - compiler: clang
- dist: trusty
- addons:
- apt:
- sources:
- - ubuntu-toolchain-r-test
- packages:
- - g++-4.8
- - libmpfr-dev
- - libgmp-dev
- - libmpc-dev
- - libopenmpi-dev
- - openmpi-bin
- - binutils-dev
- env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
before_install:
- export GRIDDIR=`pwd`
@@ -106,9 +44,3 @@ script:
- make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check
- - echo make clean
- - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
- - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
- - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
-
-
diff --git a/README.md b/README.md
index 1e0988f3..13dd6996 100644
--- a/README.md
+++ b/README.md
@@ -1,18 +1,4 @@
-# Grid
-
-
- Last stable release |
-
-
- |
-
-
- Development branch |
-
-
- |
-
-
+# Grid [![Teamcity status](http://ci.cliath.ph.ed.ac.uk/app/rest/builds/aggregated/strob:(buildType:(affectedProject(id:Grid)),branch:name:develop)/statusIcon.svg)](http://ci.cliath.ph.ed.ac.uk/project.html?projectId=Grid&tab=projectOverview) [![Travis status](https://travis-ci.org/paboyle/Grid.svg?branch=develop)](https://travis-ci.org/paboyle/Grid)
**Data parallel C++ mathematical object library.**
diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc
index c0ce451f..58fdb84a 100644
--- a/benchmarks/Benchmark_ITT.cc
+++ b/benchmarks/Benchmark_ITT.cc
@@ -181,7 +181,6 @@ public:
#ifdef GRID_OMP
#pragma omp atomic
-#endif
ncomm++;
#ifdef GRID_OMP
diff --git a/lib/algorithms/iterative/BlockConjugateGradient.h b/lib/algorithms/iterative/BlockConjugateGradient.h
index 9418f63c..d7817c05 100644
--- a/lib/algorithms/iterative/BlockConjugateGradient.h
+++ b/lib/algorithms/iterative/BlockConjugateGradient.h
@@ -199,7 +199,12 @@ void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X)
Linop.HermOp(X, AD);
tmp = B - AD;
+ //std::cout << GridLogMessage << " initial tmp " << norm2(tmp)<< std::endl;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
+ //std::cout << GridLogMessage << " initial Q " << norm2(Q)<< std::endl;
+ //std::cout << GridLogMessage << " m_rr " << m_rr< &Linop, const Field &B, Field &X)
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
+ //std::cout << GridLogMessage << " norm2 Z " < &R,std::vector &a,const Lattice
}
};
+/*
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
int NN = BlockSolverGrid->_ndimension;
@@ -387,6 +388,7 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or
}
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
}
+*/
template
static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0)
@@ -398,14 +400,15 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
- GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
+ // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
- Lattice Xslice(SliceGrid);
- Lattice Rslice(SliceGrid);
+ // Lattice Xslice(SliceGrid);
+ // Lattice Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
- int nl = SliceGrid->_ndimension;
+ // int nl = SliceGrid->_ndimension;
+ int nl = nh-1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
@@ -448,14 +451,14 @@ static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice<
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
- GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
-
- Lattice Xslice(SliceGrid);
- Lattice Rslice(SliceGrid);
+ // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
+ // Lattice Xslice(SliceGrid);
+ // Lattice Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
- int nl = SliceGrid->_ndimension;
+ // int nl = SliceGrid->_ndimension;
+ int nl=1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
@@ -498,18 +501,19 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice
typedef typename vobj::vector_type vector_type;
GridBase *FullGrid = lhs._grid;
- GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
+ // GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
int Nblock = FullGrid->GlobalDimensions()[Orthog];
- Lattice Lslice(SliceGrid);
- Lattice Rslice(SliceGrid);
+ // Lattice Lslice(SliceGrid);
+ // Lattice Rslice(SliceGrid);
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
- int nl = SliceGrid->_ndimension;
+ // int nl = SliceGrid->_ndimension;
+ int nl = nh-1;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
@@ -550,6 +554,14 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice
mat += mat_thread;
}
}
+
+ for(int i=0;iGlobalSum(sum);
+ mat(i,j)=sum;
+ }}
+
return;
}
diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc
index 61a3c559..7d988d89 100644
--- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc
+++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.cc
@@ -230,8 +230,15 @@ void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOr
{
Compressor compressor;
int LLs = in._grid->_rdimensions[0];
+
+
+
+ DhopTotalTime -= usecond();
+ DhopCommTime -= usecond();
st.HaloExchange(in,compressor);
+ DhopCommTime += usecond();
+ DhopComputeTime -= usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) {
parallel_for (int ss = 0; ss < U._grid->oSites(); ss++) {
@@ -244,12 +251,15 @@ void ImprovedStaggeredFermion5D::DhopInternal(StencilImpl & st, LebesgueOr
Kernels::DhopSite(st,lo,U,UUU,st.CommBuf(),LLs,sU,in,out);
}
}
+ DhopComputeTime += usecond();
+ DhopTotalTime += usecond();
}
template
void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag)
{
+ DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
@@ -261,6 +271,7 @@ void ImprovedStaggeredFermion5D::DhopOE(const FermionField &in, FermionFie
template
void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionField &out,int dag)
{
+ DhopCalls+=1;
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
@@ -272,6 +283,7 @@ void ImprovedStaggeredFermion5D::DhopEO(const FermionField &in, FermionFie
template
void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField &out,int dag)
{
+ DhopCalls+=2;
conformable(in._grid,FermionGrid()); // verifies full grid
conformable(in._grid,out._grid);
@@ -280,6 +292,54 @@ void ImprovedStaggeredFermion5D::Dhop(const FermionField &in, FermionField
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
}
+template
+void ImprovedStaggeredFermion5D::Report(void)
+{
+ std::vector latt = GridDefaultLatt();
+ RealD volume = Ls; for(int mu=0;mu_Nprocessors;
+ RealD NN = _FourDimGrid->NodeCount();
+
+ std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
+
+ std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : "
+ << DhopCalls << std::endl;
+ std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime /Calls : "
+ << DhopTotalTime / DhopCalls << " us" << std::endl;
+ std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : "
+ << DhopCommTime / DhopCalls << " us" << std::endl;
+ std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : "
+ << DhopComputeTime / DhopCalls << " us" << std::endl;
+
+ // Average the compute time
+ _FourDimGrid->GlobalSum(DhopComputeTime);
+ DhopComputeTime/=NP;
+
+ RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
+ std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
+ std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
+ std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
+
+ RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
+ std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
+ std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
+ std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
+
+ std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil" <
+void ImprovedStaggeredFermion5D::ZeroCounters(void) // resets every performance counter of this operator
+{
+ DhopCalls = 0; // call count bumped by Dhop/DhopOE/DhopEO
+ DhopTotalTime = 0; // the three timers below are accumulated in DhopInternal via usecond()
+ DhopCommTime = 0;
+ DhopComputeTime = 0;
+ Stencil.ZeroCounters(); // forward the reset to each stencil object as well
+ StencilEven.ZeroCounters();
+ StencilOdd.ZeroCounters();
+}
/////////////////////////////////////////////////////////////////////////
// Implement the general interface. Here we use SAME mass on all slices
diff --git a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
index 4961da49..ca1a955a 100644
--- a/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
+++ b/lib/qcd/action/fermion/ImprovedStaggeredFermion5D.h
@@ -55,6 +55,16 @@ namespace QCD {
FermionField _tmp;
FermionField &tmp(void) { return _tmp; }
+ ////////////////////////////////////////
+ // Performance monitoring: Dhop call count and usecond() timers
+ ////////////////////////////////////////
+ void Report(void); // prints per-call timing and mflops figures
+ void ZeroCounters(void); // resets the counters below and the stencil counters
+ double DhopTotalTime = 0; // zero-initialised: no constructor sets these, and Dhop* accumulate into them
+ double DhopCalls = 0; // incremented by Dhop (+2), DhopOE and DhopEO (+1 each)
+ double DhopCommTime = 0; // time spent in st.HaloExchange inside DhopInternal
+ double DhopComputeTime = 0; // time spent in the site loops of DhopInternal
+
///////////////////////////////////////////////////////////////
// Implement the abstract base
///////////////////////////////////////////////////////////////
diff --git a/lib/qcd/utils/GaugeFix.h b/lib/qcd/utils/GaugeFix.h
index f2ea1aa2..c4ea31aa 100644
--- a/lib/qcd/utils/GaugeFix.h
+++ b/lib/qcd/utils/GaugeFix.h
@@ -26,6 +26,8 @@ Author: Peter Boyle
/* END LEGAL */
//#include
+#ifndef GRID_QCD_GAUGE_FIX_H
+#define GRID_QCD_GAUGE_FIX_H
namespace Grid {
namespace QCD {
@@ -188,3 +190,4 @@ class FourierAcceleratedGaugeFixer : public Gimpl {
}
}
+#endif
diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc
index 9732eb85..916c4b0b 100644
--- a/tests/core/Test_fft_gfix.cc
+++ b/tests/core/Test_fft_gfix.cc
@@ -28,6 +28,9 @@ Author: Peter Boyle
/* END LEGAL */
#include
+using namespace Grid;
+using namespace Grid::QCD;
+
int main (int argc, char ** argv)
{
std::vector seeds({1,2,3,4});
@@ -82,6 +85,7 @@ int main (int argc, char ** argv)
Uorg = Uorg - Umu;
std::cout << " Norm Difference "<< norm2(Uorg) << std::endl;
+ std::cout << " Norm "<< norm2(Umu) << std::endl;
std::cout<< "*****************************************************************" < HermOp(Ds);
ConjugateGradient CG(1.0e-8,10000);
@@ -99,21 +99,27 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << " Calling 5d CG for "<