diff --git a/.gitignore b/.gitignore index da7de5e4..d743ee06 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ ################ *~ *# +*.sublime-* # Precompiled Headers # ####################### @@ -91,6 +92,7 @@ build*/* ##################### *.xcodeproj/* build.sh +.vscode # Eigen source # ################ @@ -103,4 +105,21 @@ lib/fftw/* # libtool macros # ################## m4/lt* -m4/libtool.m4 \ No newline at end of file +m4/libtool.m4 + +# github pages # +################ +gh-pages/ + +# Buck files # +############## +.buck* +buck-out +BUCK +make-bin-BUCK.sh + +# generated sources # +##################### +lib/qcd/spin/gamma-gen/*.h +lib/qcd/spin/gamma-gen/*.cc + diff --git a/.travis.yml b/.travis.yml index ae3efda8..7d8203ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,64 +7,8 @@ cache: matrix: include: - os: osx - osx_image: xcode7.2 + osx_image: xcode8.3 compiler: clang - - compiler: gcc - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.9 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: VERSION=-4.9 - - compiler: gcc - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-5 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: VERSION=-5 - - compiler: clang - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.8 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz - - compiler: clang - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.8 - - libmpfr-dev - - libgmp-dev - - libmpc-dev - - libopenmpi-dev - - openmpi-bin - - binutils-dev - env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz before_install: - export GRIDDIR=`pwd` @@ -73,13 +17,15 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi install: - export CC=$CC$VERSION - export CXX=$CXX$VERSION - echo $PATH + - which autoconf + - autoconf --version + - which automake + - automake --version - which $CC - $CC --version - which $CXX @@ -92,15 +38,9 @@ script: - cd build - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none - make -j4 - - ./benchmarks/Benchmark_dwf --threads 1 + - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - echo make clean - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - make -j4 - - ./benchmarks/Benchmark_dwf --threads 1 - - echo make clean - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi - - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto - - make -j4 - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - + - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals + - make check diff --git a/Makefile.am b/Makefile.am index 818f0983..3a65cf1b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,12 +1,17 @@ # additional include paths necessary to compile the C++ library -SUBDIRS = lib benchmarks tests +SUBDIRS = lib benchmarks tests extras include $(top_srcdir)/doxygen.inc -tests: all - $(MAKE) -C tests tests +bin_SCRIPTS=grid-config -.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) + +.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL) + +tests-local: all +bench-local: all +check-local: all AM_CXXFLAGS += -I$(top_builddir)/include + ACLOCAL_AMFLAGS = -I m4 diff --git a/README.md b/README.md index c47a257c..13dd6996 100644 --- a/README.md +++ b/README.md @@ -1,41 +1,13 @@ -# Grid -
Last stable release | -
- |
-
Development branch | -
- |
-
` | Description |
| ----------- | -------------------------------------- |
-| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
| `BGQ` | Blue Gene/Q |
@@ -176,21 +202,205 @@ The following configuration is recommended for the Intel Knights Landing platfor
``` bash
../configure --enable-precision=double\
--enable-simd=KNL \
- --enable-comms=mpi-auto \
- --with-gmp= \
- --with-mpfr= \
+ --enable-comms=mpi-auto \
--enable-mkl \
CXX=icpc MPICXX=mpiicpc
```
+The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
-where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
+If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
../configure --enable-precision=double\
--enable-simd=KNL \
--enable-comms=mpi \
- --with-gmp= \
- --with-mpfr= \
--enable-mkl \
CXX=CC CC=cc
-```
\ No newline at end of file
+```
+
+If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
+``` bash
+ --with-gmp= \
+ --with-mpfr= \
+```
+where `` is the UNIX prefix where GMP and MPFR are installed.
+
+Knight's Landing with Intel Omnipath adapters with two adapters per node
+presently performs better with use of more than one rank per node, using shared memory
+for interior communication. This is the mpi3 communications implementation.
+We recommend four ranks per node for best performance, but optimum is local volume dependent.
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=KNL \
+ --enable-comms=mpi3-auto \
+ --enable-mkl \
+ CC=icpc MPICXX=mpiicpc
+```
+
+### Build setup for Intel Haswell Xeon platform
+
+The following configuration is recommended for the Intel Haswell platform:
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=AVX2 \
+ --enable-comms=mpi3-auto \
+ --enable-mkl \
+ CXX=icpc MPICXX=mpiicpc
+```
+The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
+
+If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
+``` bash
+ --with-gmp= \
+ --with-mpfr= \
+```
+where `` is the UNIX prefix where GMP and MPFR are installed.
+
+If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=AVX2 \
+ --enable-comms=mpi3 \
+ --enable-mkl \
+ CXX=CC CC=cc
+```
+Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
+one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
+```
+ export I_MPI_PIN=1
+```
+This is the default.
+
+### Build setup for Intel Skylake Xeon platform
+
+The following configuration is recommended for the Intel Skylake platform:
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=AVX512 \
+ --enable-comms=mpi3 \
+ --enable-mkl \
+ CXX=mpiicpc
+```
+The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
+
+If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
+``` bash
+ --with-gmp= \
+ --with-mpfr= \
+```
+where `` is the UNIX prefix where GMP and MPFR are installed.
+
+If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=AVX512 \
+ --enable-comms=mpi3 \
+ --enable-mkl \
+ CXX=CC CC=cc
+```
+Since Dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of
+one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using
+```
+ export I_MPI_PIN=1
+```
+This is the default.
+
+#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping):
+
+mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18
+
+TBA
+
+
+### Build setup for AMD EPYC / RYZEN
+
+The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores.
+So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total
+are common. Each chip within the module exposes a separate NUMA domain.
+There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain.
+MPI-3 is recommended with the use of four ranks per socket,
+and 8 threads per rank.
+
+The following configuration is recommended for the AMD EPYC platform.
+
+``` bash
+../configure --enable-precision=double\
+ --enable-simd=AVX2 \
+ --enable-comms=mpi3 \
+ CXX=mpicxx
+```
+
+If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed:
+``` bash
+ --with-gmp= \
+ --with-mpfr= \
+```
+where `` is the UNIX prefix where GMP and MPFR are installed.
+
+Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank.
+This can be done by invoking MPI on a wrapper script omp_bind.sh to handle this.
+
+It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and
+shared memory to communicate within this node:
+
+mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4
+
+Where omp_bind.sh does the following:
+```
+#!/bin/bash
+
+numanode=` expr $PMI_RANK % 8 `
+basecore=`expr $numanode \* 16`
+core0=`expr $basecore + 0 `
+core1=`expr $basecore + 2 `
+core2=`expr $basecore + 4 `
+core3=`expr $basecore + 6 `
+core4=`expr $basecore + 8 `
+core5=`expr $basecore + 10 `
+core6=`expr $basecore + 12 `
+core7=`expr $basecore + 14 `
+
+export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7"
+echo GOMP_CUP_AFFINITY $GOMP_CPU_AFFINITY
+
+$@
+```
+
+Performance:
+
+#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping):
+
+mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4
+
+TBA
+
+### Build setup for BlueGene/Q
+
+To be written...
+
+### Build setup for ARM Neon
+
+To be written...
+
+### Build setup for laptops, other compilers, non-cluster builds
+
+Many versions of g++ and clang++ work with Grid, and involve merely replacing CXX (and MPICXX),
+and omit the enable-mkl flag.
+
+Single node builds are enabled with
+```
+ --enable-comms=none
+```
+
+FFTW support that is not in the default search path may then enabled with
+```
+ --with-fftw=
+```
+
+BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation.
+
diff --git a/TODO b/TODO
index df8554cc..c37cbf8b 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,35 @@
TODO:
---------------
+Large item work list:
+
+1)- BG/Q port and check
+2)- Christoph's local basis expansion Lanczos
+3)- Precision conversion and sort out localConvert <-- partial
+
+ - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
+4)- Physical propagator interface
+5)- Conserved currents
+6)- Multigrid Wilson and DWF, compare to other Multigrid implementations
+7)- HDCR resume
+
+Recent DONE
+
+-- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O. <--- DONE
+-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
+-- GaugeFix into central location <-- DONE
+-- Scidac and Ildg metadata handling <-- DONE
+-- Binary I/O MPI2 IO <-- DONE
+-- Binary I/O speed up & x-strips <-- DONE
+-- Cut down the exterior overhead <-- DONE
+-- Interior legs from SHM comms <-- DONE
+-- Half-precision comms <-- DONE
+-- Merge high precision reduction into develop <-- DONE
+-- BlockCG, BCGrQ <-- DONE
+-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
+ -- slice* linalg routines for multiRHS, BlockCG
+
+-----
* Forces; the UdSdU term in gauge force term is half of what I think it should
be. This is a consequence of taking ONLY the first term in:
@@ -21,16 +50,8 @@ TODO:
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
-
-Policies:
-
-* Link smearing/boundary conds; Policy class based implementation ; framework more in place
-
* Support different boundary conditions (finite temp, chem. potential ... )
-* Support different fermion representations?
- - contained entirely within the integrator presently
-
- Sign of force term.
- Reversibility test.
@@ -41,11 +62,6 @@ Policies:
- Audit oIndex usage for cb behaviour
-- Rectangle gauge actions.
- Iwasaki,
- Symanzik,
- ... etc...
-
- Prepare multigrid for HMC. - Alternate setup schemes.
- Support for ILDG --- ugly, not done
@@ -55,9 +71,11 @@ Policies:
- FFTnD ?
- Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
+
- Gparity force term; Gparity (R)HMC.
-- Random number state save restore
+
- Mobius implementation clean up to rmove #if 0 stale code sequences
+
- CG -- profile carefully, kernel fusion, whole CG performance measurements.
================================================================
@@ -90,6 +108,7 @@ Insert/Extract
Not sure of status of this -- reverify. Things are working nicely now though.
* Make the Tensor types and Complex etc... play more nicely.
+
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
want to introduce a syntax that does not require this.
@@ -112,6 +131,8 @@ Not sure of status of this -- reverify. Things are working nicely now though.
RECENT
---------------
+ - Support different fermion representations? -- DONE
+ - contained entirely within the integrator presently
- Clean up HMC -- DONE
- LorentzScalar gets Gauge link type (cleaner). -- DONE
- Simplified the integrators a bit. -- DONE
@@ -123,6 +144,26 @@ RECENT
- Parallel io improvements -- DONE
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
+
+DONE:
+- MultiArray -- MultiRHS done
+- ConjugateGradientMultiShift -- DONE
+- MCR -- DONE
+- Remez -- Mike or Boost? -- DONE
+- Proto (ET) -- DONE
+- uBlas -- DONE ; Eigen
+- Potentially Useful Boost libraries -- DONE ; Eigen
+- Aligned allocator; memory pool -- DONE
+- Multiprecision -- DONE
+- Serialization -- DONE
+- Regex -- Not needed
+- Tokenize -- Why?
+
+- Random number state save restore -- DONE
+- Rectangle gauge actions. -- DONE
+ Iwasaki,
+ Symanzik,
+ ... etc...
Done: Cayley, Partial , ContFrac force terms.
DONE
@@ -207,6 +248,7 @@ Done
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
======================================================================================================
+* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
* Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
user pass these? Is this a QCD specific?
diff --git a/VERSION b/VERSION
index e7abbba7..bfad377d 100644
--- a/VERSION
+++ b/VERSION
@@ -1,6 +1,5 @@
-Version : 0.6.0
+Version : 0.7.0
-- AVX512, AVX2, AVX, SSE good
-- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
-- MPI and MPI3
-- HiRep, Smearing, Generic gauge group
+- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
+- MPI and MPI3 comms optimisations for KNL and OPA finished
+- Half precision comms
diff --git a/benchmarks/Benchmark_ITT.cc b/benchmarks/Benchmark_ITT.cc
new file mode 100644
index 00000000..666e4830
--- /dev/null
+++ b/benchmarks/Benchmark_ITT.cc
@@ -0,0 +1,800 @@
+ /*************************************************************************************
+
+ Grid physics library, www.github.com/paboyle/Grid
+
+ Source file: ./benchmarks/Benchmark_memory_bandwidth.cc
+
+ Copyright (C) 2015
+
+Author: Peter Boyle
+Author: paboyle
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ See the full license in the file "LICENSE" in the top level distribution directory
+ *************************************************************************************/
+ /* END LEGAL */
+#include
+
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+typedef WilsonFermion5D WilsonFermion5DR;
+typedef WilsonFermion5D WilsonFermion5DF;
+typedef WilsonFermion5D WilsonFermion5DD;
+
+
+std::vector L_list;
+std::vector Ls_list;
+std::vector mflop_list;
+
+double mflop_ref;
+double mflop_ref_err;
+
+int NN_global;
+
+struct time_statistics{
+ double mean;
+ double err;
+ double min;
+ double max;
+
+ void statistics(std::vector v){
+ double sum = std::accumulate(v.begin(), v.end(), 0.0);
+ mean = sum / v.size();
+
+ std::vector diff(v.size());
+ std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
+ double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+ err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
+
+ auto result = std::minmax_element(v.begin(), v.end());
+ min = *result.first;
+ max = *result.second;
+}
+};
+
+void comms_header(){
+ std::cout < simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
+ std::vector mpi_layout = GridDefaultMpi();
+
+ for(int mu=0;mu1) nmu++;
+
+ std::vector t_time(Nloop);
+ time_statistics timestat;
+
+ std::cout< latt_size ({lat*mpi_layout[0],
+ lat*mpi_layout[1],
+ lat*mpi_layout[2],
+ lat*mpi_layout[3]});
+
+ GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
+
+ std::vector xbuf(8);
+ std::vector rbuf(8);
+ Grid.ShmBufferFreeAll();
+ for(int d=0;d<8;d++){
+ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ }
+
+ int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+ int ncomm;
+ double dbytes;
+ std::vector times(Nloop);
+ for(int i=0;i1 ) {
+
+ int xmit_to_rank;
+ int recv_from_rank;
+ if ( dir == mu ) {
+ int comm_proc=1;
+ Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+ } else {
+ int comm_proc = mpi_layout[mu]-1;
+ Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+ }
+ tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
+ (void *)&rbuf[dir][0], recv_from_rank,
+ bytes,dir);
+
+#ifdef GRID_OMP
+#pragma omp atomic
+#endif
+ ncomm++;
+
+#ifdef GRID_OMP
+#pragma omp atomic
+#endif
+ dbytes+=tbytes;
+ }
+ }
+ Grid.Barrier();
+ double stop=usecond();
+ t_time[i] = stop-start; // microseconds
+ }
+
+ timestat.statistics(t_time);
+ // for(int i=0;i > LatticeVec;
+ typedef iVector Vec;
+
+ std::vector simd_layout = GridDefaultSimd(Nd,vReal::Nsimd());
+ std::vector mpi_layout = GridDefaultMpi();
+
+ std::cout<({45,12,81,9}));
+ for(int lat=8;lat<=lmax;lat+=4){
+
+ std::vector latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
+ int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+
+ NP= Grid.RankCount();
+ NN =Grid.NodeCount();
+
+ Vec rn ; random(sRNG,rn);
+
+ LatticeVec z(&Grid); z=rn;
+ LatticeVec x(&Grid); x=rn;
+ LatticeVec y(&Grid); y=rn;
+ double a=2.0;
+
+ uint64_t Nloop=NLOOP;
+
+ double start=usecond();
+ for(int i=0;i mflops_all;
+
+ ///////////////////////////////////////////////////////
+ // Set/Get the layout & grid size
+ ///////////////////////////////////////////////////////
+ int threads = GridThread::GetThreads();
+ std::vector mpi = GridDefaultMpi(); assert(mpi.size()==4);
+ std::vector local({L,L,L,L});
+
+ GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}),
+ GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+ uint64_t NP = TmpGrid->RankCount();
+ uint64_t NN = TmpGrid->NodeCount();
+ NN_global=NN;
+ uint64_t SHM=NP/NN;
+
+ std::vector internal;
+ if ( SHM == 1 ) internal = std::vector({1,1,1,1});
+ else if ( SHM == 2 ) internal = std::vector({2,1,1,1});
+ else if ( SHM == 4 ) internal = std::vector({2,2,1,1});
+ else if ( SHM == 8 ) internal = std::vector({2,2,2,1});
+ else assert(0);
+
+ std::vector nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
+ std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
+
+ ///////// Welcome message ////////////
+ std::cout< seeds4({1,2,3,4});
+ std::vector seeds5({5,6,7,8});
+ GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
+ GridParallelRNG RNG5(sFGrid); RNG5.SeedFixedIntegers(seeds5);
+ std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+
+ ///////// Source preparation ////////////
+ LatticeFermion src (sFGrid); random(RNG5,src);
+ LatticeFermion tmp (sFGrid);
+
+ RealD N2 = 1.0/::sqrt(norm2(src));
+ src = src*N2;
+
+ LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+
+ WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
+ LatticeFermion src_e (sFrbGrid);
+ LatticeFermion src_o (sFrbGrid);
+ LatticeFermion r_e (sFrbGrid);
+ LatticeFermion r_o (sFrbGrid);
+ LatticeFermion r_eo (sFGrid);
+ LatticeFermion err (sFGrid);
+ {
+
+ pickCheckerboard(Even,src_e,src);
+ pickCheckerboard(Odd,src_o,src);
+
+#if defined(AVX512)
+ const int num_cases = 6;
+ std::string fmt("A/S ; A/O ; U/S ; U/O ; G/S ; G/O ");
+#else
+ const int num_cases = 4;
+ std::string fmt("U/S ; U/O ; G/S ; G/O ");
+#endif
+ controls Cases [] = {
+#ifdef AVX512
+ { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
+ { QCD::WilsonKernelsStatic::OptInlineAsm , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
+#endif
+ { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
+ { QCD::WilsonKernelsStatic::OptHandUnroll, QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential },
+ { QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsThenCompute ,CartesianCommunicator::CommunicatorPolicySequential },
+ { QCD::WilsonKernelsStatic::OptGeneric , QCD::WilsonKernelsStatic::CommsAndCompute ,CartesianCommunicator::CommunicatorPolicySequential }
+ };
+
+ for(int c=0;cBarrier();
+ for(int i=0;iBarrier();
+ double t1=usecond();
+
+ sDw.ZeroCounters();
+ time_statistics timestat;
+ std::vector t_time(ncall);
+ for(uint64_t i=0;iBarrier();
+
+ double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops;
+ if ( mflops mflops_all;
+
+ ///////////////////////////////////////////////////////
+ // Set/Get the layout & grid size
+ ///////////////////////////////////////////////////////
+ int threads = GridThread::GetThreads();
+ std::vector mpi = GridDefaultMpi(); assert(mpi.size()==4);
+ std::vector local({L,L,L,L});
+
+ GridCartesian * TmpGrid = SpaceTimeGrid::makeFourDimGrid(std::vector({64,64,64,64}),
+ GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
+ uint64_t NP = TmpGrid->RankCount();
+ uint64_t NN = TmpGrid->NodeCount();
+ NN_global=NN;
+ uint64_t SHM=NP/NN;
+
+ std::vector internal;
+ if ( SHM == 1 ) internal = std::vector({1,1,1,1});
+ else if ( SHM == 2 ) internal = std::vector({2,1,1,1});
+ else if ( SHM == 4 ) internal = std::vector({2,2,1,1});
+ else if ( SHM == 8 ) internal = std::vector({2,2,2,1});
+ else assert(0);
+
+ std::vector nodes({mpi[0]/internal[0],mpi[1]/internal[1],mpi[2]/internal[2],mpi[3]/internal[3]});
+ std::vector latt4({local[0]*nodes[0],local[1]*nodes[1],local[2]*nodes[2],local[3]*nodes[3]});
+
+ ///////// Welcome message ////////////
+ std::cout< seeds4({1,2,3,4});
+ std::vector seeds5({5,6,7,8});
+ GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
+ GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
+ std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+
+ ///////// Source preparation ////////////
+ LatticeFermion src (FGrid); random(RNG5,src);
+ LatticeFermion ref (FGrid);
+ LatticeFermion tmp (FGrid);
+
+ RealD N2 = 1.0/::sqrt(norm2(src));
+ src = src*N2;
+
+ LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+
+ DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+
+ ////////////////////////////////////
+ // Naive wilson implementation
+ ////////////////////////////////////
+ {
+ LatticeGaugeField Umu5d(FGrid);
+ std::vector U(4,FGrid);
+ for(int ss=0;ssoSites();ss++){
+ for(int s=0;s(Umu5d,mu);
+ }
+ for(int mu=0;muBarrier();
+ for(int i=0;iBarrier();
+ double t1=usecond();
+ // uint64_t ncall = (uint64_t) 2.5*1000.0*1000.0*nwarm/(t1-t0);
+ // if (ncall < 500) ncall = 500;
+ uint64_t ncall = 1000;
+
+ FGrid->Broadcast(0,&ncall,sizeof(ncall));
+
+ // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"< t_time(ncall);
+ for(uint64_t i=0;iBarrier();
+
+ double volume=Ls; for(int mu=0;mumflops_best ) mflops_best = mflops;
+ if ( mflops({8,2,2,2});
+#else
+ LebesgueOrder::Block = std::vector({2,2,2,2});
+#endif
+ Benchmark::Decomposition();
+
+ int do_memory=1;
+ int do_comms =1;
+ int do_su3 =0;
+ int do_wilson=1;
+ int do_dwf =1;
+
+ if ( do_su3 ) {
+ // empty for now
+ }
+#if 1
+ int sel=2;
+ std::vector L_list({8,12,16,24});
+#else
+ int sel=1;
+ std::vector L_list({8,12});
+#endif
+ int selm1=sel-1;
+ std::vector robust_list;
+
+ std::vector wilson;
+ std::vector dwf4;
+ std::vector dwf5;
+
+ if ( do_wilson ) {
+ int Ls=1;
+ std::cout<1) ) {
+ std::cout< v){
+ double sum = std::accumulate(v.begin(), v.end(), 0.0);
+ mean = sum / v.size();
+
+ std::vector diff(v.size());
+ std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
+ double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+ err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
+
+ auto result = std::minmax_element(v.begin(), v.end());
+ min = *result.first;
+ max = *result.second;
+}
+};
+
+void header(){
+ std::cout <1) nmu++;
+ std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
+ std::vector t_time(Nloop);
+ time_statistics timestat;
+
std::cout< latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -58,15 +88,23 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
- std::vector > xbuf(8,std::vector(lat*lat*lat*Ls));
- std::vector > rbuf(8,std::vector(lat*lat*lat*Ls));
+ std::vector > xbuf(8);
+ std::vector > rbuf(8);
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+ for(int mu=0;mu<8;mu++){
+ xbuf[mu].resize(lat*lat*lat*Ls);
+ rbuf[mu].resize(lat*lat*lat*Ls);
+ // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < requests;
@@ -79,7 +117,6 @@ int main (int argc, char ** argv)
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
-
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
@@ -102,18 +139,24 @@ int main (int argc, char ** argv)
}
Grid.SendToRecvFromComplete(requests);
Grid.Barrier();
-
+ double stop=usecond();
+ t_time[i] = stop-start; // microseconds
}
- double stop=usecond();
- double dbytes = bytes;
- double xbytes = Nloop*dbytes*2.0*ncomm;
+ timestat.statistics(t_time);
+
+ double dbytes = bytes*ppn;
+ double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
- double time = stop-start; // microseconds
+ std::cout< latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
- std::vector > xbuf(8,std::vector(lat*lat*lat*Ls));
- std::vector > rbuf(8,std::vector(lat*lat*lat*Ls));
+ std::vector > xbuf(8);
+ std::vector > rbuf(8);
+ for(int mu=0;mu<8;mu++){
+ xbuf[mu].resize(lat*lat*lat*Ls);
+ rbuf[mu].resize(lat*lat*lat*Ls);
+ // std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] < latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -209,6 +266,9 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
std::vector xbuf(8);
std::vector rbuf(8);
@@ -216,73 +276,86 @@ int main (int argc, char ** argv)
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
- double start=usecond();
+ double dbytes;
for(int i=0;i requests;
- ncomm=0;
for(int mu=0;mu<4;mu++){
+
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
-
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
- Grid.StencilSendToRecvFromBegin(requests,
- (void *)&xbuf[mu][0],
- xmit_to_rank,
- (void *)&rbuf[mu][0],
- recv_from_rank,
- bytes);
+ dbytes+=
+ Grid.StencilSendToRecvFromBegin(requests,
+ (void *)&xbuf[mu][0],
+ xmit_to_rank,
+ (void *)&rbuf[mu][0],
+ recv_from_rank,
+ bytes,mu);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
- Grid.StencilSendToRecvFromBegin(requests,
- (void *)&xbuf[mu+4][0],
- xmit_to_rank,
- (void *)&rbuf[mu+4][0],
- recv_from_rank,
- bytes);
+ dbytes+=
+ Grid.StencilSendToRecvFromBegin(requests,
+ (void *)&xbuf[mu+4][0],
+ xmit_to_rank,
+ (void *)&rbuf[mu+4][0],
+ recv_from_rank,
+ bytes,mu+4);
}
}
- Grid.StencilSendToRecvFromComplete(requests);
+ Grid.StencilSendToRecvFromComplete(requests,0);
Grid.Barrier();
-
+ double stop=usecond();
+ t_time[i] = stop-start; // microseconds
+
}
- double stop=usecond();
- double dbytes = bytes;
- double xbytes = Nloop*dbytes*2.0*ncomm;
- double rbytes = xbytes;
- double bidibytes = xbytes+rbytes;
+ timestat.statistics(t_time);
+
+ dbytes=dbytes*ppn;
+ double xbytes = dbytes*0.5;
+ double rbytes = dbytes*0.5;
+ double bidibytes = dbytes;
+
+ std::cout< latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
@@ -290,6 +363,9 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
std::vector xbuf(8);
std::vector rbuf(8);
@@ -297,16 +373,18 @@ int main (int argc, char ** argv)
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
-
- double start=usecond();
+ double dbytes;
for(int i=0;i requests;
-
+ dbytes=0;
ncomm=0;
for(int mu=0;mu<4;mu++){
@@ -318,44 +396,146 @@ int main (int argc, char ** argv)
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
- Grid.StencilSendToRecvFromBegin(requests,
- (void *)&xbuf[mu][0],
- xmit_to_rank,
- (void *)&rbuf[mu][0],
- recv_from_rank,
- bytes);
- // Grid.StencilSendToRecvFromComplete(requests);
- // requests.resize(0);
+ dbytes+=
+ Grid.StencilSendToRecvFromBegin(requests,
+ (void *)&xbuf[mu][0],
+ xmit_to_rank,
+ (void *)&rbuf[mu][0],
+ recv_from_rank,
+ bytes,mu);
+ Grid.StencilSendToRecvFromComplete(requests,mu);
+ requests.resize(0);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
- Grid.StencilSendToRecvFromBegin(requests,
- (void *)&xbuf[mu+4][0],
- xmit_to_rank,
- (void *)&rbuf[mu+4][0],
- recv_from_rank,
- bytes);
- Grid.StencilSendToRecvFromComplete(requests);
+ dbytes+=
+ Grid.StencilSendToRecvFromBegin(requests,
+ (void *)&xbuf[mu+4][0],
+ xmit_to_rank,
+ (void *)&rbuf[mu+4][0],
+ recv_from_rank,
+ bytes,mu+4);
+ Grid.StencilSendToRecvFromComplete(requests,mu+4);
requests.resize(0);
}
}
Grid.Barrier();
-
+ double stop=usecond();
+ t_time[i] = stop-start; // microseconds
+
}
- double stop=usecond();
- double dbytes = bytes;
- double xbytes = Nloop*dbytes*2.0*ncomm;
- double rbytes = xbytes;
- double bidibytes = xbytes+rbytes;
+ timestat.statistics(t_time);
- double time = stop-start; // microseconds
+ dbytes=dbytes*ppn;
+ double xbytes = dbytes*0.5;
+ double rbytes = dbytes*0.5;
+ double bidibytes = dbytes;
- std::cout< latt_size ({lat*mpi_layout[0],
+ lat*mpi_layout[1],
+ lat*mpi_layout[2],
+ lat*mpi_layout[3]});
+
+ GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+ RealD Nrank = Grid._Nprocessors;
+ RealD Nnode = Grid.NodeCount();
+ RealD ppn = Nrank/Nnode;
+
+ std::vector xbuf(8);
+ std::vector rbuf(8);
+ Grid.ShmBufferFreeAll();
+ for(int d=0;d<8;d++){
+ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+ }
+
+ int ncomm;
+ int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+ double dbytes;
+ for(int i=0;i requests;
+ dbytes=0;
+ ncomm=0;
+
+ parallel_for(int dir=0;dir<8;dir++){
+
+ double tbytes;
+ int mu =dir % 4;
+
+ if (mpi_layout[mu]>1 ) {
+
+ ncomm++;
+ int xmit_to_rank;
+ int recv_from_rank;
+ if ( dir == mu ) {
+ int comm_proc=1;
+ Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+ } else {
+ int comm_proc = mpi_layout[mu]-1;
+ Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+ }
+
+ tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
+ (void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
+
+#pragma omp atomic
+ dbytes+=tbytes;
+ }
+ }
+ Grid.Barrier();
+ double stop=usecond();
+ t_time[i] = stop-start; // microseconds
+ }
+
+ timestat.statistics(t_time);
+
+ dbytes=dbytes*ppn;
+ double xbytes = dbytes*0.5;
+ double rbytes = dbytes*0.5;
+ double bidibytes = dbytes;
+
+
+ std::cout<
-Author: paboyle
+ Author: Peter Boyle
+ Author: paboyle
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
-
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
@@ -37,27 +31,33 @@ struct scal {
d internal;
};
- Gamma::GammaMatrix Gmu [] = {
- Gamma::GammaX,
- Gamma::GammaY,
- Gamma::GammaZ,
- Gamma::GammaT
+ Gamma::Algebra Gmu [] = {
+ Gamma::Algebra::GammaX,
+ Gamma::Algebra::GammaY,
+ Gamma::Algebra::GammaZ,
+ Gamma::Algebra::GammaT
};
typedef WilsonFermion5D WilsonFermion5DR;
typedef WilsonFermion5D WilsonFermion5DF;
typedef WilsonFermion5D WilsonFermion5DD;
-
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
+
int threads = GridThread::GetThreads();
std::cout< latt4 = GridDefaultLatt();
- const int Ls=8;
+ int Ls=16;
+ for(int i=0;i> Ls;
+ }
+
+
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -71,35 +71,66 @@ int main (int argc, char ** argv)
std::vector seeds4({1,2,3,4});
std::vector seeds5({5,6,7,8});
-
+
+ std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
+ std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
+ std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermion src (FGrid); random(RNG5,src);
+#if 0
+ src = zero;
+ {
+ std::vector origin({0,0,0,latt4[2]-1,0});
+ SpinColourVectorF tmp;
+ tmp=zero;
+ tmp()(0)(0)=Complex(-2.0,0.0);
+ std::cout << " source site 0 " << tmp<(Umu,mu);
+ // if (mu !=2 ) ttmp = 0;
+ // ttmp = ttmp* pow(10.0,mu);
+ PokeIndex(Umu,ttmp,mu);
+ }
+ std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
+#endif
+ ////////////////////////////////////
+ // Naive wilson implementation
+ ////////////////////////////////////
// replicate across fifth dimension
+ LatticeGaugeField Umu5d(FGrid);
+ std::vector U(4,FGrid);
for(int ss=0;ssoSites();ss++){
for(int s=0;s U(4,FGrid);
for(int mu=0;mu(Umu5d,mu);
}
+ std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
if (1)
{
@@ -120,8 +151,7 @@ int main (int argc, char ** argv)
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
-
- DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+ RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <Barrier();
Dw.ZeroCounters();
+ Dw.Dhop(src,result,0);
+ std::cout<1.0e-4) ) {
+ std::cout << "RESULT\n " << result<Barrier();
+ exit(-1);
+ }
+ */
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
+ DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
+ if (1) {
+ FGrid->Barrier();
+ DwH.ZeroCounters();
+ DwH.Dhop(src,result,0);
+ double t0=usecond();
+ for(int i=0;iBarrier();
+
+ double volume=Ls; for(int mu=0;mu site({s,x,y,z,t});
- SpinColourVector tmp;
- peekSite(tmp,src,site);
- pokeSite(tmp,ssrc,site);
- }}}}}
+
+ localConvert(src,ssrc);
std::cout<Barrier();
- double t0=usecond();
+ sDw.Dhop(ssrc,sresult,0);
sDw.ZeroCounters();
+ double t0=usecond();
for(int i=0;i site({s,x,y,z,t});
- SpinColourVector normal, simd;
- peekSite(normal,result,site);
- peekSite(simd,sresult,site);
- sum=sum+norm2(normal-simd);
- if (norm2(normal-simd) > 1.0e-6 ) {
- std::cout << "site "< 1.0e-4 ){
+ std::cout<< "sD REF\n " < 1.0e-4 ){
+ std::cout<< "sD REF\n " <::DhopEO "<::DhopEO "<Barrier();
+ sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
sDw.ZeroCounters();
- sDw.stat.init("DhopEO");
+ // sDw.stat.init("DhopEO");
double t0=usecond();
for (int i = 0; i < ncall; i++) {
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
}
double t1=usecond();
FGrid->Barrier();
- sDw.stat.print();
+ // sDw.stat.print();
double volume=Ls; for(int mu=0;mu1.0e-4) {
+
+ if(( error>1.0e-4) ) {
setCheckerboard(ssrc,ssrc_o);
setCheckerboard(ssrc,ssrc_e);
- std::cout<< ssrc << std::endl;
+ std::cout<< "DIFF\n " <1.0e-4)){
+ std::cout<< "DAG RESULT\n " <Barrier();
+ Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
for(int i=0;i1.0e-4)){
+ std::cout<< "Deo RESULT\n " < & L, int Ls, int threads, int report =0 );
diff --git a/benchmarks/Benchmark_gparity.cc b/benchmarks/Benchmark_gparity.cc
new file mode 100644
index 00000000..f6036aa8
--- /dev/null
+++ b/benchmarks/Benchmark_gparity.cc
@@ -0,0 +1,190 @@
+#include
+#include
+using namespace std;
+using namespace Grid;
+using namespace Grid::QCD;
+
+template
+struct scal {
+ d internal;
+};
+
+ Gamma::Algebra Gmu [] = {
+ Gamma::Algebra::GammaX,
+ Gamma::Algebra::GammaY,
+ Gamma::Algebra::GammaZ,
+ Gamma::Algebra::GammaT
+ };
+
+typedef typename GparityDomainWallFermionF::FermionField GparityLatticeFermionF;
+typedef typename GparityDomainWallFermionD::FermionField GparityLatticeFermionD;
+
+
+
+int main (int argc, char ** argv)
+{
+ Grid_init(&argc,&argv);
+
+ int Ls=16;
+ for(int i=0;i> Ls;
+ }
+
+
+ int threads = GridThread::GetThreads();
+ std::cout< latt4 = GridDefaultLatt();
+
+ GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
+ GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
+ GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
+ GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
+
+ std::vector seeds4({1,2,3,4});
+ std::vector seeds5({5,6,7,8});
+
+ std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
+ GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
+ std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
+ GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
+ std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
+
+ GparityLatticeFermionF src (FGrid); random(RNG5,src);
+ RealD N2 = 1.0/::sqrt(norm2(src));
+ src = src*N2;
+
+ GparityLatticeFermionF result(FGrid); result=zero;
+ GparityLatticeFermionF ref(FGrid); ref=zero;
+ GparityLatticeFermionF tmp(FGrid);
+ GparityLatticeFermionF err(FGrid);
+
+ std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
+ LatticeGaugeFieldF Umu(UGrid);
+ SU3::HotConfiguration(RNG4,Umu);
+ std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
+
+ RealD mass=0.1;
+ RealD M5 =1.8;
+
+ RealD NP = UGrid->_Nprocessors;
+ RealD NN = UGrid->NodeCount();
+
+ std::cout << GridLogMessage<< "*****************************************************************" <Barrier();
+ Dw.ZeroCounters();
+ Dw.Dhop(src,result,0);
+ std::cout<Barrier();
+
+ double volume=Ls; for(int mu=0;muBarrier();
+ DwH.ZeroCounters();
+ DwH.Dhop(src,result,0);
+ double t0=usecond();
+ for(int i=0;iBarrier();
+
+ double volume=Ls; for(int mu=0;muBarrier();
+ DwD.ZeroCounters();
+ DwD.Dhop(src_d,result_d,0);
+ std::cout<Barrier();
+
+ double volume=Ls; for(int mu=0;mu latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
uint64_t Nloop=NLOOP;
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeVec z(&Grid); //random(pRNG,z);
- LatticeVec x(&Grid); //random(pRNG,x);
- LatticeVec y(&Grid); //random(pRNG,y);
+ LatticeVec z(&Grid);// random(pRNG,z);
+ LatticeVec x(&Grid);// random(pRNG,x);
+ LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;
@@ -83,7 +83,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
- double bytes=3*vol*Nvec*sizeof(Real);
+ double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeVec z(&Grid); //random(pRNG,z);
- LatticeVec x(&Grid); //random(pRNG,x);
- LatticeVec y(&Grid); //random(pRNG,y);
+ LatticeVec z(&Grid);// random(pRNG,z);
+ LatticeVec x(&Grid);// random(pRNG,x);
+ LatticeVec y(&Grid);// random(pRNG,y);
double a=2.0;
uint64_t Nloop=NLOOP;
@@ -119,7 +119,7 @@ int main (int argc, char ** argv)
double time = (stop-start)/Nloop*1000;
double flops=vol*Nvec*2;// mul,add
- double bytes=3*vol*Nvec*sizeof(Real);
+ double bytes=3.0*vol*Nvec*sizeof(Real);
std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeVec z(&Grid); //random(pRNG,z);
- LatticeVec x(&Grid); //random(pRNG,x);
- LatticeVec y(&Grid); //random(pRNG,y);
+ LatticeVec z(&Grid);// random(pRNG,z);
+ LatticeVec x(&Grid);// random(pRNG,x);
+ LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;
@@ -154,7 +154,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
- double bytes=2*vol*Nvec*sizeof(Real);
+ double bytes=2.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*1;// mul
std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
- LatticeVec z(&Grid); //random(pRNG,z);
- LatticeVec x(&Grid); //random(pRNG,x);
- LatticeVec y(&Grid); //random(pRNG,y);
+ // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
+ LatticeVec z(&Grid);// random(pRNG,z);
+ LatticeVec x(&Grid);// random(pRNG,x);
+ LatticeVec y(&Grid);// random(pRNG,y);
RealD a=2.0;
Real nn;
double start=usecond();
@@ -187,7 +187,7 @@ int main (int argc, char ** argv)
double stop=usecond();
double time = (stop-start)/Nloop*1000;
- double bytes=vol*Nvec*sizeof(Real);
+ double bytes=1.0*vol*Nvec*sizeof(Real);
double flops=vol*Nvec*2;// mul,add
std::cout<Barrier(); \
+ t0=usecond(); \
+ for(int i=0;iBarrier(); \
+ zDw.CayleyReport(); \
+ std::cout<Barrier(); \
+ t0=usecond(); \
+ for(int i=0;iBarrier(); \
+ Dw.CayleyReport(); \
+ std::cout< gamma(Ls,std::complex(1.0,0.0));
+ ZMobiusFermionVec5dR zDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
+
std::cout<Barrier();
@@ -173,10 +209,13 @@ int main (int argc, char ** argv)
BENCH_DW_MEO(Dhop ,src,result);
BENCH_DW_MEO(DhopEO ,src_o,r_e);
- BENCH_DW(Meooe ,src_o,r_e);
+ BENCH_DW_SSC(Meooe ,src_o,r_e);
BENCH_DW(Mooee ,src_o,r_o);
BENCH_DW(MooeeInv,src_o,r_o);
+ BENCH_ZDW(Mooee ,src_o,r_o);
+ BENCH_ZDW(MooeeInv,src_o,r_o);
+
}
Grid_finalize();
diff --git a/benchmarks/Benchmark_staggered.cc b/benchmarks/Benchmark_staggered.cc
index 121dc0d5..dc2dcf91 100644
--- a/benchmarks/Benchmark_staggered.cc
+++ b/benchmarks/Benchmark_staggered.cc
@@ -51,7 +51,7 @@ int main (int argc, char ** argv)
std::vector seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
- // pRNG.SeedRandomDevice();
+ // pRNG.SeedFixedIntegers(std::vector({45,12,81,9});
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc
index b6d1d303..035af2d9 100644
--- a/benchmarks/Benchmark_su3.cc
+++ b/benchmarks/Benchmark_su3.cc
@@ -35,13 +35,14 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
+#define LMAX (64)
- int Nloop=1000;
+ int64_t Nloop=20;
std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector mpi_layout = GridDefaultMpi();
- int threads = GridThread::GetThreads();
+ int64_t threads = GridThread::GetThreads();
std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeColourMatrix z(&Grid);// random(pRNG,z);
- LatticeColourMatrix x(&Grid);// random(pRNG,x);
- LatticeColourMatrix y(&Grid);// random(pRNG,y);
+ LatticeColourMatrix z(&Grid); random(pRNG,z);
+ LatticeColourMatrix x(&Grid); random(pRNG,x);
+ LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
- for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeColourMatrix z(&Grid); //random(pRNG,z);
- LatticeColourMatrix x(&Grid); //random(pRNG,x);
- LatticeColourMatrix y(&Grid); //random(pRNG,y);
+ LatticeColourMatrix z(&Grid); random(pRNG,z);
+ LatticeColourMatrix x(&Grid); random(pRNG,x);
+ LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
- for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeColourMatrix z(&Grid); //random(pRNG,z);
- LatticeColourMatrix x(&Grid); //random(pRNG,x);
- LatticeColourMatrix y(&Grid); //random(pRNG,y);
+ LatticeColourMatrix z(&Grid); random(pRNG,z);
+ LatticeColourMatrix x(&Grid); random(pRNG,x);
+ LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
- for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
- int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+ int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
- // GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
+ GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}));
- LatticeColourMatrix z(&Grid); //random(pRNG,z);
- LatticeColourMatrix x(&Grid); //random(pRNG,x);
- LatticeColourMatrix y(&Grid); //random(pRNG,y);
+ LatticeColourMatrix z(&Grid); random(pRNG,z);
+ LatticeColourMatrix x(&Grid); random(pRNG,x);
+ LatticeColourMatrix y(&Grid); random(pRNG,y);
double start=usecond();
- for(int i=0;i seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
- // pRNG.SeedRandomDevice();
+ // pRNG.SeedFixedIntegers(std::vector({45,12,81,9});
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
@@ -106,7 +106,7 @@ int main (int argc, char ** argv)
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu]])
AC_CHECK_DECLS([be64toh],[], [], [[#include ]])
+############## Standard libraries
+AC_CHECK_LIB([m],[cos])
+AC_CHECK_LIB([stdc++],[abort])
+
############### GMP and MPFR
AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix],
@@ -60,16 +74,23 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
-############### FFTW3
-AC_ARG_WITH([fftw],
+############### FFTW3
+AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
-############### lapack
+############### LIME
+AC_ARG_WITH([lime],
+ [AS_HELP_STRING([--with-lime=prefix],
+ [try this for a non-standard install prefix of the LIME library])],
+ [AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
+ [AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
+
+############### lapack
AC_ARG_ENABLE([lapack],
- [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
+ [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in
@@ -83,6 +104,18 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac
+############### FP16 conversions
+AC_ARG_ENABLE([sfw-fp16],
+ [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
+ [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
+case ${ac_SFW_FP16} in
+ yes)
+ AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
+ no);;
+ *)
+ AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
+esac
+
############### MKL
AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@@ -99,9 +132,16 @@ case ${ac_MKL} in
AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);;
esac
+############### HDF5
+AC_ARG_WITH([hdf5],
+ [AS_HELP_STRING([--with-hdf5=prefix],
+ [try this for a non-standard install prefix of the HDF5 library])],
+ [AM_CXXFLAGS="-I$with_hdf5/include $AM_CXXFLAGS"]
+ [AM_LDFLAGS="-L$with_hdf5/lib $AM_LDFLAGS"])
+
############### first-touch
AC_ARG_ENABLE([numa],
- [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
+ [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in
@@ -127,8 +167,8 @@ if test "${ac_MKL}x" != "nox"; then
fi
AC_SEARCH_LIBS([__gmpf_init], [gmp],
- [AC_SEARCH_LIBS([mpfr_init], [mpfr],
- [AC_DEFINE([HAVE_LIBMPFR], [1],
+ [AC_SEARCH_LIBS([mpfr_init], [mpfr],
+ [AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@@ -137,7 +177,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
-fi
+fi
AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@@ -145,6 +185,29 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])
+AC_SEARCH_LIBS([limeCreateReader], [lime],
+ [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
+ [have_lime=true],
+ [AC_MSG_WARN(C-LIME library was not found in your system.
+In order to use ILGG file format please install or provide the correct path to your installation
+Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
+
+AC_SEARCH_LIBS([crc32], [z],
+ [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
+ [have_zlib=true] [LIBS="${LIBS} -lz"],
+ [AC_MSG_ERROR(zlib library was not found in your system.)])
+
+AC_SEARCH_LIBS([move_pages], [numa],
+ [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])]
+ [have_libnuma=true] [LIBS="${LIBS} -lnuma"],
+ [AC_MSG_WARN(libnuma library was not found in your system. Some optimisations will not apply)])
+
+AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
+ [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
+ [have_hdf5=true]
+ [LIBS="${LIBS} -lhdf5"], [], [-lhdf5])
+AM_CONDITIONAL(BUILD_HDF5, [ test "${have_hdf5}X" == "trueX" ])
+
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
@@ -163,19 +226,26 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
- SIMD_FLAGS='-msse4.2';;
+ case ${ac_SFW_FP16} in
+ yes)
+ SIMD_FLAGS='-msse4.2';;
+ no)
+ SIMD_FLAGS='-msse4.2 -mf16c';;
+ *)
+ AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
+ esac;;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
- SIMD_FLAGS='-mavx';;
+ SIMD_FLAGS='-mavx -mf16c';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
- SIMD_FLAGS='-mavx -mfma4';;
+ SIMD_FLAGS='-mavx -mfma4 -mf16c';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
- SIMD_FLAGS='-mavx -mfma';;
+ SIMD_FLAGS='-mavx -mfma -mf16c';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
- SIMD_FLAGS='-mavx2 -mfma';;
+ SIMD_FLAGS='-mavx2 -mfma -mf16c';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
@@ -184,6 +254,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
+ AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-march=knl';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -191,6 +262,9 @@ case ${ax_cv_cxx_compiler_vendor} in
[generic SIMD vector width (in bytes)])
SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)"
SIMD_FLAGS='';;
+ NEONv8)
+ AC_DEFINE([NEONV8],[1],[ARMv8 NEON])
+ SIMD_FLAGS='-march=armv8-a';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
@@ -219,6 +293,7 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='';;
KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
+ AC_DEFINE([KNL],[1],[Knights landing processor])
SIMD_FLAGS='-xmic-avx512';;
GEN)
AC_DEFINE([GEN],[1],[generic vector code])
@@ -256,8 +331,41 @@ case ${ac_PRECISION} in
double)
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;;
+ *)
+ AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
+ ;;
esac
+###################### Shared memory allocation technique under MPI3
+AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmget|shmopen|hugetlbfs],
+ [Select SHM allocation technique])],[ac_SHM=${enable_shm}],[ac_SHM=shmopen])
+
+case ${ac_SHM} in
+
+ shmget)
+ AC_DEFINE([GRID_MPI3_SHMGET],[1],[GRID_MPI3_SHMGET] )
+ ;;
+
+ shmopen)
+ AC_DEFINE([GRID_MPI3_SHMOPEN],[1],[GRID_MPI3_SHMOPEN] )
+ ;;
+
+ hugetlbfs)
+ AC_DEFINE([GRID_MPI3_SHMMMAP],[1],[GRID_MPI3_SHMMMAP] )
+ ;;
+
+ *)
+ AC_MSG_ERROR([${ac_SHM} unsupported --enable-shm option]);
+ ;;
+esac
+
+###################### Shared base path for SHMMMAP
+AC_ARG_ENABLE([shmpath],[AC_HELP_STRING([--enable-shmpath=path],
+ [Select SHM mmap base path for hugetlbfs])],
+ [ac_SHMPATH=${enable_shmpath}],
+ [ac_SHMPATH=/var/lib/hugetlbfs/pagesize-2MB/])
+AC_DEFINE_UNQUOTED([GRID_SHM_PATH],["$ac_SHMPATH"],[Path to a hugetlbfs filesystem for MMAPing])
+
############### communication type selection
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem],
[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
@@ -267,14 +375,14 @@ case ${ac_COMMS} in
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
comms_type='none'
;;
- mpi3l*)
- AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] )
- comms_type='mpi3l'
- ;;
mpi3*)
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
comms_type='mpi3'
;;
+ mpit)
+ AC_DEFINE([GRID_COMMS_MPIT],[1],[GRID_COMMS_MPIT] )
+ comms_type='mpit'
+ ;;
mpi*)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
comms_type='mpi'
@@ -284,7 +392,7 @@ case ${ac_COMMS} in
comms_type='shmem'
;;
*)
- AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
+ AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
case ${ac_COMMS} in
@@ -302,13 +410,13 @@ esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" == "mpiX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] )
-AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] )
+AM_CONDITIONAL(BUILD_COMMS_MPIT, [ test "${comms_type}X" == "mpitX" ] )
AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
############### RNG selection
-AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
+AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
[Select Random Number Generator to be used])],\
- [ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
+ [ac_RNG=${enable_rng}],[ac_RNG=sitmo])
case ${ac_RNG} in
ranlux48)
@@ -317,8 +425,11 @@ case ${ac_RNG} in
mt19937)
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
;;
+ sitmo)
+ AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
+ ;;
*)
- AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
+ AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac
@@ -335,7 +446,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
- AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
+ AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac
@@ -347,7 +458,7 @@ case ${ac_CHROMA} in
yes|no)
;;
*)
- AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
+ AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac
@@ -368,29 +479,31 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
+GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
+GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
+GRID_LIBS=$LIBS
+GRID_SHORT_SHA=`git rev-parse --short HEAD`
+GRID_SHA=`git rev-parse HEAD`
+GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
-AC_CONFIG_FILES(Makefile)
-AC_CONFIG_FILES(lib/Makefile)
-AC_CONFIG_FILES(tests/Makefile)
-AC_CONFIG_FILES(tests/IO/Makefile)
-AC_CONFIG_FILES(tests/core/Makefile)
-AC_CONFIG_FILES(tests/debug/Makefile)
-AC_CONFIG_FILES(tests/forces/Makefile)
-AC_CONFIG_FILES(tests/hmc/Makefile)
-AC_CONFIG_FILES(tests/solver/Makefile)
-AC_CONFIG_FILES(tests/qdpxx/Makefile)
-AC_CONFIG_FILES(benchmarks/Makefile)
-AC_OUTPUT
+AC_SUBST([GRID_CXXFLAGS])
+AC_SUBST([GRID_LDFLAGS])
+AC_SUBST([GRID_LIBS])
+AC_SUBST([GRID_SHA])
+AC_SUBST([GRID_BRANCH])
+
+git_commit=`cd $srcdir && ./scripts/configure.commit`
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
+----- GIT VERSION -------------------------------------
+$git_commit
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
@@ -400,13 +513,18 @@ compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
-Threading : ${ac_openmp}
+Threading : ${ac_openmp}
Communications type : ${comms_type}
+Shared memory allocator : ${ac_SHM}
+Shared memory mmap path : ${ac_SHMPATH}
Default precision : ${ac_PRECISION}
-RNG choice : ${ac_RNG}
+Software FP16 conversion : ${ac_SFW_FP16}
+RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
+LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
+HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
@@ -415,7 +533,32 @@ LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
--------------------------------------------------------" > config.summary
+-------------------------------------------------------" > grid.configure.summary
+
+GRID_SUMMARY="`cat grid.configure.summary`"
+AM_SUBST_NOTMAKE([GRID_SUMMARY])
+AC_SUBST([GRID_SUMMARY])
+
+AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
+AC_CONFIG_FILES(Makefile)
+AC_CONFIG_FILES(lib/Makefile)
+AC_CONFIG_FILES(tests/Makefile)
+AC_CONFIG_FILES(tests/IO/Makefile)
+AC_CONFIG_FILES(tests/core/Makefile)
+AC_CONFIG_FILES(tests/debug/Makefile)
+AC_CONFIG_FILES(tests/forces/Makefile)
+AC_CONFIG_FILES(tests/hadrons/Makefile)
+AC_CONFIG_FILES(tests/hmc/Makefile)
+AC_CONFIG_FILES(tests/solver/Makefile)
+AC_CONFIG_FILES(tests/smearing/Makefile)
+AC_CONFIG_FILES(tests/qdpxx/Makefile)
+AC_CONFIG_FILES(tests/testu01/Makefile)
+AC_CONFIG_FILES(benchmarks/Makefile)
+AC_CONFIG_FILES(extras/Makefile)
+AC_CONFIG_FILES(extras/Hadrons/Makefile)
+AC_OUTPUT
+
echo ""
-cat config.summary
+cat grid.configure.summary
echo ""
+
diff --git a/extras/Hadrons/Application.cc b/extras/Hadrons/Application.cc
new file mode 100644
index 00000000..90ebcfd7
--- /dev/null
+++ b/extras/Hadrons/Application.cc
@@ -0,0 +1,318 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: extras/Hadrons/Application.cc
+
+Copyright (C) 2015
+Copyright (C) 2016
+
+Author: Antonin Portelli
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+
+#include
+#include
+
+using namespace Grid;
+using namespace QCD;
+using namespace Hadrons;
+
+#define BIG_SEP "==============="
+#define SEP "---------------"
+
+/******************************************************************************
+ * Application implementation *
+ ******************************************************************************/
+// constructors ////////////////////////////////////////////////////////////////
+Application::Application(void)
+{
+ LOG(Message) << "Modules available:" << std::endl;
+ auto list = ModuleFactory::getInstance().getBuilderList();
+ for (auto &m: list)
+ {
+ LOG(Message) << " " << m << std::endl;
+ }
+ auto dim = GridDefaultLatt(), mpi = GridDefaultMpi(), loc(dim);
+ locVol_ = 1;
+ for (unsigned int d = 0; d < dim.size(); ++d)
+ {
+ loc[d] /= mpi[d];
+ locVol_ *= loc[d];
+ }
+ LOG(Message) << "Global lattice: " << dim << std::endl;
+ LOG(Message) << "MPI partition : " << mpi << std::endl;
+ LOG(Message) << "Local lattice : " << loc << std::endl;
+}
+
+Application::Application(const Application::GlobalPar &par)
+: Application()
+{
+ setPar(par);
+}
+
+Application::Application(const std::string parameterFileName)
+: Application()
+{
+ parameterFileName_ = parameterFileName;
+}
+
+// environment shortcut ////////////////////////////////////////////////////////
+Environment & Application::env(void) const
+{
+ return Environment::getInstance();
+}
+
+// access //////////////////////////////////////////////////////////////////////
+void Application::setPar(const Application::GlobalPar &par)
+{
+ par_ = par;
+ env().setSeed(strToVec(par_.seed));
+}
+
+const Application::GlobalPar & Application::getPar(void)
+{
+ return par_;
+}
+
+// execute /////////////////////////////////////////////////////////////////////
+void Application::run(void)
+{
+ if (!parameterFileName_.empty() and (env().getNModule() == 0))
+ {
+ parseParameterFile(parameterFileName_);
+ }
+ if (!scheduled_)
+ {
+ schedule();
+ }
+ printSchedule();
+ configLoop();
+}
+
+// parse parameter file ////////////////////////////////////////////////////////
+class ObjectId: Serializable
+{
+public:
+ GRID_SERIALIZABLE_CLASS_MEMBERS(ObjectId,
+ std::string, name,
+ std::string, type);
+};
+
+void Application::parseParameterFile(const std::string parameterFileName)
+{
+ XmlReader reader(parameterFileName);
+ GlobalPar par;
+ ObjectId id;
+
+ LOG(Message) << "Building application from '" << parameterFileName << "'..." << std::endl;
+ read(reader, "parameters", par);
+ setPar(par);
+ push(reader, "modules");
+ push(reader, "module");
+ do
+ {
+ read(reader, "id", id);
+ env().createModule(id.name, id.type, reader);
+ } while (reader.nextElement("module"));
+ pop(reader);
+ pop(reader);
+}
+
+void Application::saveParameterFile(const std::string parameterFileName)
+{
+ XmlWriter writer(parameterFileName);
+ ObjectId id;
+ const unsigned int nMod = env().getNModule();
+
+ LOG(Message) << "Saving application to '" << parameterFileName << "'..." << std::endl;
+ write(writer, "parameters", getPar());
+ push(writer, "modules");
+ for (unsigned int i = 0; i < nMod; ++i)
+ {
+ push(writer, "module");
+ id.name = env().getModuleName(i);
+ id.type = env().getModule(i)->getRegisteredName();
+ write(writer, "id", id);
+ env().getModule(i)->saveParameters(writer, "options");
+ pop(writer);
+ }
+ pop(writer);
+ pop(writer);
+}
+
+// schedule computation ////////////////////////////////////////////////////////
+#define MEM_MSG(size)\
+sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
+
+#define DEFINE_MEMPEAK \
+GeneticScheduler::ObjFunc memPeak = \
+[this](const std::vector &program)\
+{\
+ unsigned int memPeak;\
+ bool msg;\
+ \
+ msg = HadronsLogMessage.isActive();\
+ HadronsLogMessage.Active(false);\
+ env().dryRun(true);\
+ memPeak = env().executeProgram(program);\
+ env().dryRun(false);\
+ env().freeAll();\
+ HadronsLogMessage.Active(true);\
+ \
+ return memPeak;\
+}
+
+void Application::schedule(void)
+{
+ DEFINE_MEMPEAK;
+
+ // build module dependency graph
+ LOG(Message) << "Building module graph..." << std::endl;
+ auto graph = env().makeModuleGraph();
+ auto con = graph.getConnectedComponents();
+
+ // constrained topological sort using a genetic algorithm
+ LOG(Message) << "Scheduling computation..." << std::endl;
+ LOG(Message) << " #module= " << graph.size() << std::endl;
+ LOG(Message) << " population size= " << par_.genetic.popSize << std::endl;
+ LOG(Message) << " max. generation= " << par_.genetic.maxGen << std::endl;
+ LOG(Message) << " max. cst. generation= " << par_.genetic.maxCstGen << std::endl;
+ LOG(Message) << " mutation rate= " << par_.genetic.mutationRate << std::endl;
+
+ unsigned int k = 0, gen, prevPeak, nCstPeak = 0;
+ std::random_device rd;
+ GeneticScheduler::Parameters par;
+
+ par.popSize = par_.genetic.popSize;
+ par.mutationRate = par_.genetic.mutationRate;
+ par.seed = rd();
+ memPeak_ = 0;
+ CartesianCommunicator::BroadcastWorld(0, &(par.seed), sizeof(par.seed));
+ for (unsigned int i = 0; i < con.size(); ++i)
+ {
+ GeneticScheduler scheduler(con[i], memPeak, par);
+
+ gen = 0;
+ do
+ {
+ LOG(Debug) << "Generation " << gen << ":" << std::endl;
+ scheduler.nextGeneration();
+ if (gen != 0)
+ {
+ if (prevPeak == scheduler.getMinValue())
+ {
+ nCstPeak++;
+ }
+ else
+ {
+ nCstPeak = 0;
+ }
+ }
+
+ prevPeak = scheduler.getMinValue();
+ if (gen % 10 == 0)
+ {
+ LOG(Iterative) << "Generation " << gen << ": "
+ << MEM_MSG(scheduler.getMinValue()) << std::endl;
+ }
+
+ gen++;
+ } while ((gen < par_.genetic.maxGen)
+ and (nCstPeak < par_.genetic.maxCstGen));
+ auto &t = scheduler.getMinSchedule();
+ if (scheduler.getMinValue() > memPeak_)
+ {
+ memPeak_ = scheduler.getMinValue();
+ }
+ for (unsigned int j = 0; j < t.size(); ++j)
+ {
+ program_.push_back(t[j]);
+ }
+ }
+ scheduled_ = true;
+}
+
+void Application::saveSchedule(const std::string filename)
+{
+ TextWriter writer(filename);
+ std::vector program;
+
+ if (!scheduled_)
+ {
+ HADRON_ERROR("Computation not scheduled");
+ }
+ LOG(Message) << "Saving current schedule to '" << filename << "'..."
+ << std::endl;
+ for (auto address: program_)
+ {
+ program.push_back(env().getModuleName(address));
+ }
+ write(writer, "schedule", program);
+}
+
+void Application::loadSchedule(const std::string filename)
+{
+ DEFINE_MEMPEAK;
+
+ TextReader reader(filename);
+ std::vector program;
+
+ LOG(Message) << "Loading schedule from '" << filename << "'..."
+ << std::endl;
+ read(reader, "schedule", program);
+ program_.clear();
+ for (auto &name: program)
+ {
+ program_.push_back(env().getModuleAddress(name));
+ }
+ scheduled_ = true;
+ memPeak_ = memPeak(program_);
+}
+
+void Application::printSchedule(void)
+{
+ if (!scheduled_)
+ {
+ HADRON_ERROR("Computation not scheduled");
+ }
+ LOG(Message) << "Schedule (memory peak: " << MEM_MSG(memPeak_) << "):"
+ << std::endl;
+ for (unsigned int i = 0; i < program_.size(); ++i)
+ {
+ LOG(Message) << std::setw(4) << i + 1 << ": "
+ << env().getModuleName(program_[i]) << std::endl;
+ }
+}
+
+// loop on configurations //////////////////////////////////////////////////////
+void Application::configLoop(void)
+{
+ auto range = par_.trajCounter;
+
+ for (unsigned int t = range.start; t < range.end; t += range.step)
+ {
+ LOG(Message) << BIG_SEP << " Starting measurement for trajectory " << t
+ << " " << BIG_SEP << std::endl;
+ env().setTrajectory(t);
+ env().executeProgram(program_);
+ }
+ LOG(Message) << BIG_SEP << " End of measurement " << BIG_SEP << std::endl;
+ env().freeAll();
+}
diff --git a/extras/Hadrons/Application.hpp b/extras/Hadrons/Application.hpp
new file mode 100644
index 00000000..fce9b6eb
--- /dev/null
+++ b/extras/Hadrons/Application.hpp
@@ -0,0 +1,132 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: extras/Hadrons/Application.hpp
+
+Copyright (C) 2015
+Copyright (C) 2016
+
+Author: Antonin Portelli
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+
+#ifndef Hadrons_Application_hpp_
+#define Hadrons_Application_hpp_
+
+#include
+#include
+#include
+#include
+
+BEGIN_HADRONS_NAMESPACE
+
+/******************************************************************************
+ * Main program manager *
+ ******************************************************************************/
+class Application
+{
+public:
+ class TrajRange: Serializable
+ {
+ public:
+ GRID_SERIALIZABLE_CLASS_MEMBERS(TrajRange,
+ unsigned int, start,
+ unsigned int, end,
+ unsigned int, step);
+ };
+ class GeneticPar: Serializable
+ {
+ public:
+ GeneticPar(void):
+ popSize{20}, maxGen{1000}, maxCstGen{100}, mutationRate{.1} {};
+ public:
+ GRID_SERIALIZABLE_CLASS_MEMBERS(GeneticPar,
+ unsigned int, popSize,
+ unsigned int, maxGen,
+ unsigned int, maxCstGen,
+ double , mutationRate);
+ };
+ class GlobalPar: Serializable
+ {
+ public:
+ GRID_SERIALIZABLE_CLASS_MEMBERS(GlobalPar,
+ TrajRange, trajCounter,
+ GeneticPar, genetic,
+ std::string, seed);
+ };
+public:
+ // constructors
+ Application(void);
+ Application(const GlobalPar &par);
+ Application(const std::string parameterFileName);
+ // destructor
+ virtual ~Application(void) = default;
+ // access
+ void setPar(const GlobalPar &par);
+ const GlobalPar & getPar(void);
+ // module creation
+ template
+ void createModule(const std::string name);
+ template
+ void createModule(const std::string name, const typename M::Par &par);
+ // execute
+ void run(void);
+ // XML parameter file I/O
+ void parseParameterFile(const std::string parameterFileName);
+ void saveParameterFile(const std::string parameterFileName);
+ // schedule computation
+ void schedule(void);
+ void saveSchedule(const std::string filename);
+ void loadSchedule(const std::string filename);
+ void printSchedule(void);
+ // loop on configurations
+ void configLoop(void);
+private:
+ // environment shortcut
+ Environment & env(void) const;
+private:
+ long unsigned int locVol_;
+ std::string parameterFileName_{""};
+ GlobalPar par_;
+ std::vector program_;
+ Environment::Size memPeak_;
+ bool scheduled_{false};
+};
+
+/******************************************************************************
+ * Application template implementation *
+ ******************************************************************************/
+// module creation /////////////////////////////////////////////////////////////
+template
+void Application::createModule(const std::string name)
+{
+ env().createModule(name);
+}
+
+template
+void Application::createModule(const std::string name,
+ const typename M::Par &par)
+{
+ env().createModule(name, par);
+}
+
+END_HADRONS_NAMESPACE
+
+#endif // Hadrons_Application_hpp_
diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc
new file mode 100644
index 00000000..0e7a4326
--- /dev/null
+++ b/extras/Hadrons/Environment.cc
@@ -0,0 +1,793 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: extras/Hadrons/Environment.cc
+
+Copyright (C) 2015
+Copyright (C) 2016
+
+Author: Antonin Portelli
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+
+#include
+#include
+#include
+
+using namespace Grid;
+using namespace QCD;
+using namespace Hadrons;
+
+/******************************************************************************
+ * Environment implementation *
+ ******************************************************************************/
+// constructor /////////////////////////////////////////////////////////////////
+Environment::Environment(void)
+{
+ dim_ = GridDefaultLatt();
+ nd_ = dim_.size();
+ grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
+ dim_, GridDefaultSimd(nd_, vComplex::Nsimd()),
+ GridDefaultMpi()));
+ gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
+ auto loc = getGrid()->LocalDimensions();
+ locVol_ = 1;
+ for (unsigned int d = 0; d < loc.size(); ++d)
+ {
+ locVol_ *= loc[d];
+ }
+ rng4d_.reset(new GridParallelRNG(grid4d_.get()));
+}
+
+// dry run /////////////////////////////////////////////////////////////////////
+void Environment::dryRun(const bool isDry)
+{
+ dryRun_ = isDry;
+}
+
+bool Environment::isDryRun(void) const
+{
+ return dryRun_;
+}
+
+// trajectory number ///////////////////////////////////////////////////////////
+void Environment::setTrajectory(const unsigned int traj)
+{
+ traj_ = traj;
+}
+
+unsigned int Environment::getTrajectory(void) const
+{
+ return traj_;
+}
+
+// grids ///////////////////////////////////////////////////////////////////////
+void Environment::createGrid(const unsigned int Ls)
+{
+ if (grid5d_.find(Ls) == grid5d_.end())
+ {
+ auto g = getGrid();
+
+ grid5d_[Ls].reset(SpaceTimeGrid::makeFiveDimGrid(Ls, g));
+ gridRb5d_[Ls].reset(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, g));
+ }
+}
+
+GridCartesian * Environment::getGrid(const unsigned int Ls) const
+{
+ try
+ {
+ if (Ls == 1)
+ {
+ return grid4d_.get();
+ }
+ else
+ {
+ return grid5d_.at(Ls).get();
+ }
+ }
+ catch(std::out_of_range &)
+ {
+ HADRON_ERROR("no grid with Ls= " << Ls);
+ }
+}
+
+GridRedBlackCartesian * Environment::getRbGrid(const unsigned int Ls) const
+{
+ try
+ {
+ if (Ls == 1)
+ {
+ return gridRb4d_.get();
+ }
+ else
+ {
+ return gridRb5d_.at(Ls).get();
+ }
+ }
+ catch(std::out_of_range &)
+ {
+ HADRON_ERROR("no red-black 5D grid with Ls= " << Ls);
+ }
+}
+
+unsigned int Environment::getNd(void) const
+{
+ return nd_;
+}
+
+std::vector Environment::getDim(void) const
+{
+ return dim_;
+}
+
+int Environment::getDim(const unsigned int mu) const
+{
+ return dim_[mu];
+}
+
+// random number generator /////////////////////////////////////////////////////
+void Environment::setSeed(const std::vector &seed)
+{
+ rng4d_->SeedFixedIntegers(seed);
+}
+
+GridParallelRNG * Environment::get4dRng(void) const
+{
+ return rng4d_.get();
+}
+
+// module management ///////////////////////////////////////////////////////////
+void Environment::pushModule(Environment::ModPt &pt)
+{
+ std::string name = pt->getName();
+
+ if (!hasModule(name))
+ {
+ std::vector inputAddress;
+ unsigned int address;
+ ModuleInfo m;
+
+ m.data = std::move(pt);
+ m.type = typeIdPt(*m.data.get());
+ m.name = name;
+ auto input = m.data->getInput();
+ for (auto &in: input)
+ {
+ if (!hasObject(in))
+ {
+ addObject(in , -1);
+ }
+ m.input.push_back(objectAddress_[in]);
+ }
+ auto output = m.data->getOutput();
+ module_.push_back(std::move(m));
+ address = static_cast(module_.size() - 1);
+ moduleAddress_[name] = address;
+ for (auto &out: output)
+ {
+ if (!hasObject(out))
+ {
+ addObject(out, address);
+ }
+ else
+ {
+ if (object_[objectAddress_[out]].module < 0)
+ {
+ object_[objectAddress_[out]].module = address;
+ }
+ else
+ {
+ HADRON_ERROR("object '" + out
+ + "' is already produced by module '"
+ + module_[object_[getObjectAddress(out)].module].name
+ + "' (while pushing module '" + name + "')");
+ }
+ }
+ }
+ }
+ else
+ {
+ HADRON_ERROR("module '" + name + "' already exists");
+ }
+}
+
+unsigned int Environment::getNModule(void) const
+{
+ return module_.size();
+}
+
+void Environment::createModule(const std::string name, const std::string type,
+ XmlReader &reader)
+{
+ auto &factory = ModuleFactory::getInstance();
+ auto pt = factory.create(type, name);
+
+ pt->parseParameters(reader, "options");
+ pushModule(pt);
+}
+
+ModuleBase * Environment::getModule(const unsigned int address) const
+{
+ if (hasModule(address))
+ {
+ return module_[address].data.get();
+ }
+ else
+ {
+ HADRON_ERROR("no module with address " + std::to_string(address));
+ }
+}
+
+ModuleBase * Environment::getModule(const std::string name) const
+{
+ return getModule(getModuleAddress(name));
+}
+
+unsigned int Environment::getModuleAddress(const std::string name) const
+{
+ if (hasModule(name))
+ {
+ return moduleAddress_.at(name);
+ }
+ else
+ {
+ HADRON_ERROR("no module with name '" + name + "'");
+ }
+}
+
+std::string Environment::getModuleName(const unsigned int address) const
+{
+ if (hasModule(address))
+ {
+ return module_[address].name;
+ }
+ else
+ {
+ HADRON_ERROR("no module with address " + std::to_string(address));
+ }
+}
+
+std::string Environment::getModuleType(const unsigned int address) const
+{
+ if (hasModule(address))
+ {
+ return typeName(module_[address].type);
+ }
+ else
+ {
+ HADRON_ERROR("no module with address " + std::to_string(address));
+ }
+}
+
+std::string Environment::getModuleType(const std::string name) const
+{
+ return getModuleType(getModuleAddress(name));
+}
+
+std::string Environment::getModuleNamespace(const unsigned int address) const
+{
+ std::string type = getModuleType(address), ns;
+
+ auto pos2 = type.rfind("::");
+ auto pos1 = type.rfind("::", pos2 - 2);
+
+ return type.substr(pos1 + 2, pos2 - pos1 - 2);
+}
+
+std::string Environment::getModuleNamespace(const std::string name) const
+{
+ return getModuleNamespace(getModuleAddress(name));
+}
+
+bool Environment::hasModule(const unsigned int address) const
+{
+ return (address < module_.size());
+}
+
+bool Environment::hasModule(const std::string name) const
+{
+ return (moduleAddress_.find(name) != moduleAddress_.end());
+}
+
+Graph Environment::makeModuleGraph(void) const
+{
+ Graph moduleGraph;
+
+ for (unsigned int i = 0; i < module_.size(); ++i)
+ {
+ moduleGraph.addVertex(i);
+ for (auto &j: module_[i].input)
+ {
+ moduleGraph.addEdge(object_[j].module, i);
+ }
+ }
+
+ return moduleGraph;
+}
+
+#define BIG_SEP "==============="
+#define SEP "---------------"
+#define MEM_MSG(size)\
+sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
+
+Environment::Size
+Environment::executeProgram(const std::vector &p)
+{
+ Size memPeak = 0, sizeBefore, sizeAfter;
+ std::vector> freeProg;
+ bool continueCollect, nothingFreed;
+
+ // build garbage collection schedule
+ freeProg.resize(p.size());
+ for (unsigned int i = 0; i < object_.size(); ++i)
+ {
+ auto pred = [i, this](const unsigned int j)
+ {
+ auto &in = module_[j].input;
+ auto it = std::find(in.begin(), in.end(), i);
+
+ return (it != in.end()) or (j == object_[i].module);
+ };
+ auto it = std::find_if(p.rbegin(), p.rend(), pred);
+ if (it != p.rend())
+ {
+ freeProg[p.rend() - it - 1].insert(i);
+ }
+ }
+
+ // program execution
+ for (unsigned int i = 0; i < p.size(); ++i)
+ {
+ // execute module
+ if (!isDryRun())
+ {
+ LOG(Message) << SEP << " Measurement step " << i+1 << "/"
+ << p.size() << " (module '" << module_[p[i]].name
+ << "') " << SEP << std::endl;
+ }
+ (*module_[p[i]].data)();
+ sizeBefore = getTotalSize();
+ // print used memory after execution
+ if (!isDryRun())
+ {
+ LOG(Message) << "Allocated objects: " << MEM_MSG(sizeBefore)
+ << std::endl;
+ }
+ if (sizeBefore > memPeak)
+ {
+ memPeak = sizeBefore;
+ }
+ // garbage collection for step i
+ if (!isDryRun())
+ {
+ LOG(Message) << "Garbage collection..." << std::endl;
+ }
+ nothingFreed = true;
+ do
+ {
+ continueCollect = false;
+ auto toFree = freeProg[i];
+ for (auto &j: toFree)
+ {
+ // continue garbage collection while there are still
+ // objects without owners
+ continueCollect = continueCollect or !hasOwners(j);
+ if(freeObject(j))
+ {
+ // if an object has been freed, remove it from
+ // the garbage collection schedule
+ freeProg[i].erase(j);
+ nothingFreed = false;
+ }
+ }
+ } while (continueCollect);
+ // any remaining objects in step i garbage collection schedule
+ // is scheduled for step i + 1
+ if (i + 1 < p.size())
+ {
+ for (auto &j: freeProg[i])
+ {
+ freeProg[i + 1].insert(j);
+ }
+ }
+ // print used memory after garbage collection if necessary
+ if (!isDryRun())
+ {
+ sizeAfter = getTotalSize();
+ if (sizeBefore != sizeAfter)
+ {
+ LOG(Message) << "Allocated objects: " << MEM_MSG(sizeAfter)
+ << std::endl;
+ }
+ else
+ {
+ LOG(Message) << "Nothing to free" << std::endl;
+ }
+ }
+ }
+
+ return memPeak;
+}
+
+Environment::Size Environment::executeProgram(const std::vector &p)
+{
+ std::vector pAddress;
+
+ for (auto &n: p)
+ {
+ pAddress.push_back(getModuleAddress(n));
+ }
+
+ return executeProgram(pAddress);
+}
+
+// general memory management ///////////////////////////////////////////////////
+void Environment::addObject(const std::string name, const int moduleAddress)
+{
+ if (!hasObject(name))
+ {
+ ObjInfo info;
+
+ info.name = name;
+ info.module = moduleAddress;
+ object_.push_back(std::move(info));
+ objectAddress_[name] = static_cast(object_.size() - 1);
+ }
+ else
+ {
+ HADRON_ERROR("object '" + name + "' already exists");
+ }
+}
+
+void Environment::registerObject(const unsigned int address,
+ const unsigned int size, const unsigned int Ls)
+{
+ if (!hasRegisteredObject(address))
+ {
+ if (hasObject(address))
+ {
+ object_[address].size = size;
+ object_[address].Ls = Ls;
+ object_[address].isRegistered = true;
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+ }
+ else
+ {
+ HADRON_ERROR("object with address " + std::to_string(address)
+ + " already registered");
+ }
+}
+
+void Environment::registerObject(const std::string name,
+ const unsigned int size, const unsigned int Ls)
+{
+ if (!hasObject(name))
+ {
+ addObject(name);
+ }
+ registerObject(getObjectAddress(name), size, Ls);
+}
+
+unsigned int Environment::getObjectAddress(const std::string name) const
+{
+ if (hasObject(name))
+ {
+ return objectAddress_.at(name);
+ }
+ else
+ {
+ HADRON_ERROR("no object with name '" + name + "'");
+ }
+}
+
+std::string Environment::getObjectName(const unsigned int address) const
+{
+ if (hasObject(address))
+ {
+ return object_[address].name;
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+std::string Environment::getObjectType(const unsigned int address) const
+{
+ if (hasRegisteredObject(address))
+ {
+ if (object_[address].type)
+ {
+ return typeName(object_[address].type);
+ }
+ else
+ {
+ return "";
+ }
+ }
+ else if (hasObject(address))
+ {
+ HADRON_ERROR("object with address " + std::to_string(address)
+ + " exists but is not registered");
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+std::string Environment::getObjectType(const std::string name) const
+{
+ return getObjectType(getObjectAddress(name));
+}
+
+Environment::Size Environment::getObjectSize(const unsigned int address) const
+{
+ if (hasRegisteredObject(address))
+ {
+ return object_[address].size;
+ }
+ else if (hasObject(address))
+ {
+ HADRON_ERROR("object with address " + std::to_string(address)
+ + " exists but is not registered");
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+Environment::Size Environment::getObjectSize(const std::string name) const
+{
+ return getObjectSize(getObjectAddress(name));
+}
+
+unsigned int Environment::getObjectModule(const unsigned int address) const
+{
+ if (hasObject(address))
+ {
+ return object_[address].module;
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+unsigned int Environment::getObjectModule(const std::string name) const
+{
+ return getObjectModule(getObjectAddress(name));
+}
+
+unsigned int Environment::getObjectLs(const unsigned int address) const
+{
+ if (hasRegisteredObject(address))
+ {
+ return object_[address].Ls;
+ }
+ else if (hasObject(address))
+ {
+ HADRON_ERROR("object with address " + std::to_string(address)
+ + " exists but is not registered");
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+unsigned int Environment::getObjectLs(const std::string name) const
+{
+ return getObjectLs(getObjectAddress(name));
+}
+
+bool Environment::hasObject(const unsigned int address) const
+{
+ return (address < object_.size());
+}
+
+bool Environment::hasObject(const std::string name) const
+{
+ auto it = objectAddress_.find(name);
+
+ return ((it != objectAddress_.end()) and hasObject(it->second));
+}
+
+bool Environment::hasRegisteredObject(const unsigned int address) const
+{
+ if (hasObject(address))
+ {
+ return object_[address].isRegistered;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Environment::hasRegisteredObject(const std::string name) const
+{
+ if (hasObject(name))
+ {
+ return hasRegisteredObject(getObjectAddress(name));
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Environment::hasCreatedObject(const unsigned int address) const
+{
+ if (hasObject(address))
+ {
+ return (object_[address].data != nullptr);
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Environment::hasCreatedObject(const std::string name) const
+{
+ if (hasObject(name))
+ {
+ return hasCreatedObject(getObjectAddress(name));
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Environment::isObject5d(const unsigned int address) const
+{
+ return (getObjectLs(address) > 1);
+}
+
+bool Environment::isObject5d(const std::string name) const
+{
+ return (getObjectLs(name) > 1);
+}
+
+Environment::Size Environment::getTotalSize(void) const
+{
+ Environment::Size size = 0;
+
+ for (auto &o: object_)
+ {
+ if (o.isRegistered)
+ {
+ size += o.size;
+ }
+ }
+
+ return size;
+}
+
+void Environment::addOwnership(const unsigned int owner,
+ const unsigned int property)
+{
+ if (hasObject(property))
+ {
+ object_[property].owners.insert(owner);
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(property));
+ }
+ if (hasObject(owner))
+ {
+ object_[owner].properties.insert(property);
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(owner));
+ }
+}
+
+void Environment::addOwnership(const std::string owner,
+ const std::string property)
+{
+ addOwnership(getObjectAddress(owner), getObjectAddress(property));
+}
+
+bool Environment::hasOwners(const unsigned int address) const
+{
+
+ if (hasObject(address))
+ {
+ return (!object_[address].owners.empty());
+ }
+ else
+ {
+ HADRON_ERROR("no object with address " + std::to_string(address));
+ }
+}
+
+bool Environment::hasOwners(const std::string name) const
+{
+ return hasOwners(getObjectAddress(name));
+}
+
+bool Environment::freeObject(const unsigned int address)
+{
+ if (!hasOwners(address))
+ {
+ if (!isDryRun() and object_[address].isRegistered)
+ {
+ LOG(Message) << "Destroying object '" << object_[address].name
+ << "'" << std::endl;
+ }
+ for (auto &p: object_[address].properties)
+ {
+ object_[p].owners.erase(address);
+ }
+ object_[address].size = 0;
+ object_[address].Ls = 0;
+ object_[address].isRegistered = false;
+ object_[address].type = nullptr;
+ object_[address].owners.clear();
+ object_[address].properties.clear();
+ object_[address].data.reset(nullptr);
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Environment::freeObject(const std::string name)
+{
+ return freeObject(getObjectAddress(name));
+}
+
+void Environment::freeAll(void)
+{
+ for (unsigned int i = 0; i < object_.size(); ++i)
+ {
+ freeObject(i);
+ }
+}
+
+void Environment::printContent(void)
+{
+ LOG(Message) << "Modules: " << std::endl;
+ for (unsigned int i = 0; i < module_.size(); ++i)
+ {
+ LOG(Message) << std::setw(4) << i << ": "
+ << getModuleName(i) << std::endl;
+ }
+ LOG(Message) << "Objects: " << std::endl;
+ for (unsigned int i = 0; i < object_.size(); ++i)
+ {
+ LOG(Message) << std::setw(4) << i << ": "
+ << getObjectName(i) << std::endl;
+ }
+}
diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp
new file mode 100644
index 00000000..13264bd5
--- /dev/null
+++ b/extras/Hadrons/Environment.hpp
@@ -0,0 +1,427 @@
+/*************************************************************************************
+
+Grid physics library, www.github.com/paboyle/Grid
+
+Source file: extras/Hadrons/Environment.hpp
+
+Copyright (C) 2015
+Copyright (C) 2016
+
+Author: Antonin Portelli
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+
+#ifndef Hadrons_Environment_hpp_
+#define Hadrons_Environment_hpp_
+
+#include
+#include
+
+#ifndef SITE_SIZE_TYPE
+#define SITE_SIZE_TYPE unsigned int
+#endif
+
+BEGIN_HADRONS_NAMESPACE
+
+/******************************************************************************
+ * Global environment *
+ ******************************************************************************/
+// forward declaration of Module
+class ModuleBase;
+
+class Object
+{
+public:
+ Object(void) = default;
+ virtual ~Object(void) = default;
+};
+
+template
+class Holder: public Object
+{
+public:
+ Holder(void) = default;
+ Holder(T *pt);
+ virtual ~Holder(void) = default;
+ T & get(void) const;
+ T * getPt(void) const;
+ void reset(T *pt);
+private:
+ std::unique_ptr objPt_{nullptr};
+};
+
+class Environment
+{
+ SINGLETON(Environment);
+public:
+ typedef SITE_SIZE_TYPE Size;
+ typedef std::unique_ptr ModPt;
+ typedef std::unique_ptr GridPt;
+ typedef std::unique_ptr GridRbPt;
+ typedef std::unique_ptr