
Compare commits


3 Commits

402 changed files with 12652 additions and 60147 deletions

.gitignore

```diff
@@ -92,7 +92,6 @@ build*/*
 #####################
 *.xcodeproj/*
 build.sh
-.vscode
 # Eigen source #
 ################
@@ -107,10 +106,6 @@ lib/fftw/*
 m4/lt*
 m4/libtool.m4
-# github pages #
-################
-gh-pages/
 # Buck files #
 ##############
 .buck*
@@ -121,5 +116,4 @@ make-bin-BUCK.sh
 # generated sources #
 #####################
 lib/qcd/spin/gamma-gen/*.h
 lib/qcd/spin/gamma-gen/*.cc
```

.travis.yml

```diff
@@ -7,11 +7,9 @@ cache:
 matrix:
   include:
     - os: osx
-      osx_image: xcode8.3
+      osx_image: xcode7.2
       compiler: clang
     - compiler: gcc
-      dist: trusty
-      sudo: required
      addons:
        apt:
          sources:
@@ -26,8 +24,6 @@ matrix:
           - binutils-dev
      env: VERSION=-4.9
    - compiler: gcc
-     dist: trusty
-     sudo: required
      addons:
        apt:
          sources:
@@ -42,7 +38,6 @@ matrix:
           - binutils-dev
      env: VERSION=-5
    - compiler: clang
-     dist: trusty
      addons:
        apt:
          sources:
@@ -57,7 +52,6 @@ matrix:
           - binutils-dev
      env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
    - compiler: clang
-     dist: trusty
      addons:
        apt:
          sources:
@@ -79,15 +73,13 @@ before_install:
    - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
    - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
+   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
+   - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi

 install:
    - export CC=$CC$VERSION
    - export CXX=$CXX$VERSION
    - echo $PATH
-   - which autoconf
-   - autoconf --version
-   - which automake
-   - automake --version
    - which $CC
    - $CC --version
    - which $CXX
@@ -100,15 +92,15 @@ script:
    - cd build
    - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
    - make -j4
-   - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
+   - ./benchmarks/Benchmark_dwf --threads 1
    - echo make clean
    - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
    - make -j4
-   - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
-   - make check
+   - ./benchmarks/Benchmark_dwf --threads 1
    - echo make clean
-   - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
-   - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
-   - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
+   - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
+   - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
+   - make -j4
+   - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
+   - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
```

Makefile.am

```diff
@@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras

 include $(top_srcdir)/doxygen.inc

-bin_SCRIPTS=grid-config
-
-.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
-
-tests-local: all
-bench-local: all
-check-local: all
+tests: all
+	$(MAKE) -C tests tests
+
+.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)

 AM_CXXFLAGS += -I$(top_builddir)/include
 ACLOCAL_AMFLAGS = -I m4
```

README.md

````diff
@@ -22,26 +22,6 @@ Last update Nov 2016.

 _Please do not send pull requests to the `master` branch which is reserved for releases._

-### Compilers
-
-Intel ICPC v16.0.3 and later
-Clang v3.5 and later (need 3.8 and later for OpenMP)
-GCC v4.9.x (recommended)
-GCC v6.3 and later
-
-### Important:
-
-Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
-
-The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
-
-GCC v5.x
-GCC v6.1, v6.2
-
 ### Bug report

 _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
@@ -52,7 +32,7 @@ When you file an issue, please go though the following checklist:
 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
 3. Give the exact `configure` command used.
 4. Attach `config.log`.
-5. Attach `grid.config.summary`.
+5. Attach `config.summary`.
 6. Attach the output of `make V=1`.
 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
@@ -115,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use `
 `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
 customise the build.

-Finally, you can build, check, and install Grid:
+Finally, you can build and install Grid:

 ``` bash
-make; make check; make install
+make; make install
 ```

 To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
@@ -141,7 +121,7 @@ If you want to build all the tests at once just use `make tests`.
 - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
 - `--enable-precision={single|double}`: set the default precision (default: `double`).
 - `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
-- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
+- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `).
 - `--disable-timers`: disable system dependent high-resolution timers.
 - `--enable-chroma`: enable Chroma regression tests.
 - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@@ -179,6 +159,7 @@ Alternatively, some CPU codenames can be directly used:

 | `<code>`    | Description                            |
 | ----------- | -------------------------------------- |
+| `KNC`       | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
 | `KNL`       | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
 | `BGQ`       | Blue Gene/Q                            |
````

TODO

```diff
@@ -1,33 +1,6 @@
 TODO:
 ---------------

-Large item work list:
-
-1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O
-2)- Christoph's local basis expansion Lanczos
-3)- BG/Q port and check
-4)- Precision conversion and sort out localConvert <-- partial
-  - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
-5)- Physical propagator interface
-6)- Conserved currents
-7)- Multigrid Wilson and DWF, compare to other Multigrid implementations
-8)- HDCR resume
-
-Recent DONE
-
--- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
--- GaugeFix into central location <-- DONE
--- Scidac and Ildg metadata handling <-- DONE
--- Binary I/O MPI2 IO <-- DONE
--- Binary I/O speed up & x-strips <-- DONE
--- Cut down the exterior overhead <-- DONE
--- Interior legs from SHM comms <-- DONE
--- Half-precision comms <-- DONE
--- Merge high precision reduction into develop <-- DONE
--- BlockCG, BCGrQ <-- DONE
--- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
--- slice* linalg routines for multiRHS, BlockCG
------

 * Forces; the UdSdU term in gauge force term is half of what I think it should
   be. This is a consequence of taking ONLY the first term in:
@@ -48,8 +21,16 @@ Recent DONE
 This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
 This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.

+Policies:
+
+* Link smearing/boundary conds; Policy class based implementation ; framework more in place
+
 * Support different boundary conditions (finite temp, chem. potential ... )

+* Support different fermion representations?
+  - contained entirely within the integrator presently
+
 - Sign of force term.

 - Reversibility test.
@@ -60,6 +41,11 @@ Recent DONE

 - Audit oIndex usage for cb behaviour

+- Rectangle gauge actions.
+  Iwasaki,
+  Symanzik,
+  ... etc...
+
 - Prepare multigrid for HMC. - Alternate setup schemes.

 - Support for ILDG --- ugly, not done
@@ -69,11 +55,9 @@ Recent DONE
 - FFTnD ?

 - Gparity; hand opt use template specialisation elegance to enable the optimised paths ?

 - Gparity force term; Gparity (R)HMC.
-- Random number state save restore

 - Mobius implementation clean up to rmove #if 0 stale code sequences
 - CG -- profile carefully, kernel fusion, whole CG performance measurements.

 ================================================================
@@ -106,7 +90,6 @@ Insert/Extract
 Not sure of status of this -- reverify. Things are working nicely now though.

 * Make the Tensor types and Complex etc... play more nicely.
-  - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
   QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken TensorRemove, but I
   want to introduce a syntax that does not require this.
@@ -129,8 +112,6 @@ Not sure of status of this -- reverify. Things are working nicely now though.
 RECENT
 ---------------

-  - Support different fermion representations? -- DONE
-    - contained entirely within the integrator presently
   - Clean up HMC                                              -- DONE
   - LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
   - Simplified the integrators a bit.                         -- DONE
@@ -142,26 +123,6 @@ RECENT
   - Parallel io improvements                                  -- DONE
   - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE

-DONE:
-  - MultiArray                         -- MultiRHS done
-  - ConjugateGradientMultiShift        -- DONE
-  - MCR                                -- DONE
-  - Remez -- Mike or Boost?            -- DONE
-  - Proto (ET)                         -- DONE
-  - uBlas                              -- DONE ; Eigen
-  - Potentially Useful Boost libraries -- DONE ; Eigen
-  - Aligned allocator; memory pool     -- DONE
-  - Multiprecision                     -- DONE
-  - Serialization                      -- DONE
-  - Regex                              -- Not needed
-  - Tokenize                           -- Why?
-
-  - Random number state save restore   -- DONE
-  - Rectangle gauge actions.           -- DONE
-    Iwasaki,
-    Symanzik,
-    ... etc...
-
 Done: Cayley, Partial , ContFrac force terms.

 DONE
@@ -246,7 +207,6 @@ Done
 FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
 ======================================================================================================

-* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
 * Command line args for geometry, simd, etc. layout. Is it necessary to have
   user pass these? Is this a QCD specific?
```
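The expression behind the factor-of-two note in the first TODO hunk is elided between hunks, but the standard argument it alludes to can be sketched (this is an assumption, not a quote from the file): varying the action with respect to a link produces a term plus its adjoint,

```latex
\delta S \;=\; \mathrm{Tr}\!\left[\frac{\partial S}{\partial U}\,\delta U\right]
        \;+\; \mathrm{Tr}\!\left[\left(\frac{\partial S}{\partial U}\,\delta U\right)^{\dagger}\right]
        \;=\; 2\,\mathrm{Re}\,\mathrm{Tr}\!\left[\frac{\partial S}{\partial U}\,\delta U\right]
```

so evaluating only the first trace yields half of the true variation, which is why the Test_xxx_force routines apply the 2x by hand.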

VERSION

```diff
@@ -1,5 +1,6 @@
-Version : 0.7.0
+Version : 0.6.0

-- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
-- MPI and MPI3 comms optimisations for KNL and OPA finished
-- Half precision comms
+- AVX512, AVX2, AVX, SSE good
+- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
+- MPI and MPI3
+- HiRep, Smearing, Generic gauge group
```

benchmarks/Benchmark_comms.cc

```diff
@@ -31,32 +31,6 @@ using namespace std;
 using namespace Grid;
 using namespace Grid::QCD;

-struct time_statistics{
-  double mean;
-  double err;
-  double min;
-  double max;
-
-  void statistics(std::vector<double> v){
-      double sum = std::accumulate(v.begin(), v.end(), 0.0);
-      mean = sum / v.size();
-
-      std::vector<double> diff(v.size());
-      std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
-      double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
-      err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
-
-      auto result = std::minmax_element(v.begin(), v.end());
-      min = *result.first;
-      max = *result.second;
-  }
-};
-
-void header(){
-  std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
-            <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
-};
-
 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
```
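The time_statistics helper deleted above is self-contained; a minimal standalone sketch of it, fed with made-up timings, shows what it reports (mean, standard error of the mean, min, max):

```cpp
// Standalone sketch of the time_statistics struct removed in the hunk above.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

struct time_statistics {
  double mean, err, min, max;
  void statistics(const std::vector<double> &v) {
    mean = std::accumulate(v.begin(), v.end(), 0.0) / v.size();
    std::vector<double> diff(v.size());
    std::transform(v.begin(), v.end(), diff.begin(),
                   [=](double x) { return x - mean; });
    double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
    err = std::sqrt(sq_sum / (v.size() * (v.size() - 1))); // std. error of the mean
    auto mm = std::minmax_element(v.begin(), v.end());
    min = *mm.first;
    max = *mm.second;
  }
};

int main() {
  std::vector<double> t_time{102.0, 98.0, 101.0, 99.0}; // made-up microseconds per loop
  time_statistics ts;
  ts.statistics(t_time);
  std::cout << "mean " << ts.mean << " err " << ts.err
            << " min " << ts.min << " max " << ts.max << "\n";
}
```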
```diff
@@ -66,21 +40,17 @@ int main (int argc, char ** argv)
   int threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

-  int Nloop=100;
+  int Nloop=10;
   int nmu=0;
-  int maxlat=24;
   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;

-  std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
-  std::vector<double> t_time(Nloop);
-  time_statistics timestat;
-
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  header();
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  int maxlat=16;
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

       std::vector<int> latt_size  ({lat*mpi_layout[0],
                                     lat*mpi_layout[1],
@@ -88,9 +58,6 @@ int main (int argc, char ** argv)
                                     lat*mpi_layout[3]});

       GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      RealD Nrank = Grid._Nprocessors;
-      RealD Nnode = Grid.NodeCount();
-      RealD ppn = Nrank/Nnode;

       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@@ -98,8 +65,8 @@ int main (int argc, char ** argv)
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

-      for(int i=0;i<Nloop;i++){
-        double start=usecond();
+      double start=usecond();
+      for(int i=0;i<Nloop;i++){

         std::vector<CartesianCommunicator::CommsRequest_t> requests;
@@ -135,24 +102,18 @@ int main (int argc, char ** argv)
         }
         Grid.SendToRecvFromComplete(requests);
         Grid.Barrier();
-        double stop=usecond();
-        t_time[i] = stop-start; // microseconds
       }
+      double stop=usecond();

-      timestat.statistics(t_time);
-
-      double dbytes = bytes*ppn;
-      double xbytes = dbytes*2.0*ncomm;
+      double dbytes = bytes;
+      double xbytes = Nloop*dbytes*2.0*ncomm;
       double rbytes = xbytes;
       double bidibytes = xbytes+rbytes;

-      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
-               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
-               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
-               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
-               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
-               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
+      double time = stop-start; // microseconds
+
+      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
     }
   }
```
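Both report formats divide byte counts by microseconds, so the printed figures are MB/s. A standalone sketch of the accounting on the incoming side of the diff (sizes and time are made up; 96 bytes per HalfSpinColourVectorD is an assumption, 2 spin x 3 colour complex doubles):

```cpp
// Sketch of the halo-exchange bandwidth accounting used above. Per loop
// iteration, each communicated dimension sends one face buffer in each of two
// directions, so unidirectional traffic is 2*ncomm*bytes per iteration;
// bidirectional counts the matching receives too. bytes/microsecond == MB/s.
#include <iostream>

int main() {
  int    lat = 16, Ls = 8, ncomm = 4;       // example lattice extent, 5th dim, comm dims
  double bytes = lat*lat*lat*Ls * 96.0;     // assumed 96 B per HalfSpinColourVectorD
  double time  = 5000.0;                    // pretend total loop time in microseconds
  int    Nloop = 10;
  double xbytes    = Nloop * bytes * 2.0 * ncomm; // unidirectional bytes moved
  double bidibytes = 2.0 * xbytes;                // send + receive
  std::cout << "MB/s uni  " << xbytes    / time << "\n"
            << "MB/s bidi " << bidibytes / time << "\n";
}
```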
```diff
@@ -160,17 +121,15 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  header();
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

       std::vector<int> latt_size  ({lat,lat,lat,lat});

       GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      RealD Nrank = Grid._Nprocessors;
-      RealD Nnode = Grid.NodeCount();
-      RealD ppn = Nrank/Nnode;

       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@@ -179,8 +138,8 @@ int main (int argc, char ** argv)
       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);

-      for(int i=0;i<Nloop;i++){
-        double start=usecond();
+      double start=usecond();
+      for(int i=0;i<Nloop;i++){

         ncomm=0;
         for(int mu=0;mu<4;mu++){
@@ -219,37 +178,30 @@ int main (int argc, char ** argv)
           }
         }
         Grid.Barrier();
-        double stop=usecond();
-        t_time[i] = stop-start; // microseconds
       }
+      double stop=usecond();

-      timestat.statistics(t_time);
-
-      double dbytes = bytes*ppn;
-      double xbytes = dbytes*2.0*ncomm;
+      double dbytes = bytes;
+      double xbytes = Nloop*dbytes*2.0*ncomm;
       double rbytes = xbytes;
       double bidibytes = xbytes+rbytes;

-      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
-               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
-               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
-               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
-               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
-               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
+      double time = stop-start;
+
+      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
     }
   }

+  Nloop=100;
+
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  header();
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;

-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){

       std::vector<int> latt_size  ({lat*mpi_layout[0],
                                     lat*mpi_layout[1],
@@ -257,9 +209,6 @@ int main (int argc, char ** argv)
                                     lat*mpi_layout[3]});

       GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      RealD Nrank = Grid._Nprocessors;
-      RealD Nnode = Grid.NodeCount();
-      RealD ppn = Nrank/Nnode;

       std::vector<HalfSpinColourVectorD *> xbuf(8);
       std::vector<HalfSpinColourVectorD *> rbuf(8);
@@ -267,115 +216,16 @@ int main (int argc, char ** argv)
       for(int d=0;d<8;d++){
        xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
        rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-       bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-       bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
       }

       int ncomm;
       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
-      double dbytes;
+      double start=usecond();
       for(int i=0;i<Nloop;i++){
-        double start=usecond();
-        dbytes=0;
-        ncomm=0;

         std::vector<CartesianCommunicator::CommsRequest_t> requests;
-        for(int mu=0;mu<4;mu++){
-          if (mpi_layout[mu]>1 ) {
-            ncomm++;
-            int comm_proc=1;
-            int xmit_to_rank;
-            int recv_from_rank;
-            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
-            dbytes+=
-              Grid.StencilSendToRecvFromBegin(requests,
-                                              (void *)&xbuf[mu][0],
-                                              xmit_to_rank,
-                                              (void *)&rbuf[mu][0],
-                                              recv_from_rank,
-                                              bytes);
-            comm_proc = mpi_layout[mu]-1;
-            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
-            dbytes+=
-              Grid.StencilSendToRecvFromBegin(requests,
-                                              (void *)&xbuf[mu+4][0],
-                                              xmit_to_rank,
-                                              (void *)&rbuf[mu+4][0],
-                                              recv_from_rank,
-                                              bytes);
-          }
-        }
-        Grid.StencilSendToRecvFromComplete(requests);
-        Grid.Barrier();
-        double stop=usecond();
-        t_time[i] = stop-start; // microseconds
-      }
-
-      timestat.statistics(t_time);
-
-      dbytes=dbytes*ppn;
-      double xbytes    = dbytes*0.5;
-      double rbytes    = dbytes*0.5;
-      double bidibytes = dbytes;
-
-      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
-               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
-               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
-               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
-               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
-               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
-    }
-  }
-
-  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
-  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
-  header();
-
-  for(int lat=4;lat<=maxlat;lat+=4){
-    for(int Ls=8;Ls<=32;Ls*=2){
-
-      std::vector<int> latt_size  ({lat*mpi_layout[0],
-                                    lat*mpi_layout[1],
-                                    lat*mpi_layout[2],
-                                    lat*mpi_layout[3]});
-
-      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
-      RealD Nrank = Grid._Nprocessors;
-      RealD Nnode = Grid.NodeCount();
-      RealD ppn = Nrank/Nnode;
-
-      std::vector<HalfSpinColourVectorD *> xbuf(8);
-      std::vector<HalfSpinColourVectorD *> rbuf(8);
-      Grid.ShmBufferFreeAll();
-      for(int d=0;d<8;d++){
-       xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-       rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-       bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-       bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
-      }
-
-      int ncomm;
-      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
-      double dbytes;
-      for(int i=0;i<Nloop;i++){
-        double start=usecond();
-
-        std::vector<CartesianCommunicator::CommsRequest_t> requests;
-        dbytes=0;
         ncomm=0;
         for(int mu=0;mu<4;mu++){
@@ -387,52 +237,123 @@ int main (int argc, char ** argv)
            int recv_from_rank;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
-           dbytes+=
-             Grid.StencilSendToRecvFromBegin(requests,
-                                             (void *)&xbuf[mu][0],
-                                             xmit_to_rank,
-                                             (void *)&rbuf[mu][0],
-                                             recv_from_rank,
-                                             bytes);
-           Grid.StencilSendToRecvFromComplete(requests);
-           requests.resize(0);
+           Grid.StencilSendToRecvFromBegin(requests,
+                                           (void *)&xbuf[mu][0],
+                                           xmit_to_rank,
+                                           (void *)&rbuf[mu][0],
+                                           recv_from_rank,
+                                           bytes);

            comm_proc = mpi_layout[mu]-1;
            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
-           dbytes+=
-             Grid.StencilSendToRecvFromBegin(requests,
-                                             (void *)&xbuf[mu+4][0],
-                                             xmit_to_rank,
-                                             (void *)&rbuf[mu+4][0],
-                                             recv_from_rank,
-                                             bytes);
-           Grid.StencilSendToRecvFromComplete(requests);
-           requests.resize(0);
+           Grid.StencilSendToRecvFromBegin(requests,
+                                           (void *)&xbuf[mu+4][0],
+                                           xmit_to_rank,
+                                           (void *)&rbuf[mu+4][0],
+                                           recv_from_rank,
+                                           bytes);
          }
        }
+       Grid.StencilSendToRecvFromComplete(requests);
        Grid.Barrier();
-       double stop=usecond();
-       t_time[i] = stop-start; // microseconds
       }
+      double stop=usecond();

-      timestat.statistics(t_time);
-
-      dbytes=dbytes*ppn;
-      double xbytes    = dbytes*0.5;
-      double rbytes    = dbytes*0.5;
-      double bidibytes = dbytes;
+      double dbytes = bytes;
+      double xbytes = Nloop*dbytes*2.0*ncomm;
+      double rbytes = xbytes;
+      double bidibytes = xbytes+rbytes;

-      std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
-               <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
-               <<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
-               <<xbytes/timestat.max <<" "<< xbytes/timestat.min
-               << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
-               << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
+      double time = stop-start; // microseconds
+
+      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
     }
   }
+
+  Nloop=100;
+
+  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+  std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
+  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+  std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
+
+  for(int lat=4;lat<=maxlat;lat+=2){
+    for(int Ls=1;Ls<=16;Ls*=2){
+
+      std::vector<int> latt_size  ({lat*mpi_layout[0],
+                                    lat*mpi_layout[1],
+                                    lat*mpi_layout[2],
+                                    lat*mpi_layout[3]});
+
+      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
+
+      std::vector<HalfSpinColourVectorD *> xbuf(8);
+      std::vector<HalfSpinColourVectorD *> rbuf(8);
+      Grid.ShmBufferFreeAll();
+      for(int d=0;d<8;d++){
+       xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+       rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
+      }
+
+      int ncomm;
+      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
+
+      double start=usecond();
+      for(int i=0;i<Nloop;i++){
+
+        std::vector<CartesianCommunicator::CommsRequest_t> requests;
+        ncomm=0;
+        for(int mu=0;mu<4;mu++){
+          if (mpi_layout[mu]>1 ) {
+            ncomm++;
+            int comm_proc=1;
+            int xmit_to_rank;
+            int recv_from_rank;
+            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            Grid.StencilSendToRecvFromBegin(requests,
+                                            (void *)&xbuf[mu][0],
+                                            xmit_to_rank,
+                                            (void *)&rbuf[mu][0],
+                                            recv_from_rank,
+                                            bytes);
+            //	    Grid.StencilSendToRecvFromComplete(requests);
+            //	    requests.resize(0);
+
+            comm_proc = mpi_layout[mu]-1;
+            Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
+            Grid.StencilSendToRecvFromBegin(requests,
+                                            (void *)&xbuf[mu+4][0],
+                                            xmit_to_rank,
+                                            (void *)&rbuf[mu+4][0],
+                                            recv_from_rank,
+                                            bytes);
+            Grid.StencilSendToRecvFromComplete(requests);
+            requests.resize(0);
+          }
+        }
+        Grid.Barrier();
+      }
+      double stop=usecond();
+
+      double dbytes = bytes;
+      double xbytes = Nloop*dbytes*2.0*ncomm;
+      double rbytes = xbytes;
+      double bidibytes = xbytes+rbytes;
+
+      double time = stop-start; // microseconds
+
+      std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
+    }
+  }
```
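The two STENCIL benchmarks differ mainly in when completion is forced: the concurrent variant posts every direction before completing, while the sequential variant completes each exchange before posting the next. A hedged plain-MPI sketch of the two patterns (this is not Grid's CartesianCommunicator API, just the underlying idea):

```cpp
// Plain-MPI sketch of the two halo-exchange patterns benchmarked above:
// "concurrent" posts all directions up front and waits once; "sequential"
// waits on each direction before posting the next.
#include <mpi.h>
#include <vector>

void halo_concurrent(char *xbuf[2], char *rbuf[2], int to[2], int from[2],
                     int bytes, MPI_Comm comm) {
  std::vector<MPI_Request> reqs;
  for (int d = 0; d < 2; d++) {                // post both directions first
    MPI_Request r;
    MPI_Irecv(rbuf[d], bytes, MPI_CHAR, from[d], 0, comm, &r); reqs.push_back(r);
    MPI_Isend(xbuf[d], bytes, MPI_CHAR, to[d],   0, comm, &r); reqs.push_back(r);
  }
  MPI_Waitall((int)reqs.size(), reqs.data(), MPI_STATUSES_IGNORE);
}

void halo_sequential(char *xbuf[2], char *rbuf[2], int to[2], int from[2],
                     int bytes, MPI_Comm comm) {
  for (int d = 0; d < 2; d++) {                // one direction at a time
    MPI_Request reqs[2];
    MPI_Irecv(rbuf[d], bytes, MPI_CHAR, from[d], 0, comm, &reqs[0]);
    MPI_Isend(xbuf[d], bytes, MPI_CHAR, to[d],   0, comm, &reqs[1]);
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
  }
}
```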

benchmarks/Benchmark_dwf.cc

```diff
@@ -1,22 +1,28 @@
 /*************************************************************************************

     Grid physics library, www.github.com/paboyle/Grid

     Source file: ./benchmarks/Benchmark_dwf.cc

     Copyright (C) 2015

 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>

     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License along
     with this program; if not, write to the Free Software Foundation, Inc.,
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

     See the full license in the file "LICENSE" in the top level distribution directory

 *************************************************************************************/
 /*  END LEGAL */
@@ -42,16 +48,16 @@ typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR;
 typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF;
 typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD;

 int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);

   int threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

   std::vector<int> latt4 = GridDefaultLatt();
-  const int Ls=16;
+  const int Ls=8;
   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
@@ -65,66 +71,35 @@ int main (int argc, char ** argv)

   std::vector<int> seeds4({1,2,3,4});
   std::vector<int> seeds5({5,6,7,8});

-  std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
-  std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
-  std::cout << GridLogMessage << "Initialised RNGs" << std::endl;

   LatticeFermion src   (FGrid); random(RNG5,src);
-#if 0
-  src = zero;
-  {
-    std::vector<int> origin({0,0,0,latt4[2]-1,0});
-    SpinColourVectorF tmp;
-    tmp=zero;
-    tmp()(0)(0)=Complex(-2.0,0.0);
-    std::cout << " source site 0 " << tmp<<std::endl;
-    pokeSite(tmp,src,origin);
-  }
-#else
-  RealD N2 = 1.0/::sqrt(norm2(src));
-  src = src*N2;
-#endif
-
   LatticeFermion result(FGrid); result=zero;
   LatticeFermion    ref(FGrid);    ref=zero;
   LatticeFermion    tmp(FGrid);
   LatticeFermion    err(FGrid);

-  std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
   LatticeGaugeField Umu(UGrid);
-  SU3::HotConfiguration(RNG4,Umu);
-  std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
-#if 0
-  Umu=1.0;
-  for(int mu=0;mu<Nd;mu++){
-    LatticeColourMatrix ttmp(UGrid);
-    ttmp = PeekIndex<LorentzIndex>(Umu,mu);
-    //    if (mu !=2 ) ttmp = 0;
-    //    ttmp = ttmp* pow(10.0,mu);
-    PokeIndex<LorentzIndex>(Umu,ttmp,mu);
-  }
-  std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
-#endif
+  random(RNG4,Umu);

-  ////////////////////////////////////
-  // Naive wilson implementation
-  ////////////////////////////////////
-  // replicate across fifth dimension
   LatticeGaugeField Umu5d(FGrid);
-  std::vector<LatticeColourMatrix> U(4,FGrid);

+  // replicate across fifth dimension
   for(int ss=0;ss<Umu._grid->oSites();ss++){
     for(int s=0;s<Ls;s++){
       Umu5d._odata[Ls*ss+s] = Umu._odata[ss];
     }
   }

+  ////////////////////////////////////
+  // Naive wilson implementation
+  ////////////////////////////////////
+  std::vector<LatticeColourMatrix> U(4,FGrid);
   for(int mu=0;mu<Nd;mu++){
     U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
   }
-  std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;

   if (1)
   {
```
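The replication loop above copies the 4d gauge field into every fifth-dimension slice using the flattened index Ls*ss+s, that is, the fifth dimension is innermost. A tiny standalone sketch of the index arithmetic (plain arrays and made-up sizes, not Grid types):

```cpp
// Index sketch for the Umu5d replication loop above: with the 5th dimension
// innermost, 4d outer site ss and 5th coordinate s flatten to Ls*ss + s.
#include <cassert>
#include <vector>

int main() {
  const int oSites4d = 4, Ls = 8;            // made-up sizes
  std::vector<double> Umu(oSites4d);         // stand-in for the 4d links
  std::vector<double> Umu5d(oSites4d * Ls);
  for (int ss = 0; ss < oSites4d; ss++) Umu[ss] = 10.0 + ss;
  for (int ss = 0; ss < oSites4d; ss++)
    for (int s = 0; s < Ls; s++)
      Umu5d[Ls * ss + s] = Umu[ss];          // replicate across the 5th dim
  assert(Umu5d[Ls * 2 + 3] == Umu[2]);       // slice s=3 of 4d site ss=2
  return 0;
}
```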
```diff
@@ -145,7 +120,8 @@ int main (int argc, char ** argv)
   RealD M5  =1.8;

   RealD NP = UGrid->_Nprocessors;
-  RealD NN = UGrid->NodeCount();
+  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);

   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
   std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@@ -155,22 +131,15 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
   if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
   if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
-#ifdef GRID_OMP
-  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
-  if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
-#endif
   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
   if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
   std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;

-  DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  int ncall =1000;
+  int ncall =100;

   if (1) {
     FGrid->Barrier();
     Dw.ZeroCounters();
-    Dw.Dhop(src,result,0);
-    std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
     double t0=usecond();
     for(int i=0;i<ncall;i++){
       __SSC_START;
@@ -184,55 +153,16 @@ int main (int argc, char ** argv)
     double flops=1344*volume*ncall;

     std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
-    //    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
-    //    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
+    std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
+    std::cout<<GridLogMessage << "norm ref    "<< norm2(ref)<<std::endl;
     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
-    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
     err = ref-result;
     std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
-    /*
-    if(( norm2(err)>1.0e-4) ) {
-      std::cout << "RESULT\n " << result<<std::endl;
-      std::cout << "REF   \n " << ref <<std::endl;
-      std::cout << "ERR   \n " << err <<std::endl;
-      FGrid->Barrier();
-      exit(-1);
-    }
-    */
     assert (norm2(err)< 1.0e-4 );
     Dw.Report();
   }

-  DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
-  if (1) {
-    FGrid->Barrier();
-    DwH.ZeroCounters();
-    DwH.Dhop(src,result,0);
-    double t0=usecond();
-    for(int i=0;i<ncall;i++){
-      __SSC_START;
-      DwH.Dhop(src,result,0);
-      __SSC_STOP;
-    }
-    double t1=usecond();
-    FGrid->Barrier();
-
-    double volume=Ls;  for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
-    double flops=1344*volume*ncall;
-
-    std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
-    std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
-    std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
-    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
-    err = ref-result;
-    std::cout<<GridLogMessage << "norm diff   "<< norm2(err)<<std::endl;
-    assert (norm2(err)< 1.0e-3 );
-    DwH.Report();
-  }

   if (1)
   {
```
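Both sides of the diff rate Dhop at 1344 floating-point operations per 5d lattice site, and usecond() timestamps are in microseconds, so flops/(t1-t0) is already Mflop/s. A standalone sketch of that accounting (example sizes and a pretend timing):

```cpp
// Sketch of the Mflop/s accounting used in the benchmark above:
// total ops = 1344 per 5d site * volume * calls; dividing ops by a time
// measured in microseconds yields Mflop/s directly.
#include <iostream>

int main() {
  int latt4[4] = {16, 16, 16, 16};           // example 4d lattice
  const int Ls = 8, ncall = 100;             // example values
  double volume = Ls;
  for (int mu = 0; mu < 4; mu++) volume *= latt4[mu];
  double flops = 1344.0 * volume * ncall;    // ops over ncall Dhop applications
  double t0 = 0.0, t1 = 2.0e6;               // pretend 2 s, in microseconds
  std::cout << "mflop/s = " << flops / (t1 - t0) << std::endl;
}
```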
```diff
@@ -241,10 +171,6 @@ int main (int argc, char ** argv)
     std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
     if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
     if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
-#ifdef GRID_OMP
-    if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
-    if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
-#endif
     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
     if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
@@ -256,13 +182,21 @@ int main (int argc, char ** argv)
     LatticeFermion sresult(sFGrid);
     WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);

-    localConvert(src,ssrc);
+    for(int x=0;x<latt4[0];x++){
+      for(int y=0;y<latt4[1];y++){
+        for(int z=0;z<latt4[2];z++){
+          for(int t=0;t<latt4[3];t++){
+            for(int s=0;s<Ls;s++){
+              std::vector<int> site({s,x,y,z,t});
+              SpinColourVector tmp;
+              peekSite(tmp,src,site);
+              pokeSite(tmp,ssrc,site);
+            }}}}}
     std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
     FGrid->Barrier();
-    sDw.Dhop(ssrc,sresult,0);
-    sDw.ZeroCounters();
     double t0=usecond();
+    sDw.ZeroCounters();
     for(int i=0;i<ncall;i++){
       __SSC_START;
       sDw.Dhop(ssrc,sresult,0);
@@ -276,52 +210,46 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
     std::cout<<GridLogMessage << "mflop/s =   "<< flops/(t1-t0)<<std::endl;
     std::cout<<GridLogMessage << "mflop/s per rank =  "<< flops/(t1-t0)/NP<<std::endl;
-    std::cout<<GridLogMessage << "mflop/s per node =  "<< flops/(t1-t0)/NN<<std::endl;
-    //    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
     sDw.Report();

-    if(0){
-      for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
-        sDw.Dhop(ssrc,sresult,0);
-        PerformanceCounter Counter(i);
-        Counter.Start();
-        sDw.Dhop(ssrc,sresult,0);
-        Counter.Stop();
-        Counter.Report();
-      }
-    }
+    std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;

     RealD sum=0;
+    for(int x=0;x<latt4[0];x++){
+      for(int y=0;y<latt4[1];y++){
+        for(int z=0;z<latt4[2];z++){
+          for(int t=0;t<latt4[3];t++){
+            for(int s=0;s<Ls;s++){
+              std::vector<int> site({s,x,y,z,t});
+              SpinColourVector normal, simd;
+              peekSite(normal,result,site);
+              peekSite(simd,sresult,site);
+              sum=sum+norm2(normal-simd);
+              if (norm2(normal-simd) > 1.0e-6 ) {
+                std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
+                std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
+                std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd   "<<simd<<std::endl;
+              }
+            }}}}}
+    std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
+    assert (sum< 1.0e-4 );

-    err=zero;
-    localConvert(sresult,err);
-    err = err - ref;
-    sum = norm2(err);
-    std::cout<<GridLogMessage<<" difference between normal ref and simd is "<<sum<<std::endl;
-    if(sum > 1.0e-4 ){
-      std::cout<< "sD REF\n " <<ref << std::endl;
-      std::cout<< "sD ERR \n " <<err <<std::endl;
-    }
-    //    assert(sum < 1.0e-4);
-
-    err=zero;
-    localConvert(sresult,err);
-    err = err - result;
-    sum = norm2(err);
-    std::cout<<GridLogMessage<<" difference between normal result and simd is "<<sum<<std::endl;
-    if(sum > 1.0e-4 ){
-      std::cout<< "sD REF\n " <<result << std::endl;
-      std::cout<< "sD ERR \n " << err <<std::endl;
-    }
-    assert(sum < 1.0e-4);
-
-    if(1){
-      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
-      std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
-      std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
-      if ( sizeof(Real)==4 )   std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
-      if ( sizeof(Real)==8 )   std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
-#ifdef GRID_OMP
-      if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
-      if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
-#endif
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   )
-        std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
-        std::cout << GridLogMessage<< "* Using Nc=3       WilsonKernels" <<std::endl;
-      if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm )
-        std::cout << GridLogMessage<< "* Using Asm Nc=3   WilsonKernels" <<std::endl;
-      std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
+    if (1) {

       LatticeFermion sr_eo(sFGrid);
       LatticeFermion ssrc_e (sFrbGrid);
       LatticeFermion ssrc_o (sFrbGrid);
       LatticeFermion sr_e (sFrbGrid);
```
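The hunks above trade Grid's localConvert for explicit per-site peekSite/pokeSite loops, and validate the SIMD layout by accumulating a per-site difference norm. A standalone sketch of that comparison pattern, with plain vectors standing in for the two lattice layouts (the tolerances mirror the diff; everything else is made up):

```cpp
// Standalone sketch of the per-site validation loop above: accumulate
// ||normal - simd||^2 site by site and flag any site over tolerance.
#include <cstdio>
#include <vector>

int main() {
  const int sites = 1000;
  std::vector<double> normal(sites, 1.0), simd(sites, 1.0);
  simd[42] = 1.0 + 2.0e-3;                       // plant one small mismatch
  double sum = 0;
  for (int i = 0; i < sites; i++) {
    double d2 = (normal[i] - simd[i]) * (normal[i] - simd[i]);
    sum += d2;
    if (d2 > 1.0e-6) std::printf("site %d differs: %g\n", i, d2);
  }
  std::printf("total difference %g\n", sum);
  return sum < 1.0e-4 ? 0 : 1;                   // same acceptance threshold
}
```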
@ -329,30 +257,39 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,ssrc); pickCheckerboard(Even,ssrc_e,ssrc);
pickCheckerboard(Odd,ssrc_o,ssrc); pickCheckerboard(Odd,ssrc_o,ssrc);
// setCheckerboard(sr_eo,ssrc_o);
// setCheckerboard(sr_eo,ssrc_e); setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero; sr_e = zero;
sr_o = zero; sr_o = zero;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking WilsonFermion5D<DomainWallVec5dImplR>::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
FGrid->Barrier(); FGrid->Barrier();
sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
sDw.ZeroCounters(); sDw.ZeroCounters();
// sDw.stat.init("DhopEO"); sDw.stat.init("DhopEO");
double t0=usecond(); double t0=usecond();
for (int i = 0; i < ncall; i++) { for (int i = 0; i < ncall; i++) {
sDw.DhopEO(ssrc_o, sr_e, DaggerNo); sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
} }
double t1=usecond(); double t1=usecond();
FGrid->Barrier(); FGrid->Barrier();
// sDw.stat.print(); sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
sDw.Report(); sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@@ -361,26 +298,22 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e;
RealD error = norm2(ssrc_e);
+std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
ssrc_o = ssrc_o - sr_o;
error+= norm2(ssrc_o);
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
-if(( error>1.0e-4) ) {
+if(error>1.0e-4) {
setCheckerboard(ssrc,ssrc_o);
setCheckerboard(ssrc,ssrc_e);
-std::cout<< "DIFF\n " <<ssrc << std::endl;
-setCheckerboard(ssrc,sr_o);
-setCheckerboard(ssrc,sr_e);
-std::cout<< "CBRESULT\n " <<ssrc << std::endl;
-std::cout<< "RESULT\n " <<sresult<< std::endl;
+std::cout<< ssrc << std::endl;
}
-assert(error<1.0e-4);
}
}
if (1)
@@ -391,30 +324,25 @@ int main (int argc, char ** argv)
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
ref = -0.5*ref;
}
-// dump=1;
Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
-std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
-std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
+std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
+std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
err = ref-result;
-std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
-if((norm2(err)>1.0e-4)){
-std::cout<< "DAG RESULT\n " <<ref << std::endl;
-std::cout<< "DAG sRESULT\n " <<result << std::endl;
-std::cout<< "DAG ERR \n " << err <<std::endl;
-}
+std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
+assert(norm2(err)<1.0e-4);
LatticeFermion src_e (FrbGrid);
LatticeFermion src_o (FrbGrid);
LatticeFermion r_e (FrbGrid);
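In the benchmark's conventions the naive reference loop above assembles the daggered hopping term

D_{\rm hop}^\dagger\,\psi(x) \;=\; -\tfrac{1}{2}\sum_{\mu=0}^{3}\Big[(1+\gamma_\mu)\,U_\mu(x)\,\psi(x+\hat\mu) \;+\; (1-\gamma_\mu)\,U_\mu^\dagger(x-\hat\mu)\,\psi(x-\hat\mu)\Big],

with the shifts written as mu+1 because dimension 0 of the five-dimensional lattice is the s-direction; this is why it is checked against Dw.Dhop(src,result,1), i.e. with the dagger flag set.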
@@ -422,24 +350,18 @@ int main (int argc, char ** argv)
LatticeFermion r_eo (FGrid);
-std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
+std::cout<<GridLogMessage << "Calling Deo and Doe and assert Deo+Doe == Dunprec"<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
-// S-direction is INNERMOST and takes no part in the parity.
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
-#ifdef GRID_OMP
-if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
-if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
-#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric   ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@@ -447,7 +369,6 @@ int main (int argc, char ** argv)
{
Dw.ZeroCounters();
FGrid->Barrier();
-Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -460,7 +381,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
-std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
@@ -476,20 +396,14 @@ int main (int argc, char ** argv)
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
-if((norm2(err)>1.0e-4)){
-std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
-std::cout<< "Deo REF\n " <<result << std::endl;
-std::cout<< "Deo ERR \n " << err <<std::endl;
-}
+assert(norm2(err)<1.0e-4);
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
-//assert(norm2(src_e)<1.0e-4);
-//assert(norm2(src_o)<1.0e-4);
+assert(norm2(src_e)<1.0e-4);
+assert(norm2(src_o)<1.0e-4);
Grid_finalize();
}
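For orientation, the check asserted on above reconstructs the unpreconditioned operator from its two checkerboarded halves. A sketch of the logic as a fragment in the benchmark's own variables (the DhopOE call is assumed by symmetry with the DhopEO call shown, not quoted from the source):

// odd source -> even result, even source -> odd result, then recombine
Dw.DhopEO(src_o, r_e, DaggerNo);
Dw.DhopOE(src_e, r_o, DaggerNo);
Dw.Dhop  (src,   result, DaggerNo); // full operator on the full grid
setCheckerboard(r_eo, r_e);
setCheckerboard(r_eo, r_o);
err = r_eo - result;                // norm2(err) should be rounding-level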
@@ -66,8 +66,7 @@ int main (int argc, char ** argv)
Vec tsum; tsum = zero;
-GridParallelRNG pRNG(&Grid);
-pRNG.SeedFixedIntegers(std::vector<int>({56,17,89,101}));
+GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
std::vector<double> stop(threads);
Vector<Vec> sum(threads);
@@ -78,7 +77,8 @@ int main (int argc, char ** argv)
}
double start=usecond();
-parallel_for(int t=0;t<threads;t++){
+PARALLEL_FOR_LOOP
+for(int t=0;t<threads;t++){
sum[t] = x[t]._odata[0];
for(int i=0;i<Nloop;i++){
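The parallel_for spelling on the old side of this hunk is a thin wrapper over the pragma form it replaces; roughly, and from memory of the 0.7.0-era lib/Threads.h rather than quoted from it:

#ifdef GRID_OMP
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#else
#define PARALLEL_FOR_LOOP
#endif
#define parallel_for PARALLEL_FOR_LOOP for

so parallel_for(int t=0;t<threads;t++){ and the two-line form diffed here compile to the same loop.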
@@ -55,8 +55,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-uint64_t lmax=64;
-#define NLOOP (100*lmax*lmax*lmax*lmax/vol)
+uint64_t lmax=44;
+#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
for(int lat=4;lat<=lmax;lat+=4){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
@@ -65,7 +65,7 @@ int main (int argc, char ** argv)
uint64_t Nloop=NLOOP;
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@@ -100,7 +100,7 @@ int main (int argc, char ** argv)
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@@ -138,7 +138,7 @@ int main (int argc, char ** argv)
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
@@ -173,7 +173,7 @@ int main (int argc, char ** argv)
uint64_t Nloop=NLOOP;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeVec z(&Grid); //random(pRNG,z);
LatticeVec x(&Grid); //random(pRNG,x);
LatticeVec y(&Grid); //random(pRNG,y);
@@ -1,134 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_staggered.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALF"<< sizeof(RealF)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REALD"<< sizeof(RealD)<<std::endl;
std::cout<<GridLogMessage << "Grid floating point word size is REAL"<< sizeof(Real)<<std::endl;
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
typedef typename ImprovedStaggeredFermionR::FermionField FermionField;
typename ImprovedStaggeredFermionR::ImplParams params;
FermionField src (&Grid); random(pRNG,src);
FermionField result(&Grid); result=zero;
FermionField ref(&Grid); ref=zero;
FermionField tmp(&Grid); tmp=zero;
FermionField err(&Grid); tmp=zero;
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
std::vector<LatticeColourMatrix> U(4,&Grid);
double volume=1;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
// Only one non-zero (y)
#if 0
Umu=zero;
Complex cone(1.0,0.0);
for(int nn=0;nn<Nd;nn++){
random(pRNG,U[nn]);
if(1) {
if (nn!=2) { U[nn]=zero; std::cout<<GridLogMessage << "zeroing gauge field in dir "<<nn<<std::endl; }
// else { U[nn]= cone;std::cout<<GridLogMessage << "unit gauge field in dir "<<nn<<std::endl; }
else { std::cout<<GridLogMessage << "random gauge field in dir "<<nn<<std::endl; }
}
PokeIndex<LorentzIndex>(Umu,U[nn],nn);
}
#endif
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
ref = zero;
/*
{ // Naive wilson implementation
ref = zero;
for(int mu=0;mu<Nd;mu++){
// ref = src + Gamma(Gamma::GammaX)* src ; // 1-gamma_x
tmp = U[mu]*Cshift(src,mu,1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu,-1);
for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
}
}
}
ref = -0.5*ref;
*/
RealD mass=0.1;
RealD c1=9.0/8.0;
RealD c2=-1.0/24.0;
RealD u0=1.0;
ImprovedStaggeredFermionR Ds(Umu,Umu,Grid,RBGrid,mass,c1,c2,u0,params);
std::cout<<GridLogMessage << "Calling Ds"<<std::endl;
int ncall=1000;
double t0=usecond();
for(int i=0;i<ncall;i++){
Ds.Dhop(src,result,0);
}
double t1=usecond();
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Grid_finalize();
}
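As a sanity check on the deleted benchmark's flop count, the formula in its timing code unpacks as

16 \times \big(3 \times (6+8+8)\big) + 15 \times 3 \times 2 \;=\; 16 \times 66 + 90 \;=\; 1146 \ \text{flops per site},

which is the 1146 the trailing comment refers to.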
@@ -35,9 +35,8 @@ using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
-#define LMAX (64)
-int Nloop=20;
+int Nloop=1000;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
@@ -51,12 +50,12 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeColourMatrix z(&Grid);// random(pRNG,z);
LatticeColourMatrix x(&Grid);// random(pRNG,x);
@@ -83,13 +82,13 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -114,13 +113,13 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -145,13 +144,13 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
-// GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// GridParallelRNG pRNG(&Grid); pRNG.SeedRandomDevice();
LatticeColourMatrix z(&Grid); //random(pRNG,z);
LatticeColourMatrix x(&Grid); //random(pRNG,x);
@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
std::vector<int> seeds({1,2,3,4});
GridParallelRNG pRNG(&Grid);
pRNG.SeedFixedIntegers(seeds);
-// pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9});
+// pRNG.SeedRandomDevice();
LatticeFermion src (&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
@ -1,7 +1,11 @@
include Make.inc include Make.inc
bench-local: all simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o
./Benchmark_su3
./Benchmark_memory_bandwidth EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a
./Benchmark_wilson
./Benchmark_dwf --dslash-unroll libsimple_su3_test_a_SOURCES = simple_su3_test.cc
libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
libsimple_simd_test_a_SOURCES = simple_simd_test.cc
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
-EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
+EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate
@@ -1,19 +1,16 @@
AC_PREREQ([2.63])
-AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
+AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
-AM_INIT_AUTOMAKE([subdir-objects 1.13])
-AM_EXTRA_RECURSIVE_TARGETS([tests bench])
+AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-################ Get git info
-#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
############### Checks for programs
-CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX
AC_PROG_RANLIB
@@ -27,15 +24,12 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])
-CXXFLAGS="-g $CXXFLAGS"
############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
############### OpenMP
AC_OPENMP
ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then
@@ -66,23 +60,16 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
############### FFTW3
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
-############### LIME
-AC_ARG_WITH([lime],
-[AS_HELP_STRING([--with-lime=prefix],
-[try this for a non-standard install prefix of the LIME library])],
-[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
-[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
############### lapack
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in
@@ -96,18 +83,6 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac
-############### FP16 conversions
-AC_ARG_ENABLE([sfw-fp16],
-[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
-[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
-case ${ac_SFW_FP16} in
-yes)
-AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
-no);;
-*)
-AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
-esac
############### MKL
AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@@ -133,7 +108,7 @@ AC_ARG_WITH([hdf5],
############### first-touch
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in
@@ -159,8 +134,8 @@ if test "${ac_MKL}x" != "nox"; then
fi
AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@@ -169,7 +144,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")])
fi
AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@@ -177,18 +152,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true])
-AC_SEARCH_LIBS([limeCreateReader], [lime],
-[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
-[have_lime=true],
-[AC_MSG_WARN(C-LIME library was not found in your system.
-In order to use ILGG file format please install or provide the correct path to your installation
-Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
-AC_SEARCH_LIBS([crc32], [z],
-[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
-[have_zlib=true],
-[AC_MSG_ERROR(zlib library was not found in your system.)])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true]
@@ -213,26 +176,19 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
-case ${ac_SFW_FP16} in
-yes)
-SIMD_FLAGS='-msse4.2';;
-no)
-SIMD_FLAGS='-msse4.2 -mf16c';;
-*)
-AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
-esac;;
+SIMD_FLAGS='-msse4.2';;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
-SIMD_FLAGS='-mavx -mf16c';;
+SIMD_FLAGS='-mavx';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
-SIMD_FLAGS='-mavx -mfma4 -mf16c';;
+SIMD_FLAGS='-mavx -mfma4';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
-SIMD_FLAGS='-mavx -mfma -mf16c';;
+SIMD_FLAGS='-mavx -mfma';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
-SIMD_FLAGS='-mavx2 -mfma -mf16c';;
+SIMD_FLAGS='-mavx2 -mfma';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
@@ -341,7 +297,7 @@ case ${ac_COMMS} in
comms_type='shmem'
;;
*)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;;
esac
case ${ac_COMMS} in
@@ -365,7 +321,7 @@ AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ])
############### RNG selection
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937|sitmo],\
[Select Random Number Generator to be used])],\
-[ac_RNG=${enable_rng}],[ac_RNG=sitmo])
+[ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
case ${ac_RNG} in
ranlux48)
@@ -378,7 +334,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;;
*)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;;
esac
@@ -395,7 +351,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;;
*)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;;
esac
@@ -407,7 +363,7 @@ case ${ac_CHROMA} in
yes|no)
;;
*)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;;
esac
@@ -428,65 +384,12 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
-GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
-GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
-GRID_LIBS=$LIBS
-GRID_SHORT_SHA=`git rev-parse --short HEAD`
-GRID_SHA=`git rev-parse HEAD`
-GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS])
-AC_SUBST([GRID_CXXFLAGS])
-AC_SUBST([GRID_LDFLAGS])
-AC_SUBST([GRID_LIBS])
-AC_SUBST([GRID_SHA])
-AC_SUBST([GRID_BRANCH])
-git_commit=`cd $srcdir && ./scripts/configure.commit`
-echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Summary of configuration for $PACKAGE v$VERSION
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
------ GIT VERSION -------------------------------------
-$git_commit
------ PLATFORM ----------------------------------------
-architecture (build) : $build_cpu
-os (build) : $build_os
-architecture (target) : $target_cpu
-os (target) : $target_os
-compiler vendor : ${ax_cv_cxx_compiler_vendor}
-compiler version : ${ax_cv_gxx_version}
------ BUILD OPTIONS -----------------------------------
-SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
-Threading : ${ac_openmp}
-Communications type : ${comms_type}
-Default precision : ${ac_PRECISION}
-Software FP16 conversion : ${ac_SFW_FP16}
-RNG choice : ${ac_RNG}
-GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
-LAPACK : ${ac_LAPACK}
-FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
-LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
-HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
-build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
------ BUILD FLAGS -------------------------------------
-CXXFLAGS:
-`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-LDFLAGS:
-`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-LIBS:
-`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
--------------------------------------------------------" > grid.configure.summary
-GRID_SUMMARY="`cat grid.configure.summary`"
-AM_SUBST_NOTMAKE([GRID_SUMMARY])
-AC_SUBST([GRID_SUMMARY])
-AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile)
@@ -497,15 +400,42 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
-AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
-AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT
-echo ""
-cat grid.configure.summary
-echo ""
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Summary of configuration for $PACKAGE v$VERSION
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----- PLATFORM ----------------------------------------
+architecture (build) : $build_cpu
+os (build) : $build_os
+architecture (target) : $target_cpu
+os (target) : $target_os
+compiler vendor : ${ax_cv_cxx_compiler_vendor}
+compiler version : ${ax_cv_gxx_version}
+----- BUILD OPTIONS -----------------------------------
+SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
+Threading : ${ac_openmp}
+Communications type : ${comms_type}
+Default precision : ${ac_PRECISION}
+RNG choice : ${ac_RNG}
+GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
+LAPACK : ${ac_LAPACK}
+FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
+HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
+build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
+----- BUILD FLAGS -------------------------------------
+CXXFLAGS:
+`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
+LDFLAGS:
+`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
+LIBS:
+`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
+-------------------------------------------------------" > config.summary
+echo ""
+cat config.summary
+echo ""
@@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \
-GeneticScheduler<unsigned int>::ObjFunc memPeak = \
-[this](const std::vector<unsigned int> &program)\
+auto memPeak = [this](const std::vector<unsigned int> &program)\
{\
unsigned int memPeak;\
bool msg;\
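The old side of this hunk names the functor type instead of using auto; schematically, and with the template parameters and return type of Hadrons' GeneticScheduler assumed rather than quoted:

#include <functional>
#include <vector>

template <typename T>
class GeneticScheduler {
public:
    // assumed shape of the objective-function typedef used by the macro
    typedef std::function<unsigned int(const std::vector<T> &)> ObjFunc;
};

// a named std::function type can be stored and passed around uniformly,
// which a deduced lambda type cannot
GeneticScheduler<unsigned int>::ObjFunc memPeak =
    [](const std::vector<unsigned int> &program) {
        return static_cast<unsigned int>(program.size()); // placeholder body
    };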
@@ -145,15 +145,6 @@ std::string typeName(void)
return typeName(typeIdPt<T>());
}
-// default writers/readers
-#ifdef HAVE_HDF5
-typedef Hdf5Reader CorrReader;
-typedef Hdf5Writer CorrWriter;
-#else
-typedef XmlReader CorrReader;
-typedef XmlWriter CorrWriter;
-#endif
END_HADRONS_NAMESPACE
#endif // Hadrons_Global_hpp_
@@ -29,20 +29,12 @@ See the full license in the file "LICENSE" in the top level distribution directory
#include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
-#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
-#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
-#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
-#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
-#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
-#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
-#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
-#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp>
@@ -48,8 +48,7 @@ public:
std::string, gauge,
unsigned int, Ls,
double , mass,
-double , M5,
-std::string , boundary);
+double , M5);
};
template <typename FImpl>
@@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void)
<< par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl;
-LOG(Message) << "Fermion boundary conditions: " << par().boundary
-<< std::endl;
env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls);
-std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
-typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
-par().mass, par().M5,
-implParams);
+par().mass, par().M5);
env().setObject(getName(), fMatPt);
}
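The boundary parameter removed here is a space-separated list of phases, one per direction, parsed with strToVec. A fragment showing the intended input format (the value "1 1 1 -1", periodic in space and antiperiodic in time, is an illustrative assumption, not taken from the diff):

// hypothetical parameter value feeding the 0.7.0-side parsing shown above
std::string boundary = "1 1 1 -1";
std::vector<Complex> phases = strToVec<Complex>(boundary); // {1,1,1,-1}
typename DomainWallFermion<FImpl>::ImplParams implParams(phases);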
@@ -46,8 +46,7 @@ class WilsonPar: Serializable
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge,
-double , mass,
-std::string, boundary);
+double , mass);
};
template <typename FImpl>
@@ -113,15 +112,10 @@ void TWilson<FImpl>::execute()
{
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl;
-LOG(Message) << "Fermion boundary conditions: " << par().boundary
-<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid();
-std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
-typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
-FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
-implParams);
+FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
env().setObject(getName(), fMatPt);
}
@@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl;
-CorrWriter writer(par().output);
+XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
@@ -1,144 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DiscLoop_hpp_
#define Hadrons_DiscLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* DiscLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string, q_loop,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
TYPE_ALIASES(FImpl,);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
/******************************************************************************
* TDiscLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q_loop};
return in;
}
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
LOG(Message) << "Computing disconnected loop contraction '" << getName()
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "disc", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_DiscLoop_hpp_
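In equations, the deleted module computes the disconnected-loop correlator, a direct transcription of the trace/sliceSum lines above:

C(t) \;=\; \sum_{\vec{x}} \mathrm{Tr}\left[\, \Gamma \, q_{\rm loop}(\vec{x},t) \,\right].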
@@ -1,170 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Gamma3pt_hpp_
#define Hadrons_Gamma3pt_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
* 3pt contraction with gamma matrix insertion.
*
* Schematic:
*
* q2 q3
* /----<------*------<----¬
* / gamma \
* / \
* i * * f
* \ /
* \ /
* \----------->----------/
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*/
/******************************************************************************
* Gamma3pt *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TGamma3pt(const std::string name);
// destructor
virtual ~TGamma3pt(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TGamma3pt implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3};
return in;
}
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
<< par().q3 << "', with " << par().gamma << " insertion."
<< std::endl;
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "gamma3pt", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Gamma3pt_hpp_
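The contraction in the deleted execute() corresponds to

C_3(t) \;=\; \sum_{\vec{x}} \mathrm{Tr}\left[\, \gamma_5\, q_1(x)\, q_2^\dagger(x)\, \gamma_5\, \Gamma\, q_3(x) \,\right],

i.e. the trace(g5*q1*adj(q2)*(g5*gamma)*q3) line, with the \gamma_5 factors supplying the backward q_2 propagator via \gamma_5-hermiticity.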
@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015 Copyright (C) 2015
Copyright (C) 2016 Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -38,39 +36,20 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/*
Meson contractions
-----------------------------
* options:
- q1: input propagator 1 (string)
- q2: input propagator 2 (string)
- gammas: gamma products to insert at sink & source, pairs of gamma matrices
(space-separated strings) in angled brackets (i.e. <g_sink g_src>),
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
Special values: "all" - perform all possible contractions.
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
given as multiples of (2*pi) / L.
*/
 /******************************************************************************
  *                                TMeson                                      *
  ******************************************************************************/
 BEGIN_MODULE_NAMESPACE(MContraction)

-typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
-
 class MesonPar: Serializable
 {
 public:
     GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
                                     std::string, q1,
                                     std::string, q2,
-                                    std::string, gammas,
-                                    std::string, mom,
-                                    std::string, output);
+                                    std::string, output,
+                                    Gamma::Algebra, gammaSource,
+                                    Gamma::Algebra, gammaSink);
 };

 template <typename FImpl1, typename FImpl2>
@@ -82,10 +61,7 @@ public:
 class Result: Serializable
 {
 public:
-    GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
-                                    Gamma::Algebra, gamma_snk,
-                                    Gamma::Algebra, gamma_src,
-                                    std::vector<Complex>, corr);
+    GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
 };
 public:
     // constructor
@@ -95,7 +71,6 @@ public:
     // dependencies/products
     virtual std::vector<std::string> getInput(void);
    virtual std::vector<std::string> getOutput(void);
-    virtual void parseGammaString(std::vector<GammaPair> &gammaList);
     // execution
     virtual void execute(void);
 };
@@ -128,31 +103,6 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
     return output;
 }

-template <typename FImpl1, typename FImpl2>
-void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
-{
-    gammaList.clear();
-    // Determine gamma matrices to insert at source/sink.
-    if (par().gammas.compare("all") == 0)
-    {
-        // Do all contractions.
-        for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
-        {
-            for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
-            {
-                gammaList.push_back(std::make_pair((Gamma::Algebra)i,
-                                                   (Gamma::Algebra)j));
-            }
-        }
-    }
-    else
-    {
-        // Parse individual contractions from input string.
-        gammaList = strToVec<GammaPair>(par().gammas);
-    }
-}
-
 // execution ///////////////////////////////////////////////////////////////////
 template <typename FImpl1, typename FImpl2>
 void TMeson<FImpl1, FImpl2>::execute(void)
@@ -161,44 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void)
                  << " quarks '" << par().q1 << "' and '" << par().q2 << "'"
                  << std::endl;

-    CorrWriter             writer(par().output);
+    XmlWriter              writer(par().output);
     PropagatorField1       &q1 = *env().template getObject<PropagatorField1>(par().q1);
     PropagatorField2       &q2 = *env().template getObject<PropagatorField2>(par().q2);
     LatticeComplex         c(env().getGrid());
-    Gamma                  g5(Gamma::Algebra::Gamma5);
-    std::vector<GammaPair> gammaList;
+    Gamma                  gSrc(par().gammaSource), gSnk(par().gammaSink);
+    Gamma                  g5(Gamma::Algebra::Gamma5);
     std::vector<TComplex>  buf;
-    std::vector<Result>    result;
-    std::vector<Real>      p;
-
-    p = strToVec<Real>(par().mom);
-    LatticeComplex ph(env().getGrid()), coor(env().getGrid());
-    Complex i(0.0,1.0);
-    ph = zero;
-    for(unsigned int mu = 0; mu < env().getNd(); mu++)
-    {
-        LatticeCoordinate(coor, mu);
-        ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
-    }
-    ph = exp((Real)(2*M_PI)*i*ph);
-
-    parseGammaString(gammaList);
-    result.resize(gammaList.size());
-    for (unsigned int i = 0; i < result.size(); ++i)
-    {
-        Gamma gSnk(gammaList[i].first);
-        Gamma gSrc(gammaList[i].second);
-        c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph;
-        sliceSum(c, buf, Tp);
-        result[i].gamma_snk = gammaList[i].first;
-        result[i].gamma_src = gammaList[i].second;
-        result[i].corr.resize(buf.size());
-        for (unsigned int t = 0; t < buf.size(); ++t)
-        {
-            result[i].corr[t] = TensorRemove(buf[t]);
-        }
-    }
+    Result                 result;
+
+    c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
+    sliceSum(c, buf, Tp);
+    result.corr.resize(buf.size());
+    for (unsigned int t = 0; t < buf.size(); ++t)
+    {
+        result.corr[t] = TensorRemove(buf[t]);
+    }
     write(writer, "meson", result);
 }
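// For reference, the phase field built in the '-' version above implements
// ph(x) = exp(2*pi*i*sum_mu p[mu]*x[mu]/L[mu]), so the mom parameter counts
// integer units of the smallest lattice momentum 2*pi/L[mu]. A minimal
// standalone sketch of the same arithmetic (a hypothetical helper, not part
// of Grid or Hadrons):
#include <cmath>
#include <complex>
#include <vector>

std::complex<double> momentumPhase(const std::vector<double> &p,
                                   const std::vector<int>    &x,
                                   const std::vector<int>    &L)
{
    // accumulate p.x in lattice units, mirroring ph = ph + p[mu]*coor/L[mu]
    double arg = 0.;
    for (unsigned int mu = 0; mu < p.size(); ++mu)
    {
        arg += p[mu]*x[mu]/static_cast<double>(L[mu]);
    }
    // exp(2*pi*i*arg), mirroring ph = exp((Real)(2*M_PI)*i*ph)
    return std::exp(std::complex<double>(0., 2.*M_PI*arg));
}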

View File

@@ -1,114 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonian_hpp_
#define Hadrons_WeakHamiltonian_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonian *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
/*******************************************************************************
* Utilities for contractions involving the Weak Hamiltonian.
******************************************************************************/
//// Sum and store correlator.
#define MAKE_DIAG(exp, buf, res, n)\
sliceSum(exp, buf, Tp);\
res.name = (n);\
res.corr.resize(buf.size());\
for (unsigned int t = 0; t < buf.size(); ++t)\
{\
res.corr[t] = TensorRemove(buf[t]);\
}
//// Contraction of mu index: use 'mu' variable in exp.
#define SUM_MU(buf,exp)\
buf = zero;\
for (unsigned int mu = 0; mu < ndim; ++mu)\
{\
buf += exp;\
}
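// A sketch of how the two macros compose at a call site, taken from the
// Eye-type module further down (expbuf, corrbuf, result and the
// S_body/S_loop fields are defined there):
//
//     SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))       // sum the Lorentz index
//     MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")  // timeslice sums into result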
enum
{
i_V = 0,
i_A = 1,
n_i = 2
};
class WeakHamiltonianPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
std::string, q1,
std::string, q2,
std::string, q3,
std::string, q4,
std::string, output);
};
#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
TYPE_ALIASES(FIMPL,)\
class Result: Serializable\
{\
public:\
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
std::string, name,\
std::vector<Complex>, corr);\
};\
public:\
/* constructor */ \
T##modname(const std::string name);\
/* destructor */ \
virtual ~T##modname(void) = default;\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);
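// Each concrete module header below then reduces to a single invocation,
// e.g. MAKE_WEAK_MODULE(WeakHamiltonianEye), which declares
// TWeakHamiltonianEye with the Result type above and registers it with the
// MContraction module factory.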
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonian_hpp_

View File

@@ -1,137 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematics: q4 |
* /-<-¬ |
* / \ | q2 q3
* \ / | /----<------*------<----¬
* q2 \ / q3 | / /-*-¬ \
* /-----<-----* *-----<----¬ | / / \ \
* i * H_W * f | i * \ / q4 * f
* \ / | \ \->-/ /
* \ / | \ /
* \---------->---------/ | \----------->----------/
* q1 | q1
* |
* Saucer (S) | Eye (E)
*
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
*/
/******************************************************************************
* TWeakHamiltonianEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> S_body(ndim, tmp1);
std::vector<PropagatorField> S_loop(ndim, tmp1);
std::vector<LatticeComplex> E_body(ndim, tmp2);
std::vector<LatticeComplex> E_loop(ndim, tmp2);
// Setup for S-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
// Perform S-type contractions.
SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")
// Recycle sub-expressions for E-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
E_body[mu] = trace(S_body[mu]);
E_loop[mu] = trace(S_loop[mu]);
}
// Perform E-type contractions.
SUM_MU(expbuf, E_body[mu]*E_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")
write(writer, "HW_Eye", result);
}

View File

@@ -1,58 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianEye_hpp_
#define Hadrons_WeakHamiltonianEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
S_diag = 0,
E_diag = 1,
n_eye_diag = 2
};
// Saucer and Eye subdiagram contractions.
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianEye_hpp_

View File

@@ -1,139 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Non-Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematic:
* q2 q3 | q2 q3
* /--<--¬ /--<--¬ | /--<--¬ /--<--¬
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* i * * H_W * f | i * * * H_W * f
* \ * | | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \-->--/ \-->--/
* \-->--/ \-->--/ | q1 q4
* q1 q4 |
* Connected (C) | Wing (W)
*
* C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
* W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
*
*/
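// The only difference between the two topologies is where the spin-colour
// trace closes: C is a single trace running through both quark loops, while
// W is the product of two separately traced loops. The implementation below
// exploits this by building C_i_side_loop/C_f_side_loop once and recycling
// their traces as the W-type factors.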
/******************************************************************************
* TWeakHamiltonianNonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_noneye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
// Setup for C-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
}
// Perform C-type contractions.
SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")
// Recycle sub-expressions for W-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
}
// Perform W-type contractions.
SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")
write(writer, "HW_NonEye", result);
}

View File

@@ -1,57 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianNonEye_hpp_
#define Hadrons_WeakHamiltonianNonEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianNonEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
W_diag = 0,
C_diag = 1,
n_noneye_diag = 2
};
// Wing and Connected subdiagram contractions
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianNonEye_hpp_

View File

@@ -1,135 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian + current contractions, disconnected topology for neutral
* mesons.
*
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
* Hamiltonian in the physical basis and an additional current J (see e.g.
* Fig 11 of arXiv:1507.03094).
*
* Schematic:
*
* q2 q4 q3
* /--<--¬ /---<--¬ /---<--¬
* / \ / \ / \
* i * * H_W | J * * f
* \ / \ / \ /
* \--->---/ \-------/ \------/
* q1
*
* options
* - q1: input propagator 1 (string)
* - q2: input propagator 2 (string)
* - q3: input propagator 3 (string), assumed to be sequential propagator
* - q4: input propagator 4 (string), assumed to be a loop
*
* type 1: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
* type 2: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
*/
/*******************************************************************************
* TWeakNeutral4ptDisc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)
{
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
// Perform type 1 contractions.
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
// Perform type 2 contractions.
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
write(writer, "HW_disc0", result);
}

View File

@@ -1,59 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakNeutral4ptDisc_hpp_
#define Hadrons_WeakNeutral4ptDisc_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakNeutral4ptDisc *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
neut_disc_1_diag = 0,
neut_disc_2_diag = 1,
n_neut_disc_diag = 2
};
// Neutral 4pt disconnected subdiagram contractions.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakNeutral4ptDisc_hpp_

View File

@@ -65,7 +65,7 @@ void TLoad::setup(void)
 // execution ///////////////////////////////////////////////////////////////////
 void TLoad::execute(void)
 {
-    FieldMetaData header;
+    NerscField    header;
     std::string   fileName = par().file + "."
                              + std::to_string(env().getTrajectory());
@@ -74,5 +74,5 @@ void TLoad::execute(void)
     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
     NerscIO::readConfiguration(U, header, fileName);
     LOG(Message) << "NERSC header:" << std::endl;
-    dump_meta_data(header, LOG(Message));
+    dump_nersc_header(header, LOG(Message));
 }

View File

@@ -1,132 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
Copyright (C) 2016
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_NoiseLoop_hpp_
#define Hadrons_NoiseLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Noise loop propagator
-----------------------------
* loop_x = q_x * adj(eta_x)
* options:
- q = Result of inversion on noise source.
- eta = noise source.
*/
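// Why this estimates a loop: for noise normalised so that the expectation of
// eta_x*adj(eta_y) is delta_xy, the inversion q = M^{-1}*eta gives an
// expectation of q_x*adj(eta_x) equal to M^{-1}_xx, i.e. the diagonal of the
// all-to-all propagator. Averaging over independent hits reduces the
// stochastic error; a hedged sketch (nHits and loops[] are hypothetical, not
// fields of this module):
PropagatorField loopAvg(env().getGrid());

loopAvg = zero;
for (unsigned int h = 0; h < nHits; ++h)
{
    loopAvg += loops[h];       // loops[h] = q_h*adj(eta_h)
}
loopAvg = (1./nHits)*loopAvg;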
/******************************************************************************
* NoiseLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MLoop)
class NoiseLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
std::string, q,
std::string, eta);
};
template <typename FImpl>
class TNoiseLoop: public Module<NoiseLoopPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TNoiseLoop(const std::string name);
// destructor
virtual ~TNoiseLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
/******************************************************************************
* TNoiseLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
: Module<NoiseLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().eta};
return in;
}
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::execute(void)
{
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
loop = q*adj(eta);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_NoiseLoop_hpp_

View File

@@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
 Copyright (C) 2015
 Copyright (C) 2016
-Copyright (C) 2017

 Author: Antonin Portelli <antonin.portelli@me.com>
@@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void)
     for(unsigned int mu = 0; mu < env().getNd(); mu++)
     {
         LatticeCoordinate(coor, mu);
-        ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
+        ph = ph + p[mu]*coor;
     }
-    ph = exp((Real)(2*M_PI)*i*ph);
+    ph = exp(i*ph);
     LatticeCoordinate(t, Tp);
     src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
 }
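// The version with the explicit 2*pi/L_mu factors (the '-' lines here) makes
// the mom parameter dimensionless in integer lattice units: p_mu = 1 is the
// smallest momentum 2*pi/L_mu allowed on a periodic lattice of extent L_mu.
// The bare exp(i*ph) variant instead expects momenta already multiplied by
// 2*pi/L_mu.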

View File

@@ -1,147 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WallSource_hpp_
#define Hadrons_WallSource_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Wall source
-----------------------------
* src_x = delta(x_3 - tW) * exp(i x.mom)
* options:
- tW: source timeslice (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* Wall *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class WallPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
unsigned int, tW,
std::string, mom);
};
template <typename FImpl>
class TWall: public Module<WallPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TWall(const std::string name);
// destructor
virtual ~TWall(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
/******************************************************************************
* TWall implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::execute(void)
{
LOG(Message) << "Generating wall source at t = " << par().tW
<< " with momentum " << par().mom << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
Complex i(0.0,1.0);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < Nd; mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = 1.;
src = where((t == par().tW), src*ph, 0.*src);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WallSource_hpp_

View File

@@ -173,7 +173,7 @@ void TQuark<FImpl>::execute(void)
                 *env().template getObject<PropagatorField>(getName());

         axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
-        axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
+        axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
         ExtractSlice(tmp, sol, 0, 0);
         FermToProp(p4d, tmp, s, c);
     }
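// For context, these lines assemble the physical 4d propagator from the
// domain-wall boundaries, q4d = P_-*psi(s=0) + P_+*psi(s=Ls-1) with
// P_+- = (1 +- g5)/2: the pminus call stores the P_- piece in s-slice 0, so
// the pplus call must accumulate onto it with coefficient 1; a coefficient
// of 0 overwrites the P_- piece and drops half of the propagator.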

View File

@@ -1,7 +1,4 @@
 modules_cc =\
-  Modules/MContraction/WeakHamiltonianEye.cc \
-  Modules/MContraction/WeakHamiltonianNonEye.cc \
-  Modules/MContraction/WeakNeutral4ptDisc.cc \
   Modules/MGauge/Load.cc \
   Modules/MGauge/Random.cc \
   Modules/MGauge/Unit.cc
@@ -10,21 +7,13 @@ modules_hpp =\
   Modules/MAction/DWF.hpp \
   Modules/MAction/Wilson.hpp \
   Modules/MContraction/Baryon.hpp \
-  Modules/MContraction/DiscLoop.hpp \
-  Modules/MContraction/Gamma3pt.hpp \
   Modules/MContraction/Meson.hpp \
-  Modules/MContraction/WeakHamiltonian.hpp \
-  Modules/MContraction/WeakHamiltonianEye.hpp \
-  Modules/MContraction/WeakHamiltonianNonEye.hpp \
-  Modules/MContraction/WeakNeutral4ptDisc.hpp \
   Modules/MGauge/Load.hpp \
   Modules/MGauge/Random.hpp \
   Modules/MGauge/Unit.hpp \
-  Modules/MLoop/NoiseLoop.hpp \
   Modules/MSolver/RBPrecCG.hpp \
   Modules/MSource/Point.hpp \
   Modules/MSource/SeqGamma.hpp \
-  Modules/MSource/Wall.hpp \
   Modules/MSource/Z2.hpp \
   Modules/Quark.hpp

View File

@@ -20,17 +20,4 @@ The simple testcase in this directory is the submitted bug report that encapsulates the
 problem. The test case works with icpc and with clang++, but fails consistently on g++
 current variants.

 Peter
-
-************
-
-Second GCC bug reported, see Issue 100.
-
-https://wandbox.org/permlink/tzssJza6R9XnqANw
-
-https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
-
-Getting Travis fails under gcc-5 for Test_simd, now that I added more comprehensive testing to the
-CI test suite. The limitations of Travis runtime limits & weak cores are being shown.
-
-Travis uses 5.4.1 for g++-5.

View File

@@ -1,86 +0,0 @@
#! /bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@
usage()
{
cat <<EOF
Usage: grid-config [OPTION]
Known values for OPTION are:
--prefix show Grid installation prefix
--cxxflags print pre-processor and compiler flags
--ldflags print library linking flags
--libs print library linking information
--summary print full build summary
--help display this help and exit
--version output version information
--git print git revision
EOF
exit $1
}
if test $# -eq 0; then
usage 1
fi
cflags=false
libs=false
while test $# -gt 0; do
case "$1" in
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
*) optarg= ;;
esac
case "$1" in
--prefix)
echo $prefix
;;
--version)
echo @VERSION@
exit 0
;;
--git)
echo "@GRID_BRANCH@ @GRID_SHA@"
exit 0
;;
--help)
usage 0
;;
--cxxflags)
echo @GRID_CXXFLAGS@
;;
--ldflags)
echo @GRID_LDFLAGS@
;;
--libs)
echo @GRID_LIBS@
;;
--summary)
echo ""
echo "@GRID_SUMMARY@"
echo ""
;;
*)
usage
exit 1
;;
esac
shift
done
exit 0
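# Typical pkg-config-style use:
#   g++ `grid-config --cxxflags` -o app app.cc `grid-config --ldflags` `grid-config --libs`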

View File

@@ -39,17 +39,19 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/algorithms/approx/MultiShiftFunction.h>

 #include <Grid/algorithms/iterative/ConjugateGradient.h>
+#include <Grid/algorithms/iterative/ConjugateGradientShifted.h>
 #include <Grid/algorithms/iterative/ConjugateResidual.h>
 #include <Grid/algorithms/iterative/NormalEquations.h>
 #include <Grid/algorithms/iterative/SchurRedBlack.h>
 #include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h>
 #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>

 // Lanczos support
-//#include <Grid/algorithms/iterative/MatrixUtils.h>
+#include <Grid/algorithms/iterative/MatrixUtils.h>
 #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
 #include <Grid/algorithms/CoarsenedMatrix.h>
-#include <Grid/algorithms/FFT.h>

 // Eigen/lanczos
 // EigCg

View File

@@ -1,7 +1,7 @@
-#include <Grid/GridCore.h>
+#include <Grid/Grid.h>

 namespace Grid {
@@ -13,10 +13,9 @@ void *PointerCache::Insert(void *ptr,size_t bytes) {

   if (bytes < 4096 ) return NULL;

-#ifdef GRID_OMP
+#ifdef _OPENMP
   assert(omp_in_parallel()==0);
 #endif

   void * ret = NULL;
   int v = -1;

View File

@@ -1,37 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
//disables and intel compiler specific warning (in json.hpp)
#pragma warning disable 488
#endif

View File

@@ -38,12 +38,52 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #ifndef GRID_H
 #define GRID_H

-#include <Grid/GridCore.h>
-#include <Grid/GridQCDcore.h>
-#include <Grid/qcd/action/Action.h>
-#include <Grid/qcd/utils/GaugeFix.h>
-#include <Grid/qcd/smearing/Smearing.h>
-#include <Grid/parallelIO/MetaData.h>
-#include <Grid/qcd/hmc/HMC_aggregate.h>
+///////////////////
+// Std C++ dependencies
+///////////////////
+#include <cassert>
+#include <complex>
+#include <vector>
+#include <iostream>
+#include <iomanip>
+#include <random>
+#include <functional>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <ctime>
+#include <sys/time.h>
+#include <chrono>
+
+///////////////////
+// Grid headers
+///////////////////
+#include "Config.h"
+#include <Grid/Timer.h>
+#include <Grid/PerfCount.h>
+#include <Grid/Log.h>
+#include <Grid/AlignedAllocator.h>
+#include <Grid/Simd.h>
+#include <Grid/serialisation/Serialisation.h>
+#include <Grid/Threads.h>
+#include <Grid/Lexicographic.h>
+#include <Grid/Init.h>
+#include <Grid/Communicator.h>
+#include <Grid/Cartesian.h>
+#include <Grid/Tensors.h>
+#include <Grid/Lattice.h>
+#include <Grid/Cshift.h>
+#include <Grid/Stencil.h>
+#include <Grid/Algorithms.h>
+#include <Grid/parallelIO/BinaryIO.h>
+#include <Grid/FFT.h>
+
+#include <Grid/qcd/QCD.h>
+#include <Grid/parallelIO/NerscIO.h>
+#include <Grid/qcd/hmc/NerscCheckpointer.h>
+#include <Grid/qcd/hmc/HmcRunner.h>

 #endif
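// Net effect of this hunk: the older Grid.h (the '+' side) pulls every
// standard and Grid header into one translation unit, while the newer layout
// (the '-' side) delegates to GridCore.h and GridQCDcore.h so applications
// include only the layers they need.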

View File

@@ -1,29 +0,0 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <zlib.h>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */

View File

@@ -1,9 +0,0 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif

View File

@@ -1,6 +1,6 @@
 /*************************************************************************************

 Grid physics library, www.github.com/paboyle/Grid

 Source file: ./lib/Init.cc
@@ -36,20 +36,17 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <stdint.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <signal.h>
 #include <iostream>
 #include <iterator>
+#include <Grid/Grid.h>
 #include <algorithm>
 #include <iterator>
 #include <cstdlib>
 #include <memory>
-
-#include <Grid/Grid.h>
-#include <Grid/util/CompilerCompatible.h>

 #include <fenv.h>
 #ifdef __APPLE__
@@ -95,14 +92,14 @@ const std::vector<int> GridDefaultSimd(int dims,int nsimd)
       if ( nn>=2) {
         layout[d]=2;
         nn/=2;
       } else {
         layout[d]=1;
       }
     }
     assert(nn==1);
     return layout;
 }
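// In words: GridDefaultSimd spreads the nsimd vector lanes over the lattice
// dimensions in factors of two, e.g. nsimd = 8 on a four-dimensional grid
// splits three of the four directions by 2 and leaves one unsplit; the
// assert(nn==1) guarantees every lane has been assigned.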
 ////////////////////////////////////////////////////////////
 // Command line parsing assist for stock controls
 ////////////////////////////////////////////////////////////
@@ -146,7 +143,7 @@ void GridCmdOptionIntVector(std::string &str,std::vector<int> & vec)
     vec.push_back(i);
     if(std::ispunct(ss.peek()))
       ss.ignore();
   }
   return;
 }
@@ -222,59 +219,8 @@ void Grid_init(int *argc,char ***argv)
     CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024;
   }

-  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
-    Grid_debug_handler_init();
-  }
-
   CartesianCommunicator::Init(argc,argv);

-  if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
-    Grid_quiesce_nodes();
-  } else {
-    FILE *fp;
-    std::ostringstream fname;
-    fname<<"Grid.stdout.";
-    fname<<CartesianCommunicator::RankWorld();
-    fp=freopen(fname.str().c_str(),"w",stdout);
-    assert(fp!=(FILE *)NULL);
-  }
-
-  ////////////////////////////////////
-  // Banner
-  ////////////////////////////////////
-
-  if ( CartesianCommunicator::RankWorld() == 0 ) {
-    std::cout <<std::endl;
-    std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
-    std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
-    std::cout << "__|_ | | | | | | | | | | | | _|__"<<std::endl;
-    std::cout << "__|_ _|__"<<std::endl;
-    std::cout << "__|_ GGGG RRRR III DDDD _|__"<<std::endl;
-    std::cout << "__|_ G R R I D D _|__"<<std::endl;
-    std::cout << "__|_ G R R I D D _|__"<<std::endl;
-    std::cout << "__|_ G GG RRRR I D D _|__"<<std::endl;
-    std::cout << "__|_ G G R R I D D _|__"<<std::endl;
-    std::cout << "__|_ GGGG R R III DDDD _|__"<<std::endl;
-    std::cout << "__|_ _|__"<<std::endl;
-    std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
-    std::cout << "__|__|__|__|__|__|__|__|__|__|__|__|__|__|__"<<std::endl;
-    std::cout << " | | | | | | | | | | | | | | "<<std::endl;
-    std::cout << std::endl;
-    std::cout << std::endl;
-    std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
-    std::cout << std::endl;
-    std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
-    std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
-    std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
-    std::cout << "(at your option) any later version."<<std::endl;
-    std::cout << std::endl;
-    std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
-    std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
-    std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
-    std::cout << "GNU General Public License for more details."<<std::endl;
-    std::cout << std::endl;
-  }
-
   ////////////////////////////////////
   // Logging
   ////////////////////////////////////
@@ -284,6 +230,9 @@ void Grid_init(int *argc,char ***argv)
   GridCmdOptionCSL(defaultLog,logstreams);
   GridLogConfigure(logstreams);

+  if( !GridCmdOptionExists(*argv,*argv+*argc,"--debug-stdout") ){
+    Grid_quiesce_nodes();
+  }
+
   if( GridCmdOptionExists(*argv,*argv+*argc,"--log") ){
     arg = GridCmdOptionPayload(*argv,*argv+*argc,"--log");
@@ -299,74 +248,101 @@ void Grid_init(int *argc,char ***argv)
     std::cout<<GridLogMessage<<" --help : this message"<<std::endl;
     std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<"Geometry:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<" --mpi n.n.n.n : default MPI decomposition"<<std::endl;
     std::cout<<GridLogMessage<<" --threads n : default number of OMP threads"<<std::endl;
     std::cout<<GridLogMessage<<" --grid n.n.n.n : default Grid size"<<std::endl;
     std::cout<<GridLogMessage<<" --shm M : allocate M megabytes of shared memory for comms"<<std::endl;
     std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<"Verbose and debug:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
-    std::cout<<GridLogMessage<<" --log list : comma separated list from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
+    std::cout<<GridLogMessage<<" --log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl;
     std::cout<<GridLogMessage<<" --decomposition : report on default omp,mpi and simd decomposition"<<std::endl;
     std::cout<<GridLogMessage<<" --debug-signals : catch sigsegv and print a blame report"<<std::endl;
     std::cout<<GridLogMessage<<" --debug-stdout : print stdout from EVERY node"<<std::endl;
     std::cout<<GridLogMessage<<" --notimestamp : suppress millisecond resolution stamps"<<std::endl;
     std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<"Performance:"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
-    std::cout<<GridLogMessage<<" --comms-concurrent : Asynchronous MPI calls; several dirs at a time "<<std::endl;
-    std::cout<<GridLogMessage<<" --comms-sequential : Synchronous MPI calls; one dirs at a time "<<std::endl;
-    std::cout<<GridLogMessage<<" --comms-overlap : Overlap comms with compute "<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<" --dslash-generic: Wilson kernel for generic Nc"<<std::endl;
     std::cout<<GridLogMessage<<" --dslash-unroll : Wilson kernel for Nc=3"<<std::endl;
     std::cout<<GridLogMessage<<" --dslash-asm : Wilson kernel for AVX512"<<std::endl;
-    std::cout<<GridLogMessage<<std::endl;
     std::cout<<GridLogMessage<<" --lebesgue : Cache oblivious Lebesgue curve/Morton order/Z-graph stencil looping"<<std::endl;
     std::cout<<GridLogMessage<<" --cacheblocking n.m.o.p : Hypercuboidal cache blocking"<<std::endl;
     std::cout<<GridLogMessage<<std::endl;
     exit(EXIT_SUCCESS);
   }

+  ////////////////////////////////////
+  // Banner
+  ////////////////////////////////////
+  std::string COL_RED    = GridLogColours.colour["RED"];
+  std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
+  std::string COL_BLACK  = GridLogColours.colour["BLACK"];
+  std::string COL_GREEN  = GridLogColours.colour["GREEN"];
+  std::string COL_BLUE   = GridLogColours.colour["BLUE"];
+  std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
+  std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
+
+  std::cout <<std::endl;
+  std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
+  std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
+  std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
+  std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
+  std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
+  std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
+  std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
+  std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
+  std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;
+  std::cout << std::endl;
+  std::cout << std::endl;
+  std::cout <<COL_YELLOW<< std::endl;
+  std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
+  std::cout << std::endl;
+  std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
+  std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
+  std::cout << "the Free Software Foundation; either version 2 of the License, or"<<std::endl;
+  std::cout << "(at your option) any later version."<<std::endl;
+  std::cout << std::endl;
+  std::cout << "This program is distributed in the hope that it will be useful,"<<std::endl;
+  std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
+  std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
+  std::cout << "GNU General Public License for more details."<<std::endl;
+  std::cout << COL_BACKGROUND <<std::endl;
+  std::cout << std::endl;
+
   ////////////////////////////////////
   // Debug and performance options
   ////////////////////////////////////
+  if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
+    Grid_debug_handler_init();
+  }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-unroll") ){
     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptHandUnroll;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptHandUnroll;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-asm") ){
     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptInlineAsm;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptInlineAsm;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--dslash-generic") ){
     QCD::WilsonKernelsStatic::Opt=QCD::WilsonKernelsStatic::OptGeneric;
-    QCD::StaggeredKernelsStatic::Opt=QCD::StaggeredKernelsStatic::OptGeneric;
   }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-overlap") ){
-    QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsAndCompute;
-  } else {
-    QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
-  }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-concurrent") ){
-    CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicyConcurrent);
-  }
-  if( GridCmdOptionExists(*argv,*argv+*argc,"--comms-sequential") ){
-    CartesianCommunicator::SetCommunicatorPolicy(CartesianCommunicator::CommunicatorPolicySequential);
-  }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
     LebesgueOrder::UseLebesgueOrder=1;
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
     arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
     GridCmdOptionIntVector(arg,LebesgueOrder::Block);
   }
   if( GridCmdOptionExists(*argv,*argv+*argc,"--notimestamp") ){
     GridLogTimestamp(0);
   } else {
     GridLogTimestamp(1);
   }
@@ -374,7 +350,7 @@ void Grid_init(int *argc,char ***argv)
		    Grid_default_latt,
		    Grid_default_mpi);
-  std::cout << GridLogDebug << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
+  std::cout << GridLogMessage << "Requesting "<< CartesianCommunicator::MAX_MPI_SHM_BYTES <<" byte stencil comms buffers "<<std::endl;
  if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
    std::cout<<GridLogMessage<<"Grid Decomposition\n";
@@ -390,39 +366,30 @@ void Grid_init(int *argc,char ***argv)
  Grid_is_initialised = 1;
}
void Grid_finalize(void)
{
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
  MPI_Finalize();
  Grid_unquiesce_nodes();
#endif
#if defined (GRID_COMMS_SHMEM)
  shmem_finalize();
#endif
}
void GridLogLayout() {
  std::cout << GridLogMessage << "Grid Layout\n";
  std::cout << GridLogMessage << "\tGlobal lattice size : "<< GridCmdVectorIntToString(GridDefaultLatt()) << std::endl;
  std::cout << GridLogMessage << "\tOpenMP threads : "<< GridThread::GetThreads() <<std::endl;
  std::cout << GridLogMessage << "\tMPI tasks : "<< GridCmdVectorIntToString(GridDefaultMpi()) << std::endl;
}
void * Grid_backtrace_buffer[_NBACKTRACE];
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
{
-  fprintf(stderr,"Caught signal %d\n",si->si_signo);
-  fprintf(stderr," mem address %llx\n",(unsigned long long)si->si_addr);
-  fprintf(stderr," code %d\n",si->si_code);
+  printf("Caught signal %d\n",si->si_signo);
+  printf(" mem address %llx\n",(unsigned long long)si->si_addr);
+  printf(" code %d\n",si->si_code);
  // Linux/Posix
#ifdef __linux__
  // And x86 64bit
#ifdef __x86_64__
  ucontext_t * uc= (ucontext_t *)ptr;
  struct sigcontext *sc = (struct sigcontext *)&uc->uc_mcontext;
-  fprintf(stderr," instruction %llx\n",(unsigned long long)sc->rip);
+  printf(" instruction %llx\n",(unsigned long long)sc->rip);
#define REG(A) printf(" %s %lx\n",#A,sc-> A);
  REG(rdi);
  REG(rsi);
@@ -445,11 +412,7 @@ void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr)
  REG(r15);
#endif
#endif
-  fflush(stderr);
-  BACKTRACEFP(stderr);
-  fprintf(stderr,"Called backtrace\n");
-  fflush(stdout);
-  fflush(stderr);
+  BACKTRACE();
  exit(0);
  return;
};
@@ -462,12 +425,9 @@ void Grid_debug_handler_init(void)
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV,&sa,NULL);
  sigaction(SIGTRAP,&sa,NULL);
  sigaction(SIGBUS,&sa,NULL);
  feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO);
  sigaction(SIGFPE,&sa,NULL);
  sigaction(SIGKILL,&sa,NULL);
  sigaction(SIGILL,&sa,NULL);
}
}
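Taken together, the option parsing and the init/finalise routines above define the library's start-up contract. A minimal driver sketch (an illustration assembled from functions visible in this diff, not code from either commit):

#include <Grid/Grid.h>

int main(int argc, char **argv) {
  Grid::Grid_init(&argc, &argv);   // consumes Grid flags such as --dslash-asm --comms-overlap --lebesgue
  // ... lattice application body ...
  Grid::Grid_finalize();           // MPI_Finalize()/shmem_finalize(), depending on the comms build
  return 0;
}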

View File

@@ -1,6 +1,6 @@
/*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid
    Source file: ./lib/Init.h
@@ -46,7 +46,6 @@ namespace Grid {
  const int &GridThreads(void) ;
  void GridSetThreads(int t) ;
  void GridLogTimestamp(int);
  void GridLogLayout();
  // Common parsing chores
  std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);

View File

@@ -29,11 +29,9 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
-#include <Grid/GridCore.h>
+#include <Grid/Grid.h>
#include <Grid/util/CompilerCompatible.h>
#include <cxxabi.h>
#include <memory>
namespace Grid {

View File

@@ -110,8 +110,8 @@ public:
  friend std::ostream& operator<< (std::ostream& stream, Logger& log){
    if ( log.active ) {
-      stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : ";
-      stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : ";
+      stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : ";
+      stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : ";
      if ( log.timestamp ) {
        StopWatch.Stop();
        GridTime now = StopWatch.Elapsed();

BIN
lib/Old/Endeavour.tgz Normal file

Binary file not shown.

lib/Old/Tensor_peek.h Normal file
View File

@@ -0,0 +1,154 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_peek.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_PEEK_H
#define GRID_MATH_PEEK_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Peek on a specific index; returns a scalar in that index, tensor inherits rest
//////////////////////////////////////////////////////////////////////////////
// If we hit the right index, return scalar with no further recursion
//template<int Level> inline ComplexF peekIndex(const ComplexF arg) { return arg;}
//template<int Level> inline ComplexD peekIndex(const ComplexD arg) { return arg;}
//template<int Level> inline RealF peekIndex(const RealF arg) { return arg;}
//template<int Level> inline RealD peekIndex(const RealD arg) { return arg;}
#if 0
// Scalar peek, no indices
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<vtype>
{
return arg;
}
// Vector peek, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iScalar<vtype> // Index matches
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i];
return ret;
}
// Matrix peek, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iScalar<vtype>
{
iScalar<vtype> ret; // return scalar
ret._internal = arg._internal[i][j];
return ret;
}
/////////////
// No match peek for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg) -> iScalar<decltype(peekIndex<Level>(arg._internal))>
{
iScalar<decltype(peekIndex<Level>(arg._internal))> ret;
ret._internal= peekIndex<Level>(arg._internal);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i) -> iScalar<decltype(peekIndex<Level>(arg._internal,i))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i))> ret;
ret._internal=peekIndex<Level>(arg._internal,i);
return ret;
}
template<int Level,class vtype, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iScalar<vtype> &arg,int i,int j) -> iScalar<decltype(peekIndex<Level>(arg._internal,i,j))>
{
iScalar<decltype(peekIndex<Level>(arg._internal,i,j))> ret;
ret._internal=peekIndex<Level>(arg._internal,i,j);
return ret;
}
// vector
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg) -> iVector<decltype(peekIndex<Level>(arg._internal[0])),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0])),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii]);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i);
}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iVector<vtype,N> &arg,int i,int j) -> iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N>
{
iVector<decltype(peekIndex<Level>(arg._internal[0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
ret._internal[ii]=peekIndex<Level>(arg._internal[ii],i,j);
}
return ret;
}
// matrix
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0])),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj]);// Could avoid this because peeking a scalar is dumb
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i);
}}
return ret;
}
template<int Level,class vtype,int N, typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
auto peekIndex(const iMatrix<vtype,N> &arg,int i,int j) -> iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N>
{
iMatrix<decltype(peekIndex<Level>(arg._internal[0][0],i,j)),N> ret;
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
ret._internal[ii][jj]=peekIndex<Level>(arg._internal[ii][jj],i,j);
}}
return ret;
}
#endif
}
#endif
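As a reading aid for the (compiled-out) overloads above, a hypothetical call with assumed tensor-level numbering, not taken from the source:

iVector<ComplexD, 4> v;                    // assume this level of nesting matches Level 1
iScalar<ComplexD> s = peekIndex<1>(v, 2);  // matching level: strip the vector index, wrap component 2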

lib/Old/Tensor_poke.h Normal file
View File

@@ -0,0 +1,127 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Old/Tensor_poke.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATH_POKE_H
#define GRID_MATH_POKE_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////////
// Poke a specific index;
//////////////////////////////////////////////////////////////////////////////
#if 0
// Scalar poke
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<vtype> &arg)
{
ret._internal = arg._internal;
}
// Vector poke, one index
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iScalar<vtype> &arg,int i)
{
ret._internal[i] = arg._internal;
}
//Matrix poke, two indices
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel == Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iScalar<vtype> &arg,int i,int j)
{
ret._internal[i][j] = arg._internal;
}
/////////////
// No match poke for scalar,vector,matrix must forward on either 0,1,2 args. Must have 9 routines with notvalue
/////////////
// scalar
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal))> &arg)
{
pokeIndex<Level>(ret._internal,arg._internal);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0))> &arg, int i)
{
pokeIndex<Level>(ret._internal,arg._internal,i);
}
template<int Level,class vtype,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iScalar<vtype> &ret, const iScalar<decltype(peekIndex<Level>(ret._internal,0,0))> &arg,int i,int j)
{
pokeIndex<Level>(ret._internal,arg._internal,i,j);
}
// Vector
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, iVector<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii]);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i);
}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iVector<vtype,N> &ret, const iVector<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg,int i,int j)
{
for(int ii=0;ii<N;ii++){
pokeIndex<Level>(ret._internal[ii],arg._internal[ii],i,j);
}
}
// Matrix
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal)),N> &arg)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj]);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0)),N> &arg,int i)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i);
}}
}
template<int Level,class vtype,int N,typename std::enable_if< iScalar<vtype>::TensorLevel != Level >::type * =nullptr> inline
void pokeIndex(iMatrix<vtype,N> &ret, const iMatrix<decltype(peekIndex<Level>(ret._internal,0,0)),N> &arg, int i,int j)
{
for(int ii=0;ii<N;ii++){
for(int jj=0;jj<N;jj++){
pokeIndex<Level>(ret._internal[ii][jj],arg._internal[ii][jj],i,j);
}}
}
#endif
}
#endif
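And the matching poke direction under the same assumed level numbering (illustration only):

iVector<ComplexD, 4> v;
iScalar<ComplexD> s;
pokeIndex<1>(v, s, 2);   // write the scalar payload back into component 2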

View File

@@ -26,8 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/
/* END LEGAL */
-#include <Grid/GridCore.h>
-#include <Grid/perfmon/PerfCount.h>
+#include <Grid/Grid.h>
+#include <Grid/PerfCount.h>
namespace Grid {

View File

@@ -172,7 +172,7 @@ public:
    const char * name = PerformanceCounterConfigs[PCT].name;
    fd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
    if (fd == -1) {
-      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
+      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
      perror("Error is");
    }
    int norm = PerformanceCounterConfigs[PCT].normalisation;
@@ -181,7 +181,7 @@ public:
    name = PerformanceCounterConfigs[norm].name;
    cyclefd = perf_event_open(&pe, 0, -1, -1, 0); // pid 0, cpu -1 current process any cpu. group -1
    if (cyclefd == -1) {
-      fprintf(stderr, "Error opening leader %llx for event %s\n",(long long) pe.config,name);
+      fprintf(stderr, "Error opening leader %llx for event %s\n", pe.config,name);
      perror("Error is");
    }
#endif
@@ -205,14 +205,13 @@ public:
    void Stop(void) {
      count=0;
      cycles=0;
-      size_t ign;
#ifdef __linux__
+      ssize_t ign;
      if ( fd!= -1) {
        ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
        ign=::read(fd, &count, sizeof(long long));
-        ign+=::read(cyclefd, &cycles, sizeof(long long));
-        assert(ign=2*sizeof(long long));
+        ign=::read(cyclefd, &cycles, sizeof(long long));
      }
      elapsed = cyclecount() - begin;
#else

View File

@@ -172,8 +172,8 @@ namespace Grid {
};
-#include <Grid/simd/Grid_vector_types.h>
-#include <Grid/simd/Grid_vector_unops.h>
+#include "simd/Grid_vector_types.h"
+#include "simd/Grid_vector_unops.h"
namespace Grid {
  // Default precision

View File

@@ -1,9 +1,11 @@
-#include <Grid/GridCore.h>
-#include <Grid/perfmon/PerfCount.h>
-#include <Grid/perfmon/Stat.h>
+#include <Grid/Grid.h>
+#include <Grid/PerfCount.h>
+#include <Grid/Stat.h>
namespace Grid {
bool PmuStat::pmu_initialized=false;

File diff suppressed because it is too large

View File

@@ -37,9 +37,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_OMP
#include <omp.h>
#ifdef GRID_NUMA
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#else
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
#endif
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#define PARALLEL_REGION _Pragma("omp parallel")
#define PARALLEL_CRITICAL _Pragma("omp critical")
@@ -51,9 +55,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define PARALLEL_CRITICAL
#endif
#define parallel_for PARALLEL_FOR_LOOP for
#define parallel_for_nest2 PARALLEL_NESTED_LOOP2 for
namespace Grid {
  // Introduce a class to gain deterministic bit reproducible reduction.
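For orientation (an illustration, not part of either commit): the parallel_for macros above let call sites write

parallel_for(int ss=0; ss<grid->oSites(); ss++){ /* site work */ }

which the preprocessor expands to PARALLEL_FOR_LOOP for(...), an omp parallel for over the outer site loop; the other side of the comparison instead spells PARALLEL_FOR_LOOP out before each loop, as the CoarsenedMatrix hunks below show. With schedule(runtime) the OpenMP schedule can be chosen at run time, e.g. through the OMP_SCHEDULE environment variable.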

View File

@@ -267,7 +267,8 @@ namespace Grid {
      SimpleCompressor<siteVector> compressor;
      Stencil.HaloExchange(in,compressor);
-      parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+      PARALLEL_FOR_LOOP
+      for(int ss=0;ss<Grid()->oSites();ss++){
        siteVector res = zero;
        siteVector nbr;
        int ptype;
@@ -379,7 +380,8 @@ namespace Grid {
        Subspace.ProjectToSubspace(oProj,oblock);
        // blockProject(iProj,iblock,Subspace.subspace);
        // blockProject(oProj,oblock,Subspace.subspace);
-        parallel_for(int ss=0;ss<Grid()->oSites();ss++){
+        PARALLEL_FOR_LOOP
+        for(int ss=0;ss<Grid()->oSites();ss++){
          for(int j=0;j<nbasis;j++){
            if( disp!= 0 ) {
              A[p]._odata[ss](j,i) = oProj._odata[ss](j);
@@ -425,7 +427,7 @@ namespace Grid {
        A[p]=zero;
      }
-      GridParallelRNG RNG(Grid()); RNG.SeedFixedIntegers(std::vector<int>({55,72,19,17,34}));
+      GridParallelRNG RNG(Grid()); RNG.SeedRandomDevice();
      Lattice<iScalar<CComplex> > val(Grid()); random(RNG,val);
      Complex one(1.0);

View File

@@ -235,7 +235,7 @@ namespace Grid {
      Field tmp(in._grid);
      _Mat.MeooeDag(in,tmp);
      _Mat.MooeeInvDag(tmp,out);
      _Mat.MeooeDag(out,tmp);
      _Mat.MooeeDag(in,out);

View File

View File

@@ -197,9 +197,8 @@ namespace Grid {
    void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
      GridBase *grid=in._grid;
-      //std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
-      //std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
+      // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
      int vol=grid->gSites();

View File

@@ -25,7 +25,7 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
-#include <Grid/GridCore.h>
+#include <Grid/Grid.h>
namespace Grid {
double MultiShiftFunction::approx(double x)

View File

@@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H
#include <stddef.h>
-#include <Grid/GridStd.h>
+#include <Config.h>
#ifdef HAVE_LIBGMP
#include "bigfloat.h"

View File

@@ -1,593 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
Copyright (C) 2017
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
int blockDim ;
int Nblock;
BlockCGtype CGtype;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv)
{};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceInnerProductMatrix(m_rr,R,R,Orthog);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cholesky from Eigen
// There exists a ldlt that is documented as more stable
////////////////////////////////////////////////////////////////////////////////////////////////////
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
// FIXME:: make a sliceMulMatrix to avoid zero vector
sliceMulMatrix(Q,Cinv,R,Orthog);
}
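  // Note (annotation, not in the committed file): concretely, with
  //   m_rr = R^dag R = L L^dag   and   C = L^dag,
  // the Q computed above satisfies
  //   Q^dag Q = C^{-dag} (R^dag R) C^{-1} = L^{-1} (L L^dag) L^{-dag} = 1,
  // i.e. sliceMulMatrix returns slice-orthonormal search directions.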
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
if ( CGtype == BlockCGrQ ) {
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
conformable(X, B);
Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
Eigen::MatrixXcd m_pAp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_alpha = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_beta = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
/************************************************************************
* Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
************************************************************************
* O'Leary : R = B - A X
* O'Leary : P = M R ; preconditioner M = 1
* O'Leary : alpha = PAP^{-1} RMR
* O'Leary : beta = RMR^{-1}_old RMR_new
* O'Leary : X=X+Palpha
* O'Leary : R_new=R_old-AP alpha
* O'Leary : P=MR_new+P beta
*/
R = Src - AP;
P = R;
sliceInnerProductMatrix(m_rr,R,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
m_pAp_inv = m_pAp.inverse();
m_alpha = m_pAp_inv * m_rr ;
// Psi, R update
sliceMaddTimer.Start();
sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddMatrix(R ,m_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
m_rr_inv = m_rr.inverse();
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_rr,R,R,Orthog);
sliceInnerTimer.Stop();
m_beta = m_rr_inv *m_rr;
// Search update
sliceMaddTimer.Start();
sliceMaddMatrix(AP,m_beta,P,R,Orthog);
sliceMaddTimer.Stop();
P= AP;
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
//////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
Psi.checkerboard = Src.checkerboard;
conformable(Psi, Src);
Field P(Src);
Field AP(Src);
Field R(Src);
std::vector<ComplexD> v_pAp(Nblock);
std::vector<RealD> v_rr (Nblock);
std::vector<RealD> v_rr_inv(Nblock);
std::vector<RealD> v_alpha(Nblock);
std::vector<RealD> v_beta(Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,Src,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,Src,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,Psi,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
// Initial search dir is guess
Linop.HermOp(Psi, AP);
R = Src - AP;
P = R;
sliceNorm(v_rr,R,Orthog);
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch sliceNormTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
RealD rrsum=0;
for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" / "<<std::sqrt(rrsum/sssum) <<std::endl;
MatrixTimer.Start();
Linop.HermOp(P, AP);
MatrixTimer.Stop();
// Alpha
sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){
v_alpha[b] = v_rr[b]/real(v_pAp[b]);
}
// Psi, R update
sliceMaddTimer.Start();
sliceMaddVector(Psi,v_alpha, P,Psi,Orthog); // add alpha * P to psi
sliceMaddVector(R ,v_alpha,AP, R,Orthog,-1.0);// sub alpha * AP to resid
sliceMaddTimer.Stop();
// Beta
for(int b=0;b<Nblock;b++){
v_rr_inv[b] = 1.0/v_rr[b];
}
sliceNormTimer.Start();
sliceNorm(v_rr,R,Orthog);
sliceNormTimer.Stop();
for(int b=0;b<Nblock;b++){
v_beta[b] = v_rr_inv[b] *v_rr[b];
}
// Search update
sliceMaddTimer.Start();
sliceMaddVector(P,v_beta,P,R,Orthog);
sliceMaddTimer.Stop();
/*********************
* convergence monitor
*********************
*/
RealD max_resid=0;
for(int b=0;b<Nblock;b++){
RealD rr = v_rr[b]/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP);
AP = AP-Src;
std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
};
}
#endif
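For readers scanning this large deletion, a hypothetical usage sketch of the removed class (the fermion field type and the HermOp operator are assumptions; the constructor and call signatures are those of the file above):

// Solve A x_i = b_i for the Nblock right-hand sides laid out along dimension 0.
BlockConjugateGradient<LatticeFermion> BCGrQ(BlockCGrQ, /*Orthog=*/0, 1.0e-8, 10000);
BCGrQ(HermOp, src, sol);   // BlockCG and CGmultiRHS select the other two variants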

View File

@@ -45,8 +45,6 @@ class ConjugateGradient : public OperatorFunction<Field> {
  // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
      : Tolerance(tol),
        MaxIterations(maxit),
@@ -78,12 +76,18 @@ class ConjugateGradient : public OperatorFunction<Field> {
    cp = a;
    ssq = norm2(src);
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: guess " << guess << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: src " << ssq << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: mp " << d << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: mmp " << b << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << std::setprecision(4)
+              << "ConjugateGradient: p " << a << std::endl;
    RealD rsq = Tolerance * Tolerance * ssq;
@@ -93,7 +97,8 @@ }
    }
    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
+              << "ConjugateGradient: k=0 residual " << cp << " target " << rsq
+              << std::endl;
    GridStopWatch LinalgTimer;
    GridStopWatch MatrixTimer;
@@ -123,11 +128,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
      p = p * b + r;
      LinalgTimer.Stop();
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
                << " residual " << cp << " target " << rsq << std::endl;
      std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
      std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
      // Stopping condition
      if (cp <= rsq) {
@@ -135,33 +137,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
        Linop.HermOpAndNorm(psi, mmp, d, qq);
        p = mmp - src;
        RealD mmpnorm = sqrt(norm2(mmp));
        RealD psinorm = sqrt(norm2(psi));
        RealD srcnorm = sqrt(norm2(src));
        RealD resnorm = sqrt(norm2(p));
        RealD true_residual = resnorm / srcnorm;
-        std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
-        std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
-        std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
-        std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
-        std::cout << GridLogMessage << "Time breakdown "<<std::endl;
-        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
-        std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
-        std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
+        std::cout << GridLogMessage
+                  << "ConjugateGradient: Converged on iteration " << k << std::endl;
+        std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
+                  << " true residual " << true_residual << " target "
+                  << Tolerance << std::endl;
+        std::cout << GridLogMessage << "Time elapsed: Iterations "
+                  << SolverTimer.Elapsed() << " Matrix "
+                  << MatrixTimer.Elapsed() << " Linalg "
+                  << LinalgTimer.Elapsed();
+        std::cout << std::endl;
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
        IterationsToComplete = k;
        return;
      }
    }
    std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
              << std::endl;
    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
  }
};
}
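For orientation (an illustration; the field and operator types are assumed): the solver is driven as

ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000);
CG(HermOp, src, psi);

and the IterationsToComplete member, where present, records how many iterations the finished solve took.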

View File

@@ -35,7 +35,6 @@ namespace Grid {
  class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
  public:
    RealD Tolerance;
    RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
    Integer MaxInnerIterations;
    Integer MaxOuterIterations;
    GridBase* SinglePrecGrid; //Grid for single-precision fields
@@ -43,16 +42,12 @@ namespace Grid {
    LinearOperatorBase<FieldF> &Linop_f;
    LinearOperatorBase<FieldD> &Linop_d;
    Integer TotalInnerIterations; //Number of inner CG iterations
    Integer TotalOuterIterations; //Number of restarts
    Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
    //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
    LinearFunction<FieldF> *guesser;
    MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
      Linop_f(_Linop_f), Linop_d(_Linop_d),
-      Tolerance(tol), InnerTolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
+      Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
      OuterLoopNormMult(100.), guesser(NULL){ };
    void useGuesser(LinearFunction<FieldF> &g){
@@ -60,8 +55,9 @@ }
    }
    void operator() (const FieldD &src_d_in, FieldD &sol_d){
-      TotalInnerIterations = 0;
+      (*this)(src_d_in,sol_d,NULL);
+    }
+    void operator() (const FieldD &src_d_in, FieldD &sol_d, RealD *shift){
      GridStopWatch TotalTimer;
      TotalTimer.Start();
@@ -81,7 +77,7 @@ namespace Grid {
      FieldD src_d(DoublePrecGrid);
      src_d = src_d_in; //source for next inner iteration, computed from residual during operation
-      RealD inner_tol = InnerTolerance;
+      RealD inner_tol = Tolerance;
      FieldF src_f(SinglePrecGrid);
      src_f.checkerboard = cb;
@@ -89,18 +85,17 @@ namespace Grid {
      FieldF sol_f(SinglePrecGrid);
      sol_f.checkerboard = cb;
-      ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
+      ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
      CG_f.ErrorOnNoConverge = false;
      GridStopWatch InnerCGtimer;
      GridStopWatch PrecChangeTimer;
-      Integer &outer_iter = TotalOuterIterations; //so it will be equal to the final iteration count
-      for(outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
+      for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
        //Compute double precision rsd and also new RHS vector.
        Linop_d.HermOp(sol_d, tmp_d);
        if(shift) axpy(tmp_d,*shift,sol_d,tmp_d);
        RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector
        std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
@@ -124,9 +119,8 @@ namespace Grid {
        //Inner CG
        CG_f.Tolerance = inner_tol;
        InnerCGtimer.Start();
-        CG_f(Linop_f, src_f, sol_f);
+        CG_f(Linop_f, src_f, sol_f,shift);
        InnerCGtimer.Stop();
        TotalInnerIterations += CG_f.IterationsToComplete;
        //Convert sol back to double and add to double prec solution
        PrecChangeTimer.Start();
@@ -139,13 +133,11 @@ namespace Grid {
      //Final trial CG
      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
-      ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
-      CG_d(Linop_d, src_d_in, sol_d);
+      ConjugateGradientShifted<FieldD> CG_d(Tolerance, MaxInnerIterations);
+      CG_d(Linop_d, src_d_in, sol_d,shift);
      TotalFinalStepIterations = CG_d.IterationsToComplete;
      TotalTimer.Stop();
-      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Inner CG iterations " << TotalInnerIterations << " Restarts " << TotalOuterIterations << " Final CG iterations " << TotalFinalStepIterations << std::endl;
-      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total time " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
+      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
    }
  };
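The restart loop above is standard mixed-precision defect correction; as a reading aid (not text from either commit), each outer iteration does

r_k = b - A x_k          (double precision, Linop_d)
A e_k ~ r_k              (single-precision inner CG, Linop_f)
x_{k+1} = x_k + e_k      (accumulated in double precision)

so the inner solve only has to beat the current residual, and the final double-precision CG polishes the result to the outer tolerance.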

View File

@@ -45,6 +45,7 @@ public:
  Integer MaxIterations;
  int verbose;
  MultiShiftFunction shifts;
  int iter;
  ConjugateGradientMultiShift(Integer maxit,MultiShiftFunction &_shifts) :
    MaxIterations(maxit),
@@ -60,6 +61,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, Field &psi)
  std::vector<Field> results(nshift,grid);
  (*this)(Linop,src,results,psi);
}
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &results, Field &psi)
{
  int nshift = shifts.order;
@@ -105,11 +107,12 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
  RealD a,b,c,d;
  RealD cp,bp,qq; //prev
  int cb=src.checkerboard;
  // Matrix mult fields
  Field r(grid);
-  Field p(grid);
+  Field p(grid); p.checkerboard = src.checkerboard;
  Field tmp(grid);
-  Field mmp(grid);
+  Field mmp(grid);mmp.checkerboard = src.checkerboard;
  // Check lightest mass
  for(int s=0;s<nshift;s++){
@@ -132,6 +135,9 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
  p=src;
  //MdagM+m[0]
  std::cout << "p.checkerboard " << p.checkerboard
            << "mmp.checkerboard " << mmp.checkerboard << std::endl;
  Linop.HermOpAndNorm(p,mmp,d,qq);
  axpy(mmp,mass[0],p,mmp);
  RealD rn = norm2(p);
@@ -269,6 +275,7 @@ void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector
      RealD cn = norm2(src);
      std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
    }
    iter = k;
    return;
  }
}
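A hypothetical usage sketch (field type, grid and operator are assumptions; the constructor and operator() signatures appear in the hunks above): one Krylov sequence yields the solution for every shift.

MultiShiftFunction shifts;                                  // e.g. filled from a rational approximation
ConjugateGradientMultiShift<LatticeFermion> MSCG(10000, shifts);
std::vector<LatticeFermion> results(shifts.order, grid);
MSCG(HermOp, src, results, psi);                            // per-shift solutions land in results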

View File

@@ -0,0 +1,404 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMultiShiftMixedPrec.h
Copyright (C) 2015
Author: Chulwoo Jung <chulwoo@quark.phy.bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
#define GRID_CONJUGATE_GRADIENT_MULTI_MIXED_PREC_H
namespace Grid {
//Mixed precision restarted defect correction CG
template<class FieldD,class FieldF
//, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0
//, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0
>
class MixedPrecisionConjugateGradientMultiShift : public LinearFunction<FieldD> {
public:
// RealD Tolerance;
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
LinearOperatorBase<FieldF> &Linop_f;
LinearOperatorBase<FieldD> &Linop_d;
MultiShiftFunction shifts;
Integer iter;
//Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
// LinearFunction<FieldF> *guesser;
MixedPrecisionConjugateGradientMultiShift(GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d,
Integer maxinnerit, MultiShiftFunction &_shifts ) :
Linop_f(_Linop_f), Linop_d(_Linop_d),
MaxInnerIterations(maxinnerit), SinglePrecGrid(_sp_grid),
OuterLoopNormMult(100.), shifts(_shifts) {};
void operator() (const FieldD &src_d_in, FieldD &sol_d){
assert(0); // not yet implemented
}
void operator() (const FieldD &src_d_in, std::vector<FieldD> &sol_d){
GridStopWatch TotalTimer;
TotalTimer.Start();
int cb = src_d_in.checkerboard;
int nshift = shifts.order;
assert(nshift == sol_d.size());
for(int i=0;i<nshift;i++) sol_d[i].checkerboard = cb;
RealD src_norm = norm2(src_d_in);
// RealD stop = src_norm * Tolerance*Tolerance;
GridBase* DoublePrecGrid = src_d_in._grid;
FieldD tmp_d(DoublePrecGrid); tmp_d.checkerboard = cb;
FieldD tmp2_d(DoublePrecGrid); tmp2_d.checkerboard = cb;
FieldD src_d(DoublePrecGrid);
src_d = src_d_in; //source for next inner iteration, computed from residual during operation
// RealD inner_tol = Tolerance;
FieldD psi_d(DoublePrecGrid);psi_d.checkerboard = cb;
FieldF src_f(SinglePrecGrid);
src_f.checkerboard = cb;
std::vector<FieldF> sol_f(nshift,SinglePrecGrid);
for(int i=0;i<nshift;i++) sol_f[i].checkerboard = cb;
// ConjugateGradientShifted<FieldF> CG_f(inner_tol, MaxInnerIterations);
ConjugateGradientMultiShift<FieldF> MSCG(MaxInnerIterations,shifts);
// CG_f.ErrorOnNoConverge = false;
GridStopWatch InnerCGtimer;
GridStopWatch PrecChangeTimer;
{
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;
// if(norm < OuterLoopNormMult * stop){
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
// break;
// }
// while(norm * inner_tol * inner_tol < stop) inner_tol *= 2; // inner_tol = sqrt(stop/norm) ??
PrecChangeTimer.Start();
precisionChange(src_f, src_d);
PrecChangeTimer.Stop();
// zeroit(sol_f);
//Inner CG
InnerCGtimer.Start();
int if_relup = 0;
#if 0
MSCG(Linop_f,src_f,sol_f);
#else
{
GridBase *grid = SinglePrecGrid;
////////////////////////////////////////////////////////////////////////
// Convenience references to the info stored in "MultiShiftFunction"
////////////////////////////////////////////////////////////////////////
int nshift = shifts.order;
std::vector<RealD> &mass(shifts.poles); // Make references to array in "shifts"
std::vector<RealD> &mresidual(shifts.tolerances);
std::vector<RealD> alpha(nshift,1.);
std::vector<FieldF> ps(nshift,grid);// Search directions
assert(sol_f.size()==nshift);
assert(mass.size()==nshift);
assert(mresidual.size()==nshift);
// dynamic sized arrays on stack; 2d is a pain with vector
RealD bs[nshift];
RealD rsq[nshift];
RealD z[nshift][2];
int converged[nshift];
const int primary =0;
//Primary shift fields CG iteration
RealD a,b,c,d;
RealD cp,bp,qq; //prev
int cb=src_f.checkerboard;
// Matrix mult fields
FieldF r(grid); r.checkerboard = src_f.checkerboard;
FieldF p(grid); p.checkerboard = src_f.checkerboard;
FieldF tmp(grid); tmp.checkerboard = src_f.checkerboard;
FieldF mmp(grid);mmp.checkerboard = src_f.checkerboard;
FieldF psi(grid);psi.checkerboard = src_f.checkerboard;
std::cout.precision(12);
std::cout<<GridLogMessage<<"norm2(psi_d)= "<<norm2(psi_d)<<std::endl;
std::cout<<GridLogMessage<<"norm2(psi)= "<<norm2(psi)<<std::endl;
// Check lightest mass
for(int s=0;s<nshift;s++){
assert( mass[s]>= mass[primary] );
converged[s]=0;
}
// Wire guess to zero
// Residuals "r" are src
// First search direction "p" is also src
cp = norm2(src_f);
Real c_relup = cp;
for(int s=0;s<nshift;s++){
rsq[s] = cp * mresidual[s] * mresidual[s];
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradientMultiShift: shift "<<s
<<" target resid "<<rsq[s]<<std::endl;
ps[s] = src_f;
}
// r and p for primary
r=src_f;
p=src_f;
//MdagM+m[0]
std::cout << "p.checkerboard " << p.checkerboard
<< "mmp.checkerboard " << mmp.checkerboard << std::endl;
Linop_f.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p);
d += rn*mass[0];
// have verified that inner product of
// p and mmp is equal to d after this since
// the d computation is tricky
// qq = real(innerProduct(p,mmp));
// std::cout<<GridLogMessage << "debug equal ? qq "<<qq<<" d "<< d<<std::endl;
b = -cp /d;
// Set up the various shift variables
int iz=0;
z[0][1-iz] = 1.0;
z[0][iz] = 1.0;
bs[0] = b;
for(int s=1;s<nshift;s++){
z[s][1-iz] = 1.0;
z[s][iz] = 1.0/( 1.0 - b*(mass[s]-mass[0]));
bs[s] = b*z[s][iz];
}
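// Annotation: z[s] is the ratio of the shifted to the unshifted residual
// polynomial at the current step, so the shifted residuals are r_s = z[s]*r
// and the shifted step sizes bs[s] = b*z[s][iz] follow from the primary
// recurrence with no additional matrix applications.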
// r += b[0] A.p[0]
// c= norm(r)
c=axpy_norm(r,b,mmp,r);
axpby(psi,0.,-bs[0],src_f,src_f);
for(int s=0;s<nshift;s++) {
axpby(sol_f[s],0.,-bs[s]*alpha[s],src_f,src_f);
}
// Iteration loop
int k;
// inefficient zeroing, please replace!
// RealD sol_norm = axpy_norm(sol_d[0],-1.,sol_d[0],sol_d[0]);
zeroit(sol_d[0]);
std::cout<<GridLogMessage<<"norm(sol_d[0])= "<<norm2(sol_d[0])<<std::endl;
int all_converged = 1;
RealD tmp1,tmp2;
for (k=1;k<=MaxOuterIterations;k++){
a = c /cp;
axpy(p,a,p,r);
// Note to self - direction ps is iterated separately
// for each shift. Does not appear to have any scope
// for avoiding linear algebra in "single" case.
//
// However SAME r is used. Could load "r" and update
// ALL ps[s]. 2/3 Bandwidth saving
// New Kernel: Load r, vector of coeffs, vector of pointers ps
for(int s=0;s<nshift;s++){
if ( ! converged[s] ) {
if (s==0){
axpy(ps[s],a,ps[s],r);
} else{
RealD as =a *z[s][iz]*bs[s] /(z[s][1-iz]*b);
axpby(ps[s],z[s][iz],as,r,ps[s]);
}
}
}
cp=c;
Linop_f.HermOpAndNorm(p,mmp,d,qq);
axpy(mmp,mass[0],p,mmp);
RealD rn = norm2(p);
d += rn*mass[0];
bp=b;
b=-cp/d;
c=axpy_norm(r,b,mmp,r);
// Toggle the recurrence history
bs[0] = b;
iz = 1-iz;
for(int s=1;s<nshift;s++){
if((!converged[s])){
RealD z0 = z[s][1-iz];
RealD z1 = z[s][iz];
z[s][iz] = z0*z1*bp
/ (b*a*(z1-z0) + z1*bp*(1- (mass[s]-mass[0])*b));
bs[s] = b*z[s][iz]/z0; // NB sign rel to Mike
}
}
axpy(psi,-bs[0],ps[0],psi);
for(int s=0;s<nshift;s++){
int ss = s;
// Scope for optimisation here in case of "single".
// Could load sol_f[0] and pull all ps[s] in.
// if ( single ) ss=primary;
// Bandwidth saving in single case is Ls * 3 -> 2+Ls, so ~ 3x saving
// Pipelined CG gain:
//
// New Kernel: Load r, vector of coeffs, vector of pointers ps
// New Kernel: Load sol_f[0], vector of coeffs, vector of pointers ps
// If we can predict the coefficient bs then we can fuse these and avoid a write/re-read cycle
// on ps[s].
// Before: 3 x npole + 3 x npole
// After : 2 x npole (ps[s]) => 3x speed up of multishift CG.
if( (!converged[s]) ) {
axpy(sol_f[ss],-bs[s]*alpha[s],ps[s],sol_f[ss]);
}
}
if (k%MaxInnerIterations==0){
// if (c < 1e-4*c_relup){
RealD c_f=c;
precisionChange(tmp_d,psi);
RealD sol_norm =axpy_norm (psi_d,1.,tmp_d,psi_d);
tmp1 = norm2(psi);
zeroit(psi);
tmp2 = norm2(psi);
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(sol)= "<<sol_norm<<" "<<tmp1<<" "<<tmp2<<std::endl;
// precisionChange(sol_d[0],sol_f[0]);
Linop_d.HermOpAndNorm(psi_d,tmp_d,tmp1,tmp2);
axpy(tmp2_d,mass[0],psi_d,tmp_d);
axpy(tmp_d,-1.,tmp2_d,src_d);
precisionChange(r,tmp_d);
c_relup = norm2(r);
std::cout<<GridLogMessage<<"k= "<<k<<" norm2(r)= "<<c<<" "<<c_relup<<" "<<c_f<<std::endl;
if_relup=1;
}
// Convergence checks
all_converged=1;
for(int s=0;s<nshift;s++){
if ( (!converged[s]) ){
RealD css = c * z[s][iz]* z[s][iz];
if(css<rsq[s]){
if ( ! converged[s] )
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has converged"<<std::endl;
converged[s]=1;
} else {
if (k%MaxInnerIterations==0)
std::cout<<GridLogMessage<<"ConjugateGradientMultiShift k="<<k<<" Shift "<<s<<" has not converged "<<css<<">="<<rsq[s]<<std::endl;
all_converged=0;
}
}
}
#if 0
if ( all_converged ){
std::cout<<GridLogMessage<< "CGMultiShift: All shifts have converged iteration "<<k<<std::endl;
#else
if ( converged[0] ){
std::cout<<GridLogMessage<< "CGMultiShift: Shift 0 has converged, terminating at iteration "<<k<<std::endl;
#endif
#if 1
for(int s=1; s < nshift; s++) {
Linop_f.HermOpAndNorm(sol_f[s],mmp,d,qq);
axpy(tmp,mass[s],sol_f[s],mmp);
axpy(r,-alpha[s],src_f,tmp);
RealD rn = norm2(r);
RealD cn = norm2(src_f);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(rn/cn)<<std::endl;
}
#endif
iter = k;
break;
}
}
// ugly hack
if ( !all_converged )
std::cout<<GridLogMessage<<"CG multi shift did not converge"<<std::endl;
// assert(0);
}
#endif
InnerCGtimer.Stop();
//Convert sol back to double and add to double prec solution
PrecChangeTimer.Start();
sol_d[0]=psi_d;
for(int i=1;i<nshift;i++)precisionChange(sol_d[i], sol_f[i]);
std::cout<<GridLogMessage<< "CGMultiShift: Checking solutions"<<std::endl;
// Check answers
for(int s=0; s < nshift; s++) {
RealD tmp1,tmp2;
Linop_d.HermOpAndNorm(sol_d[s],tmp_d,tmp1,tmp2);
axpy(tmp2_d,shifts.poles[s],sol_d[s],tmp_d);
axpy(tmp_d,-1.,src_d,tmp2_d);
std::cout<<GridLogMessage<<"CGMultiShift: shift["<<s<<"] true residual "<<std::sqrt(norm2(tmp_d)/norm2(src_d))<<std::endl;
}
PrecChangeTimer.Stop();
}
//Final trial CG
// std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;
TotalTimer.Stop();
std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
}
};
}
#endif
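A hedged usage sketch for the class above; the grids, operators and shift function below are placeholders rather than definitions from this header:
// MultiShiftFunction shifts;   // poles in shifts.poles, per-pole tolerances
// MixedPrecisionConjugateGradientMultiShift<LatticeFermionD,LatticeFermionF>
//   MSCG(sp_grid, HermOp_f, HermOp_d, 10000, shifts);
// std::vector<LatticeFermionD> sols(shifts.order, dp_grid);
// MSCG(src_d, sols);           // one double-precision solution per pole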

View File

@ -0,0 +1,168 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradient.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_SHIFTED_H
#define GRID_CONJUGATE_GRADIENT_SHIFTED_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators
// single input vec, single output vec.
/////////////////////////////////////////////////////////////
template<class Field>
class ConjugateGradientShifted : public OperatorFunction<Field> {
public:
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true.
RealD Tolerance;
Integer MaxIterations;
ConjugateGradientShifted(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) {
};
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi ){
(*this)(Linop,src,psi,NULL);
}
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi, RealD *shift){
psi.checkerboard = src.checkerboard;
conformable(psi,src);
RealD cp,c,a,d,b,ssq,qq,b_pred;
Field p(src);
Field mmp(src);
Field r(src);
//Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess)==0);
Linop.HermOpAndNorm(psi,mmp,d,b);
if(shift) axpy(mmp,*shift,psi,mmp);
RealD rn = norm2(psi);
if(shift) d += rn*(*shift);
RealD d2 = real(innerProduct(psi,mmp));
b= norm2(mmp);
RealD src_norm=norm2(src);
r= src-mmp;
p= r;
a =norm2(p);
cp =a;
ssq=norm2(src);
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
RealD rsq = Tolerance* Tolerance*ssq;
//Check if guess is really REALLY good :)
if ( cp <= rsq ) {
return;
}
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k=1;k<=MaxIterations;k++){
c=cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p,mmp,d,qq);
MatrixTimer.Stop();
LinalgTimer.Start();
if(shift) axpy(mmp,*shift,p,mmp);
RealD rn = norm2(p);
if(shift) d += rn*(*shift);
RealD d2 = real(innerProduct(p,mmp));
qq = norm2(mmp);
if (k%10==1) std::cout<< std::setprecision(4)<< "d: "<<d<<" d2= "<<d2<<std::endl;
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c/d;
b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
if (k%10==1) std::cout<< std::setprecision(4)<<"k= "<<k<<" src: "<<src_norm<<" r= "<<cp<<std::endl;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
LinalgTimer.Stop();
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi,mmp,d,qq);
if(shift) mmp = mmp + (*shift) * psi;
p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual " <<true_residual
<<" target "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
std::cout<<std::endl;
if(ErrorOnNoConverge)
assert(true_residual/Tolerance < 1000.0);
return;
}
}
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
// assert(0);
}
};
}
#endif
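A hedged usage sketch (operator and field names are placeholders); passing NULL for the shift pointer recovers the behaviour of the plain ConjugateGradient:
// ConjugateGradientShifted<LatticeFermion> CG(1.0e-8, 10000);
// RealD shift = 0.1;
// CG(HermOp, src, psi, &shift);   // solves (MdagM + shift) psi = src
// CG(HermOp, src, psi);           // NULL shift: ordinary CG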

View File

@ -0,0 +1,137 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/DenseMatrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DENSE_MATRIX_H
#define GRID_DENSE_MATRIX_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Matrix utils
/////////////////////////////////////////////////////////////
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
template<class T> void Size(DenseVector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void Resize(DenseVector<T > & mat, int N) {
mat.resize(N);
}
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Fill(DenseMatrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
int N,M;
Size(mat,N,M);
DenseMatrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
int N; SizeSquare(A,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
int dim; SizeSquare(mat,dim);
DenseMatrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/**Get a square submatrix**/
template <class T>
DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
}
#include "Householder.h"
#include "Francis.h"
#endif
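A minimal sketch exercising the helpers above, assuming this header is reachable through the main Grid include and that RealD is Grid's double alias:
#include <Grid.h>
using namespace Grid;
int main(void) {
  DenseMatrix<RealD> A; Resize(A, 3, 3);
  Unity(A);                           // A = identity
  PlusUnit(A, 2.0);                   // diagonal becomes 3
  DenseMatrix<RealD> At = Transpose(A);
  int N; SizeSquare(At, N);           // asserts squareness; N == 3
  return 0;
}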

View File

@ -0,0 +1,81 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/EigenSort.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Eigen sorter to begin with
/////////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
//hacking for testing for now
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(DenseVector<RealD>& lmd,
DenseVector<Field>& evec,int N) {
DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
for(int i=0;i<lmd.size();++i)
emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
}
}
void push(DenseVector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
};
}
#endif
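A hedged sketch of the scalar push(), which partially sorts in the descending order defined by less_lmd; the Field parameter (here a placeholder lattice type) only matters for the eigenvector overload:
// SortEigen<LatticeFermion> sorter;
// DenseVector<RealD> lmd = {0.1, 0.9, 0.5};
// sorter.push(lmd, (int)lmd.size());   // lmd becomes {0.9, 0.5, 0.1}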

View File

@ -0,0 +1,525 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Francis.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef FRANCIS_H
#define FRANCIS_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
//#include <timer.h>
//#include <lapacke.h>
//#include <Eigen/Dense>
namespace Grid {
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
/**
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
H =
x x x x x x x x x
x x x x x x x x x
0 x x x x x x x x
0 0 x x x x x x x
0 0 0 x x x x x x
0 0 0 0 x x x x x
0 0 0 0 0 x x x x
0 0 0 0 0 0 x x x
0 0 0 0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.
**/
template <class T>
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
DenseMatrix<T> H = Hin;
int N ; SizeSquare(H,N);
int M = N;
Fill(evals,0);
Fill(evecs,0);
T s,t,x=0,y=0,z=0;
T u,d;
T apd,amd,bc;
DenseVector<T> p(N,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
DenseVector<int> trows(N,0);
/// Check if the matrix is really hessenberg, if not abort
RealD sth = 0;
for(int j=0;j<N;j++){
for(int i=j+2;i<N;i++){
sth = abs(H[i][j]);
if(sth > small){
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
exit(1);
}
}
}
do{
std::cout << "Francis QR Step N = " << N << std::endl;
/** Check for convergence
x x x x x
0 x x x x
0 0 x x x
0 0 x x x
0 0 0 0 x
for this matrix l = 4
**/
do{
l = Chop_subdiag(H,nrm,e,small);
r = 0; ///May have converged on more than one eval
///Single eval
if(l == N-1){
evals[e] = H[l][l];
N--; e++; r++; it = 0;
}
///RealD eval
if(l == N-2){
trows[l+1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l+1][l+1];
amd = H[l][l] - H[l+1][l+1];
bc = (T)4.0*H[l+1][l]*H[l][l+1];
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
N-=2; e+=2; r++; it = 0;
}
} while(r>0);
if(N ==0) break;
DenseVector<T > ck; Resize(ck,3);
DenseVector<T> v; Resize(v,3);
for(int m = N-3; m >= l; m--){
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0){
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
///Starting vector implicit Q theorem
else{
s = (H[N-2][N-2] + H[N-1][N-1]);
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
ck[0] = x; ck[1] = y; ck[2] = z;
if(m == l) break;
/** Some stupid thing from Numerical Recipes, seems to work **/
// PAB.. for heaven's sake quote page, purpose, evidence it works.
// what sort of comment is that!?!?!?
u=abs(H[m][m-1])*(abs(y)+abs(z));
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
if ((T)abs(u+d) == (T)abs(d) ){
l = m; break;
}
//if (u < small){l = m; break;}
}
if(it > 100000){
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, 2, v, beta);
Householder_mult<T >(H,v,beta,0,l,l+2,0);
Householder_mult<T >(H,v,beta,0,l,l+2,1);
///Accumulate eigenvector
Householder_mult<T >(P,v,beta,0,l,l+2,1);
int sw = 0; ///Are we on the last row?
for(int k=l;k<N-2;k++){
x = H[k+1][k];
y = H[k+2][k];
z = (T)0.0;
if(k+3 <= N-1){
z = H[k+3][k];
} else{
sw = 1;
v[2] = (T)0.0;
}
ck[0] = x; ck[1] = y; ck[2] = z;
normalize(ck);
Householder_vector<T >(ck, 0, 2-sw, v, beta);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
///Accumulate eigenvector
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp; Resize(tmp,N);
for(int i=0;i<N;i++){
tmp[i] = evals[N-i-1];
}
evals = tmp;
UTeigenvectors(H, trows, evals, evecs);
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
return tot_it;
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
/**
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
H =
x x 0 0 0 0
x x x 0 0 0
0 x x x 0 0
0 0 x x x 0
0 0 0 x x x
0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. **/
return my_Wilkinson(Hin, evals, evecs, small, small);
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
{
int N; SizeSquare(Hin,N);
int M = N;
/// I don't want to modify the input, but matrices must be passed by reference
//Scale a matrix by its "norm"
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
DenseMatrix<T> H; H = Hin;
RealD Hnorm = abs(Norm(Hin));
H = H * (1.0 / Hnorm);
// TODO use openmp and memset
Fill(evals,0);
Fill(evecs,0);
T s, t, x = 0, y = 0, z = 0;
T u, d;
T apd, amd, bc;
DenseVector<T> p; Resize(p,N); Fill(p,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N);
Unity(P);
DenseVector<int> trows(N, 0);
/// Check if the matrix is really symm tridiag
RealD sth = 0;
for(int j = 0; j < N; ++j)
{
for(int i = j + 2; i < N; ++i)
{
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
{
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
std::cout << "Warning tridiagonalize and call again" << std::endl;
// exit(1); // see what is going on
//return;
}
}
}
do{
do{
//Jasper
//Check if the subdiagonal term is small enough (<small)
//if true then it is converged.
//check start from H.dim - e - 1
//How do we deal with the case where more than 2 have converged?
//What if Chop_symm_subdiag returns something in the middle?
//--------------
l = Chop_symm_subdiag(H,nrm, e, small);
r = 0; ///May have converged on more than one eval
//Jasper
//In this case
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x x 0
// 0 0 0 x x 0
// 0 0 0 0 0 x <- l
//--------------
///Single eval
if(l == N - 1)
{
evals[e] = H[l][l];
N--;
e++;
r++;
it = 0;
}
//Jasper
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x 0 0
// 0 0 0 0 x x <- l
// 0 0 0 0 x x
//--------------
///RealD eval
if(l == N - 2)
{
trows[l + 1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l + 1][ l + 1];
amd = H[l][l] - H[l + 1][l + 1];
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
N -= 2;
e += 2;
r++;
it = 0;
}
}while(r > 0);
//Jasper
//Already converged
//--------------
if(N == 0) break;
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
for(int m = N - 3; m >= l; m--)
{
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0)
{
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
x = H[m][m] - t;
z = H[m + 1][m];
} else {
///Starting vector implicit Q theorem
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
x = H[m][m] - t;
z = H[m + 1][m];
}
//Jasper
//why is it here????
//-----------------------
if(m == l)
break;
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
if ((T)abs(u + d) == (T)abs(d))
{
l = m;
break;
}
}
//Jasper
if(it > 1000000)
{
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
//
T s, c;
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, l, l + 1, c, -s, 0);
Givens_mult<T>(H, l, l + 1, c, s, 1);
Givens_mult<T>(P, l, l + 1, c, s, 1);
//
for(int k = l; k < N - 2; ++k)
{
x = H[k + 1][k];
z = H[k + 2][k];
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp(N);
for(int i = 0; i < N; ++i)
tmp[i] = evals[N-i-1];
evals = tmp;
//
UTeigenvectors(H, trows, evals, evecs);
//UTSymmEigenvectors(H, trows, evals, evecs);
for(int i = 0; i < evals.size(); ++i)
{
evecs[i] = P * evecs[i];
normalize(evecs[i]);
evals[i] = evals[i] * Hnorm;
}
// // FIXME this is to test
// Hin.write("evecs3", evecs);
// Hin.write("evals3", evals);
// // check rsd
// for(int i = 0; i < M; i++) {
// vector<T> Aevec = Hin * evecs[i];
// RealD norm2(0.);
// for(int j = 0; j < M; j++) {
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
// }
// }
return tot_it;
}
template <class T>
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
/**
turn a matrix A =
x x x x x
x x x x x
x x x x x
x x x x x
x x x x x
into
x x x x x
x x x x x
0 x x x x
0 0 x x x
0 0 0 x x
with householder rotations
Slow.
*/
int N ; SizeSquare(A,N);
DenseVector<T > p; Resize(p,N); Fill(p,0);
for(int k=start;k<N-2;k++){
//cerr << "hess" << k << std::endl;
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
for(int i=k+1;i<N;i++){ck[i-k-1] = A[i][k];} ///kth column
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
///Accumulate eigenvector
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
}
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,k,l);
}
}*/
}
template <class T>
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
///Tridiagonalize a matrix
int N; SizeSquare(A,N);
Hess(A,Q,start);
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,l,k);
}
}*/
}
template <class T>
void ForceTridiagonal(DenseMatrix<T> &A){
///Tridiagonalize a matrix
int N ; SizeSquare(A,N);
for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A[l][k]=0;
A[k][l]=0;
}
}
}
template <class T>
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Tri(A,Q,0);
int it = my_Wilkinson<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
template <class T>
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_Wilkinson(Ain, evals, evecs, small);
}
template <class T>
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_SymmEigensystem(Ain, evals, evecs, small);
}
template <class T>
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a general eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Hess(A,Q,0);
int it = QReigensystem<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
}
#endif
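A hedged sketch of the symmetric entry point on a 2x2 example whose exact eigenvalues are 3 and 1; this assumes the DenseMatrix helpers above compile as declared:
// DenseMatrix<RealD> A; Resize(A,2,2);
// A[0][0]=2.0; A[0][1]=1.0; A[1][0]=1.0; A[1][1]=2.0;
// DenseVector<RealD> evals(2);
// DenseVector<DenseVector<RealD> > evecs(2);
// for(int i=0;i<2;i++) Resize(evecs[i],2);
// int its = SymmEigensystem(A, evals, evecs, 1.0e-12);  // evals ~ {3,1}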

View File

@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Householder.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef HOUSEHOLDER_H
#define HOUSEHOLDER_H
#define TIMER(A) std::cout << GridLogMessage << __func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__func__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
namespace Grid {
/** Comparison function for finding the max element in a vector **/
template <class T> bool cf(T i, T j) {
return abs(i) < abs(j);
}
/**
Calculate a real Givens angle
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
RealD mz = (RealD)abs(z);
if(mz==0.0){
c = 1; s = 0;
return; // nothing to rotate; falling through would divide by z below
}
if(mz >= (RealD)abs(y)){
T t = -y/z;
s = (T)1.0 / sqrt ((T)1.0 + t * t);
c = s * t;
} else {
T t = -z/y;
c = (T)1.0 / sqrt ((T)1.0 + t * t);
s = c * t;
}
}
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
int q ; SizeSquare(A,q);
if(dir == 0){
for(int j=0;j<q;j++){
T nu = A[i][j];
T w = A[k][j];
A[i][j] = (c*nu + s*w);
A[k][j] = (-s*nu + c*w);
}
}
if(dir == 1){
for(int j=0;j<q;j++){
T nu = A[j][i];
T w = A[j][k];
A[j][i] = (c*nu - s*w);
A[j][k] = (s*nu + c*w);
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
P | x | | x | k = 0
| x | | 0 |
| x | = | 0 |
| x | | 0 | j = 3
| x | | x |
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
int N ; Size(input,N);
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
else v[k] = -alpha;
} else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
Px = alpha*e_dir
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
int N = input.size();
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T>);
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
else v[dir] = -alpha;
}else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
**/
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
int N ; SizeSquare(A,N);
if(abs(beta) > 0.0){
for(int p=l; p<N; p++){
T s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
s *= beta;
for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
} else {
for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
s *= beta;
for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
}
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1 of v are nonzero
A is tridiagonal
**/
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
if(abs(beta) > 0.0){
int N ; SizeSquare(A,N);
DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);
T s;
for(int p=l; p<M; p++){
s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
}else{
for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
}
s = beta*s;
if(trans==0){
for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k];
}else{
for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
}
}
for(int p=l; p<M; p++){
if(trans==0){
for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
}else{
for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
}
}
}
}
}
#endif
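A sketch of the Givens helpers, mirroring the call pattern used in the Wilkinson routine: compute the angle from the pivot column, rotate rows to zero the subdiagonal entry, then rotate columns to restore the similarity transform.
// DenseMatrix<RealD> A; Resize(A,2,2);
// A[0][0]=3.0; A[0][1]=1.0; A[1][0]=4.0; A[1][1]=2.0;
// RealD c, s;
// Givens_calc(A[0][0], A[1][0], c, s);  // angle annihilating A[1][0]
// Givens_mult(A, 0, 1, c, -s, 0);       // row rotation: A[1][0] -> 0
// Givens_mult(A, 0, 1, c,  s, 1);       // column rotation: A -> G A G^H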

File diff suppressed because it is too large

View File

@ -0,0 +1,453 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Matrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef MATRIX_H
#define MATRIX_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <vector>
#include <iostream>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid.h>
/** Sign function **/
template <class T> T sign(T p){return ( p/abs(p) );}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class T> using Vector = std::vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
template<class T> void Resize(Vector<T > & vec, int N) { vec.resize(N); }
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Size(Vector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
int N2,M2;
Size(mat1,N1,M1);
Size(mat2,N2,M2);
assert(N1==N2);
assert(M1==M2);
}
//*****************************************
//* (Complex) Vector operations *
//*****************************************
/**Conj of a Vector **/
template <class T> Vector<T> conj(Vector<T> p){
Vector<T> q(p.size());
for(int i=0;i<p.size();i++){q[i] = conj(p[i]);}
return q;
}
/** Norm of a Vector**/
template <class T> T norm(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs(sqrt(sum));
}
/** Norm squared of a Vector **/
template <class T> T norm2(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i]*conj(p[i]);}
return abs((sum));
}
/** Sum elements of a Vector **/
template <class T> T trace(Vector<T> p){
T sum = 0;
for(int i=0;i<p.size();i++){sum = sum + p[i];}
return sum;
}
/** Fill a Vector with constant c **/
template <class T> void Fill(Vector<T> &p, T c){
for(int i=0;i<p.size();i++){p[i] = c;}
}
/** Normalize a Vector **/
template <class T> void normalize(Vector<T> &p){
T m = norm(p);
if( abs(m) > 0.0) for(int i=0;i<p.size();i++){p[i] /= m;}
}
/** Vector by scalar **/
template <class T, class U> Vector<T> times(Vector<T> p, U s){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
template <class T, class U> Vector<T> times(U s, Vector<T> p){
for(int i=0;i<p.size();i++){p[i] *= s;}
return p;
}
/** inner product of a and b = conj(a) . b **/
template <class T> T inner(Vector<T> a, Vector<T> b){
T m = 0.;
for(int i=0;i<a.size();i++){m = m + conj(a[i])*b[i];}
return m;
}
/** sum of a and b = a + b **/
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] + b[i];}
return m;
}
/** sum of a and b = a - b **/
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
Vector<T> m(a.size());
for(int i=0;i<a.size();i++){m[i] = a[i] - b[i];}
return m;
}
/**
*********************************
* Matrices *
*********************************
**/
template<class T> void Fill(Matrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> Matrix<T> Transpose(Matrix<T> & mat){
int N,M;
Size(mat,N,M);
Matrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set Matrix to unit matrix **/
template<class T> void Unity(Matrix<T> &mat){
int N; SizeSquare(mat,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) mat[i][j] = 1;
else mat[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(Matrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T> Matrix<T> HermitianConj(Matrix<T> &mat){
int dim; SizeSquare(mat,dim);
Matrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/** return diagonal entries as a Vector **/
template<class T> Vector<T> diag(Matrix<T> &A)
{
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = A[i][i];
}
return d;
}
/** Left multiply by a Vector **/
template<class T> Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
{
int K,M,N;
Size(B,K);
Size(A,M,N);
assert(K==M);
Vector<T> C; Resize(C,N);
for(int j=0;j<N;j++){
T sum = 0.0;
for(int i=0;i<M;i++){
sum += B[i] * A[i][j];
}
C[j] = sum;
}
return C;
}
/** return 1/diagonal entries as a Vector **/
template<class T> Vector<T> inv_diag(Matrix<T> & A){
int dim; SizeSquare(A,dim);
Vector<T> d; Resize(d,dim);
for(int i=0;i<dim;i++){
d[i] = 1.0/A[i][i];
}
return d;
}
/** Matrix Addition **/
template<class T> inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
{
int N,M ; SizeSame(A,B,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] + B[i][j];
}
}
return C;
}
/** Matrix Subtraction **/
template<class T> inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
int N,M ; SizeSame(A,B,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j] - B[i][j];
}}
return C;
}
/** Matrix scalar multiplication **/
template<class T> inline Matrix<T> operator* (Matrix<T> & A,T c){
int N,M; Size(A,N,M);
Matrix<T> C; Resize(C,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
C[i][j] = A[i][j]*c;
}}
return C;
}
/** Matrix Matrix multiplication **/
template<class T> inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
int K,L,N,M;
Size(A,K,L);
Size(B,N,M); assert(L==N);
Matrix<T> C; Resize(C,K,M);
for(int i=0;i<K;i++){
for(int j=0;j<M;j++){
T sum = 0.0;
for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
C[i][j] =sum;
}
}
return C;
}
/** Matrix Vector multiplication **/
template<class T> inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
int M,N,K;
Size(A,N,M);
Size(B,K); assert(K==M);
Vector<T> C; Resize(C,N);
for(int i=0;i<N;i++){
T sum = 0.0;
for(int j=0;j<M;j++) sum += A[i][j]*B[j];
C[i] = sum;
}
return C;
}
/** Some version of Matrix norm **/
/*
inline T Norm(){ // this is not a usual L2 norm
T norm = 0;
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
norm += abs(A[i][j]);
}}
return norm;
}
*/
/** Some version of Matrix norm **/
template<class T> T LargestDiag(Matrix<T> &A)
{
int dim ; SizeSquare(A,dim);
T ld = abs(A[0][0]);
for(int i=1;i<dim;i++){
T cf = abs(A[i][i]);
if(abs(cf) > abs(ld) ){ld = cf;}
}
return ld;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l-1]=(U)0.0;
return l;
}
}
return 0;
}
/** Look for entries on the leading subdiagonal that are smaller than 'small' **/
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
{
int dim; SizeSquare(A,dim);
for(int l = dim - 1 - offset; l >= 1; l--) {
if((U)abs(A[l][l - 1]) < (U)small) {
A[l][l - 1] = (U)0.0;
A[l - 1][l] = (U)0.0;
return l;
}
}
return 0;
}
/**Assign a submatrix to a larger one. NB: Vectors of Vectors are transposes of the matrices they represent **/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}
}
}
/**Get a square submatrix**/
template <class T>
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
Matrix<T> H; Resize(row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
/** compute b_i A_ij b_j **/ // surprised no Conj
template<class T> T proj(Matrix<T> A, Vector<T> B){
int dim; SizeSquare(A,dim);
int dimB; Size(B,dimB);
assert(dimB==dim);
T C = 0;
for(int i=0;i<dim;i++){
T sum = 0.0;
for(int j=0;j<dim;j++){
sum += A[i][j]*B[j];
}
C += B[i]*sum; // No conj?
}
return C;
}
/*
*************************************************************
*
* Matrix Vector products
*
*************************************************************
*/
// Instead make a linop and call my CG;
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
{
int M; SizeSquare(Q,M);
int N; Size(q,N);
assert(M==N);
times(q,Q,N);
}
/// q -> q Q
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q, int N)
{
GridBase *grid = q[0]._grid;
int M; SizeSquare(Q,M);
int K; Size(q,K);
assert(N<=M);
assert(N<=K);
Vector<Fermion> S(N,grid );
for(int j=0;j<N;j++){
S[j] = zero;
for(int k=0;k<N;k++){
S[j] = S[j] + q[k]* Q[k][j];
}
}
for(int j=0;j<q.size();j++){
q[j] = S[j];
}
}
#endif

View File

@ -2,13 +2,11 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Grid.h Source file: ./lib/algorithms/iterative/MatrixUtils.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: azusayamaguchi <ayamaguc@YAMAKAZE.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -27,34 +25,51 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
// #ifndef GRID_MATRIX_UTILS_H
// Grid.h #define GRID_MATRIX_UTILS_H
// simd
//
// Created by Peter Boyle on 09/05/2014.
// Copyright (c) 2014 University of Edinburgh. All rights reserved.
//
#ifndef GRID_BASE_H namespace Grid {
#define GRID_BASE_H
#include <Grid/GridStd.h> namespace MatrixUtils {
#include <Grid/perfmon/Timer.h> template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
#include <Grid/perfmon/PerfCount.h> N=A.size(); assert(N>0);
#include <Grid/log/Log.h> M=A[0].size();
#include <Grid/allocator/AlignedAllocator.h> for(int i=0;i<N;i++){
#include <Grid/simd/Simd.h> assert(A[i].size()==M);
#include <Grid/serialisation/Serialisation.h> }
#include <Grid/threads/Threads.h> }
#include <Grid/util/Util.h>
#include <Grid/communicator/Communicator.h>
#include <Grid/cartesian/Cartesian.h>
#include <Grid/tensors/Tensors.h>
#include <Grid/lattice/Lattice.h>
#include <Grid/cshift/Cshift.h>
#include <Grid/stencil/Stencil.h>
#include <Grid/parallelIO/BinaryIO.h>
#include <Grid/algorithms/Algorithms.h>
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
{
int M;
Size(A,N,M);
assert(N==M);
}
template<class T> inline void Fill(Matrix<T>& A,T & val)
{
int N,M;
Size(A,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
A[i][j]=val;
}}
}
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
{
int N;
SizeSquare(A,N);
for(int i=0;i<N;i++){
A[i][i]=val;
}
}
template<class T> inline void Identity(Matrix<T>& A)
{
Fill(A,0.0);
Diagonal(A,1.0);
}
};
}
#endif #endif

View File

@ -0,0 +1,15 @@
- ConjugateGradientMultiShift
- MCR
- Potentially Useful Boost libraries
- MultiArray
- Aligned allocator; memory pool
- Remez -- Mike or Boost?
- Multiprecision
- quaternions
- Tokenize
- Serialization
- Regex
- Proto (ET)
- uBlas

View File

@ -0,0 +1,122 @@
#include <math.h>
#include <stdlib.h>
#include <vector>
struct Bisection {
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
{
int i,j;
std::vector<RealD> evec1(row_num+3);
std::vector<RealD> evec2(row_num+3);
RealD eps2;
ALPHA[1]=0.;
BETA[1]=0.;
// NB: A below refers to matrix storage assumed to be defined elsewhere
for(i=0;i<row_num-1;i++) {
ALPHA[i+1] = A[i*(row_num+1)].real();
BETA[i+2] = A[i*(row_num+1)+1].real();
}
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
bisec(ALPHA,BETA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
bisec(ALPHA,BETA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
// Do we really need to sort here?
int begin=1;
int end = row_num;
int swapped=1;
while(swapped) {
swapped=0;
for(i=begin;i<end;i++){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
end--;
for(i=end-1;i>=begin;i--){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
begin++;
}
for(i=0;i<row_num;i++){
for(j=0;j<row_num;j++) {
if(i==j) H[i*row_num+j]=evec2[i+1];
else H[i*row_num+j]=0.;
}
}
}
static void bisec(std::vector<RealD> &c,
std::vector<RealD> &b,
int n,
int m1,
int m2,
RealD eps1,
RealD relfeh,
std::vector<RealD> &x,
RealD &eps2)
{
std::vector<RealD> wu(n+2);
RealD h,q,x1,xu,x0,xmin,xmax;
int i,a,k;
b[1]=0.0;
xmin=c[n]-fabs(b[n]);
xmax=c[n]+fabs(b[n]);
for(i=1;i<n;i++){
h=fabs(b[i])+fabs(b[i+1]);
if(c[i]+h>xmax) xmax= c[i]+h;
if(c[i]-h<xmin) xmin= c[i]-h;
}
xmax *=2.;
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
if(eps1<=0.0) eps1=eps2;
eps2=0.5*eps1+7.0*(eps2);
x0=xmax;
for(i=m1;i<=m2;i++){
x[i]=xmax;
wu[i]=xmin;
}
for(k=m2;k>=m1;k--){
xu=xmin;
i=k;
do{
if(xu<wu[i]){
xu=wu[i];
i=m1-1;
}
i--;
}while(i>=m1);
if(x0>x[k]) x0=x[k];
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
x1=(xu+x0)/2;
a=0;
q=1.0;
for(i=1;i<=n;i++){
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
if(q<0) a++;
}
// printf("x1=%e a=%d\n",x1,a);
if(a<k){
if(a<m1){
xu=x1;
wu[m1]=x1;
}else {
xu=x1;
wu[a+1]=x1;
if(x[a]>x1) x[a]=x1;
}
}else x0=x1;
}
x[k]=(x0+xu)/2;
}
}
};
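A hedged sketch of bisec() on a 1-indexed symmetric tridiagonal problem (diagonal in c[1..n], off-diagonal in b[2..n]); note that get_eig2 above additionally assumes external matrix storage A and H that this fragment does not define:
// int n = 3;                                // tridiagonal {2,1; 1,2,1; 1,2}
// std::vector<RealD> c(n+1,2.0), b(n+1,0.0), x(n+1,0.0);
// b[2] = 1.0; b[3] = 1.0;                   // b[1] is forced to zero inside
// RealD eps2;
// Bisection::bisec(c, b, n, 1, n, 1.0e-12, 1.0e-12, x, eps2);
// // x[1..3] ~ 2-sqrt(2), 2, 2+sqrt(2), ascending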

View File

@ -0,0 +1 @@

View File

@ -6,9 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -50,9 +49,10 @@ public:
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
// Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
std::vector<int> _gdimensions;// Global dimensions of array after cb removal
std::vector<int> _ldimensions;// local dimensions of array with processor images removed
std::vector<int> _rdimensions;// Reduced local dimensions with simd lane images and processor images removed
@ -62,12 +62,13 @@ public:
int _isites;
int _fsites; // _isites*_osites = product(dimensions).
int _gsites;
std::vector<int> _slice_block; // subslice information
std::vector<int> _slice_stride;
std::vector<int> _slice_nblock;
std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d]
std::vector<int> _lend  ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
// Might need these at some point
// std::vector<int> _lstart; // local start of array in gcoors. _processor_coor[d]*_ldimensions[d]
// std::vector<int> _lend;   // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
public:
@ -98,7 +99,7 @@ public:
virtual int oIndex(std::vector<int> &coor)
{
int idx=0;
// Works with either global or local coordinates
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
return idx;
}
@ -120,11 +121,6 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
}
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
lcoor.resize(_ndimension);
for (int d = 0; d < _ndimension; d++)
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
}
//////////////////////////////////////////////////////////
// SIMD lane addressing
@ -133,7 +129,6 @@ public:
{
Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
}
inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1;
}
@ -151,15 +146,15 @@ public:
// Distance should be either 0,1,2..
//
if ( _simd_layout[dimension] > 2 ) {
for(int d=0;d<_ndimension;d++){
if ( d != dimension ) assert ( (_simd_layout[d]==1) );
}
permute_type = RotateBit; // How to specify distance; this is not just direction.
return permute_type;
}
for(int d=_ndimension-1;d>dimension;d--){
if (_simd_layout[d]>1 ) permute_type++;
}
return permute_type;
}
@ -174,50 +169,26 @@ public:
inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
inline int Nd    (void) const { return _ndimension;};
inline const std::vector<int> LocalStarts(void)             { return _lstart; };
inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;};
inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;};
inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;};
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
////////////////////////////////////////////////////////////////
// Utility to print the full decomposition details
////////////////////////////////////////////////////////////////
void show_decomposition(){
std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl;
std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl;
std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl;
std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl;
std::cout << GridLogMessage << "Outer strides : " << _ostride << std::endl;
std::cout << GridLogMessage << "Inner strides : " << _istride << std::endl;
std::cout << GridLogMessage << "iSites : " << _isites << std::endl;
std::cout << GridLogMessage << "oSites : " << _osites << std::endl;
std::cout << GridLogMessage << "lSites : " << lSites() << std::endl;
std::cout << GridLogMessage << "gSites : " << gSites() << std::endl;
std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl;
}
////////////////////////////////////////////////////////////////
// Global addressing
////////////////////////////////////////////////////////////////
void GlobalIndexToGlobalCoor(int gidx,std::vector<int> &gcoor){
assert(gidx< gSites());
Lexicographic::CoorFromIndex(gcoor,gidx,_gdimensions);
}
void LocalIndexToLocalCoor(int lidx,std::vector<int> &lcoor){
assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
}
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0;
int mult=1;
for(int mu=0;mu<_ndimension;mu++) {
gidx+=mult*gcoor[mu];
mult*=_gdimensions[mu];
}
}
void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
@ -225,9 +196,9 @@ public:
pcoor.resize(_ndimension);
lcoor.resize(_ndimension);
for(int mu=0;mu<_ndimension;mu++){
int _fld  = _fdimensions[mu]/_processors[mu];
pcoor[mu] = gcoor[mu]/_fld;
lcoor[mu] = gcoor[mu]%_fld;
}
}
void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
@ -236,16 +207,16 @@ public:
std::vector<int> lcoor;
GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
rank = RankFromProcessorCoor(pcoor);
/*
std::vector<int> cblcoor(lcoor);
for(int d=0;d<cblcoor.size();d++){
if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2;
}
}
*/
i_idx= iIndex(lcoor);
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
o_idx= oIndex(lcoor);  // this implies divide by 2 on checkerdim
}
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
@ -267,7 +238,7 @@ public:
{
RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
if(CheckerBoarded(0)){
fcoor[0] = fcoor[0]*2+cb;
}
}
void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)
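
All of the conversions above are instances of the same lexicographic fold over the dimension vector; here is a small self-contained sketch of the forward and inverse maps (free functions standing in for the Lexicographic helpers, with dimension 0 running fastest):

#include <vector>
// idx = sum_mu coor[mu] * prod_{nu<mu} dims[nu]
static int IndexFromCoor(const std::vector<int> &coor,const std::vector<int> &dims){
  int idx=0, mult=1;
  for(size_t mu=0;mu<dims.size();mu++){ idx += mult*coor[mu]; mult *= dims[mu]; }
  return idx;
}
// Inverse map: peel off one dimension at a time.
static void CoorFromIndex(std::vector<int> &coor,int idx,const std::vector<int> &dims){
  coor.resize(dims.size());
  for(size_t mu=0;mu<dims.size();mu++){ coor[mu] = idx % dims[mu]; idx /= dims[mu]; }
}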

View File

@ -76,8 +76,6 @@ public:
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);
_ostride.resize(_ndimension);
_istride.resize(_ndimension);
@ -96,10 +94,8 @@ public:
// Use a reduced simd grid
_ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions
_rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition
_lstart[d] = _processor_coor[d]*_ldimensions[d];
_lend[d]   = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;
_osites *= _rdimensions[d];
_isites *= _simd_layout[d];
// Addressing support
if ( d==0 ) {

View File

@ -151,8 +151,6 @@ public:
_ldimensions.resize(_ndimension);
_rdimensions.resize(_ndimension);
_simd_layout.resize(_ndimension);
_lstart.resize(_ndimension);
_lend.resize(_ndimension);
_ostride.resize(_ndimension);
_istride.resize(_ndimension);
@ -171,8 +169,6 @@ public:
_gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard
}
_ldimensions[d] = _gdimensions[d]/_processors[d];
_lstart[d] = _processor_coor[d]*_ldimensions[d];
_lend[d]   = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;
// Use a reduced simd grid
_simd_layout[d] = simd_layout[d];

View File

View File

@ -25,8 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/Grid.h>
namespace Grid {
///////////////////////////////////////////////////////////////
@ -34,7 +33,6 @@ namespace Grid {
///////////////////////////////////////////////////////////////
void * CartesianCommunicator::ShmCommBuf;
uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024;
CartesianCommunicator::CommunicatorPolicy_t CartesianCommunicator::CommunicatorPolicy= CartesianCommunicator::CommunicatorPolicyConcurrent;
/////////////////////////////////
// Alloc, free shmem region
@ -60,7 +58,6 @@ void CartesianCommunicator::ShmBufferFreeAll(void) {
/////////////////////////////////
// Grid information queries
/////////////////////////////////
int CartesianCommunicator::Dimensions(void) { return _ndimension; };
int CartesianCommunicator::IsBoss(void)     { return _processor==0; };
int CartesianCommunicator::BossRank(void)   { return 0; };
int CartesianCommunicator::ThisRank(void)   { return _processor; };
@ -91,10 +88,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
int CartesianCommunicator::NodeCount(void) { return ProcessorCount();};
int CartesianCommunicator::RankCount(void) { return ProcessorCount();};
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
@ -102,7 +96,6 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int bytes)
{
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{

View File

@ -116,12 +116,6 @@ class CartesianCommunicator {
// Implemented in Communicator_base.C // Implemented in Communicator_base.C
///////////////////////////////// /////////////////////////////////
static void * ShmCommBuf; static void * ShmCommBuf;
// Isend/Irecv/Wait, or Sendrecv blocking
enum CommunicatorPolicy_t { CommunicatorPolicyConcurrent, CommunicatorPolicySequential };
static CommunicatorPolicy_t CommunicatorPolicy;
static void SetCommunicatorPolicy(CommunicatorPolicy_t policy ) { CommunicatorPolicy = policy; }
size_t heap_top; size_t heap_top;
size_t heap_bytes; size_t heap_bytes;
@ -148,15 +142,12 @@ class CartesianCommunicator {
int RankFromProcessorCoor(std::vector<int> &coor); int RankFromProcessorCoor(std::vector<int> &coor);
void ProcessorCoorFromRank(int rank,std::vector<int> &coor); void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
int Dimensions(void) ;
int IsBoss(void) ; int IsBoss(void) ;
int BossRank(void) ; int BossRank(void) ;
int ThisRank(void) ; int ThisRank(void) ;
const std::vector<int> & ThisProcessorCoor(void) ; const std::vector<int> & ThisProcessorCoor(void) ;
const std::vector<int> & ProcessorGrid(void) ; const std::vector<int> & ProcessorGrid(void) ;
int ProcessorCount(void) ; int ProcessorCount(void) ;
int NodeCount(void) ;
int RankCount(void) ;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// very VERY rarely (Log, serial RNG) we need world without a grid // very VERY rarely (Log, serial RNG) we need world without a grid
@ -177,8 +168,6 @@ class CartesianCommunicator {
void GlobalSumVector(ComplexF *c,int N); void GlobalSumVector(ComplexF *c,int N);
void GlobalSum(ComplexD &c); void GlobalSum(ComplexD &c);
void GlobalSumVector(ComplexD *c,int N); void GlobalSumVector(ComplexD *c,int N);
void GlobalXOR(uint32_t &);
void GlobalXOR(uint64_t &);
template<class obj> void GlobalSum(obj &o){ template<class obj> void GlobalSum(obj &o){
typedef typename obj::scalar_type scalar_type; typedef typename obj::scalar_type scalar_type;
@ -211,7 +200,7 @@ class CartesianCommunicator {
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit, void *xmit,
int xmit_to_rank, int xmit_to_rank,
void *recv, void *recv,

View File

@ -25,9 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/Grid.h>
#include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/ActionCore.h>
#include <mpi.h>
namespace Grid {
@ -41,13 +39,9 @@ MPI_Comm CartesianCommunicator::communicator_world;
// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
MPI_Init(argc,argv);
if ( provided != MPI_THREAD_MULTIPLE ) {
QCD::WilsonKernelsStatic::Comms = QCD::WilsonKernelsStatic::CommsThenCompute;
}
}
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
ShmInitGeneric();
@ -83,14 +77,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
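
The GlobalXOR reductions being removed here are thin wrappers over MPI_Allreduce with the MPI_BXOR operation; their typical use is cross-rank checksum agreement. A self-contained sketch of the same pattern (the checksum value is illustrative only):

#include <mpi.h>
#include <cstdint>
#include <cstdio>
// All ranks receive the XOR of every rank's input word.
static void global_xor(uint32_t &u, MPI_Comm comm) {
  int ierr = MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,comm);
  if (ierr != MPI_SUCCESS) MPI_Abort(comm,ierr);
}
int main(int argc,char **argv){
  MPI_Init(&argc,&argv);
  int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  uint32_t csum = 0xdeadbeefu ^ (uint32_t)rank; // stand-in for a data checksum
  global_xor(csum,MPI_COMM_WORLD);
  if(rank==0) printf("global xor = %08x\n",csum);
  MPI_Finalize();
  return 0;
}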
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
@ -166,34 +152,24 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
}
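
The two code paths above implement the same halo exchange with different progress semantics: the Concurrent policy posts non-blocking Isend/Irecv pairs and completes them later, while the Sequential policy hands the CPU to a blocking MPI_Sendrecv immediately. A stripped-down sketch of the pattern, outside any Grid class (names are illustrative):

#include <mpi.h>
#include <vector>
#include <cassert>
enum Policy { Concurrent, Sequential };
// One exchange; 'reqs' collects pending requests in the Concurrent case.
void exchange(Policy p, std::vector<MPI_Request> &reqs,
              void *xmit, int dest, void *recv, int from,
              int bytes, int me, MPI_Comm comm) {
  if (p == Concurrent) {
    MPI_Request xrq, rrq;
    int ierr  = MPI_Irecv(recv, bytes, MPI_CHAR, from, from, comm, &rrq);
    ierr     |= MPI_Isend(xmit, bytes, MPI_CHAR, dest, me,   comm, &xrq);
    assert(ierr == 0);
    reqs.push_back(xrq);
    reqs.push_back(rrq);
  } else {
    // Blocking path: MPI progresses the message before returning.
    int ierr = MPI_Sendrecv(xmit, bytes, MPI_CHAR, dest, me,
                            recv, bytes, MPI_CHAR, from, from,
                            comm, MPI_STATUS_IGNORE);
    assert(ierr == 0);
  }
}
void complete(std::vector<MPI_Request> &reqs) {
  if (reqs.empty()) return;
  assert(MPI_Waitall((int)reqs.size(), &reqs[0], MPI_STATUSES_IGNORE) == 0);
  reqs.clear();
}

The design point is latency hiding: the Concurrent form lets the caller compute between exchange() and complete(), at the cost of requiring an MPI that makes asynchronous progress.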
void CartesianCommunicator::Barrier(void)

View File

@ -1,4 +1,4 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@ -25,23 +25,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/Grid.h>
#include <mpi.h>
#include <semaphore.h>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>
//#include <zlib.h>
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
namespace Grid {
///////////////////////////////////////////////////////////////////////////////////////////////////
@ -64,11 +50,6 @@ std::vector<int> CartesianCommunicator::GroupRanks;
std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;
int CartesianCommunicator::NodeCount(void) { return GroupSize;};
int CartesianCommunicator::RankCount(void) { return WorldSize;};
#undef FORCE_COMMS
void *CartesianCommunicator::ShmBufferSelf(void)
{
return ShmCommBufs[ShmRank];
@ -76,9 +57,6 @@ void *CartesianCommunicator::ShmBufferSelf(void)
void *CartesianCommunicator::ShmBuffer(int rank)
{
int gpeer = GroupRanks[rank];
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
@ -87,13 +65,7 @@ void *CartesianCommunicator::ShmBuffer(int rank)
}
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
{
static int count =0;
int gpeer = GroupRanks[rank];
assert(gpeer!=ShmRank); // never send to self
assert(rank!=WorldRank);// never send to self
#ifdef FORCE_COMMS
return NULL;
#endif
if (gpeer == MPI_UNDEFINED){
return NULL;
} else {
@ -104,27 +76,16 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
}
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
int provided;
// mtrace();
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init_thread(argc,argv,MPI_THREAD_MULTIPLE,&provided);
MPI_Init(argc,argv);
assert (provided == MPI_THREAD_MULTIPLE);
}
Grid_quiesce_nodes();
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
MPI_Comm_rank(communicator_world,&WorldRank);
MPI_Comm_size(communicator_world,&WorldSize);
if ( WorldRank == 0 ) {
std::cout << GridLogMessage<< "Initialising MPI "<< WorldRank <<"/"<<WorldSize <<std::endl;
}
/////////////////////////////////////////////////////////////////////
// Split into groups that can share memory
/////////////////////////////////////////////////////////////////////
@ -170,6 +131,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
@ -179,6 +141,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the rank of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
@ -189,113 +152,38 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
}
}
assert(GroupRank!=-1);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the shared window for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(ShmComm);
ShmCommBuf = 0;
ShmCommBufs.resize(ShmSize);
#if 1
char shm_name [NAME_MAX];
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
shm_unlink(shm_name);
int fd=shm_open(shm_name,O_RDWR|O_CREAT,0666);
if ( fd < 0 ) { perror("failed shm_open"); assert(0); }
ftruncate(fd, size);
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
}
MPI_Barrier(ShmComm);
if ( ShmRank != 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES ;
sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",GroupRank,r);
int fd=shm_open(shm_name,O_RDWR,0666);
if ( fd<0 ) { perror("failed shm_open"); assert(0); }
void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); }
assert(((uint64_t)ptr&0x3F)==0);
ShmCommBufs[r] =ptr;
}
}
#else
std::vector<int> shmids(ShmSize);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
size_t size = CartesianCommunicator::MAX_MPI_SHM_BYTES;
key_t key   = 0x4545 + r;
if ((shmids[r]= shmget(key,size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
int errsv = errno;
printf("Errno %d\n",errsv);
perror("shmget");
exit(1);
}
printf("shmid: 0x%x\n", shmids[r]);
}
}
MPI_Barrier(ShmComm);
MPI_Bcast(&shmids[0],ShmSize*sizeof(int),MPI_BYTE,0,ShmComm);
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
ShmCommBufs[r] = (uint64_t *)shmat(shmids[r], NULL,0);
if (ShmCommBufs[r] == (uint64_t *)-1) {
perror("Shared memory attach failure");
shmctl(shmids[r], IPC_RMID, NULL);
exit(2);
}
printf("shmaddr: %p\n", ShmCommBufs[r]);
}
MPI_Barrier(ShmComm);
// Mark for clean up
for(int r=0;r<ShmSize;r++){
shmctl(shmids[r], IPC_RMID,(struct shmid_ds *)NULL);
}
MPI_Barrier(ShmComm);
#endif
ShmCommBuf = ShmCommBufs[ShmRank];
MPI_Barrier(ShmComm);
if ( ShmRank == 0 ) {
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
check[0] = GroupRank;
check[1] = r;
check[2] = 0x5A5A5A;
}
}
MPI_Barrier(ShmComm);
for(int r=0;r<ShmSize;r++){
uint64_t * check = (uint64_t *) ShmCommBufs[r];
assert(check[0]==GroupRank);
assert(check[1]==r);
assert(check[2]==0x5A5A5A);
}
MPI_Barrier(ShmComm);
ShmCommBuf = 0;
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
assert(ierr==0);
// KNL hack -- force to numa-domain 1 in flat
#if 0
//#include <numaif.h>
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
void *pages = (void *) ( page + ShmCommBuf );
int status;
int flags=MPOL_MF_MOVE_ALL;
int nodes=1; // numa domain == MCDRAM
unsigned long count=1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
#endif
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBufs.resize(ShmSize);
for(int r=0;r<ShmSize;r++){
MPI_Aint sz;
int dsp_unit;
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
}
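
The replacement allocator above swaps MPI_Win_allocate_shared for POSIX shared memory: rank 0 of each node creates one named region per local rank with shm_open/ftruncate, every local rank then maps all of them, and the 0x5A5A5A check pattern verifies the mappings really alias the same pages. A minimal single-region sketch of the mechanism (the region name is hypothetical; on Linux link with -lrt):

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cassert>

// Creator: make and map a named shared region.
void *create_region(const char *name, size_t size) {
  shm_unlink(name);                                // drop any stale segment
  int fd = shm_open(name, O_RDWR | O_CREAT, 0666);
  assert(fd >= 0);
  assert(ftruncate(fd, (off_t)size) == 0);
  void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  assert(ptr != MAP_FAILED);
  close(fd);                                       // the mapping keeps it alive
  return ptr;
}
// Peer: attach to an existing region by name.
void *attach_region(const char *name, size_t size) {
  int fd = shm_open(name, O_RDWR, 0666);
  assert(fd >= 0);
  void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  assert(ptr != MAP_FAILED);
  close(fd);
  return ptr;
}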
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// Verbose for now
@ -304,7 +192,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
std::cout<< WorldSize << " Ranks " ;
std::cout<< GroupSize << " Nodes " ;
std::cout<< " with "<< ShmSize << " ranks-per-node "<<std::endl;
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
@ -319,6 +207,7 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
if(g!=ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl;
}
}
for(int g=0;g<GroupSize;g++){
@ -327,21 +216,23 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
if ( (ShmRank == 0) && (GroupRank==g) ) {
std::cout<<MyGroup[r];
if(r<ShmSize-1) std::cout<<",";
else std::cout<<"}"<<std::endl<<std::flush;
else std::cout<<"}"<<std::endl;
}
MPI_Barrier(communicator_world);
}
}
assert(ShmSetup==0); ShmSetup=1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Want to implement some magic ... Group sub-cubes into those on same node
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &dest,int &source)
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
std::vector<int> coor = _processor_coor; // my coord
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
@ -351,32 +242,28 @@ void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &dest,int &source
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,dest,_processors);
dest = LexicographicToWorldRank[dest];
}
}// rank is world rank.
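
ShiftedRanks computes neighbour ranks by shifting one processor coordinate with periodic wrap-around and translating the result through the lexicographic-to-world table. The wrap itself is the usual add-the-modulus idiom, sketched here as a pure function (no Grid types):

#include <vector>
// Neighbour coordinate in dimension 'dim', shifted by 'shift' with periodic wrap.
// Adding procs[dim] before the modulus keeps the result non-negative for
// negative shifts, valid as long as |shift| < procs[dim] (hence the assert above).
std::vector<int> shifted_coor(const std::vector<int> &coor,
                              const std::vector<int> &procs,
                              int dim, int shift) {
  std::vector<int> out = coor;
  out[dim] = (coor[dim] + shift + procs[dim]) % procs[dim];
  return out;
}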
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
Lexicographic::IndexFromCoor(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
return rank;
}// rank is world rank
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
int lr=-1;
for(int r=0;r<WorldSize;r++){// map world Rank to lexico and then to coor
if( LexicographicToWorldRank[r]==rank) lr = r;
}
assert(lr!=-1);
Lexicographic::CoorFromIndex(coor,lr,_processors);
Lexicographic::CoorFromIndex(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
int ierr;
communicator=communicator_world;
_ndimension = processors.size();
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
@ -388,22 +275,24 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
}
}
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
std::vector<int> WorldDims = processors;
ShmDims.resize  (_ndimension,1);
GroupDims.resize(_ndimension);
ShmCoor.resize  (_ndimension);
GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension);
int dim = 0;
for(int l2=0;l2<log2size;l2++){
while ( (WorldDims[dim] / ShmDims[dim]) <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%_ndimension;
}
@ -415,29 +304,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
GroupDims[d] = WorldDims[d]/ShmDims[d];
}
////////////////////////////////////////////////////////////////
// Verbose
////////////////////////////////////////////////////////////////
#if 0
std::cout<< GridLogMessage << "MPI-3 usage "<<std::endl;
std::cout<< GridLogMessage << "SHM ";
for(int d=0;d<_ndimension;d++){
std::cout<< ShmDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage << "Group ";
for(int d=0;d<_ndimension;d++){
std::cout<< GroupDims[d] <<" ";
}
std::cout<< std::endl;
std::cout<< GridLogMessage<<"World ";
for(int d=0;d<_ndimension;d++){
std::cout<< WorldDims[d] <<" ";
}
std::cout<< std::endl;
#endif
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
@ -451,57 +317,29 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
//
////////////////////////////////////////////////////////////////
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
for(int d=0;d<_ndimension;d++){
WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
}
_processor_coor = WorldCoor;
_processor = WorldRank;
int lexico;
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico]=WorldRank;
_processor = lexico;
///////////////////////////////////////////////////////////////////
// global sum Lexico to World mapping
///////////////////////////////////////////////////////////////////
int lexico;
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico] = WorldRank;
ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
assert(ierr==0);
for(int i=0;i<WorldSize;i++){
int wr = LexicographicToWorldRank[i];
// int wr = i;
std::vector<int> coor(_ndimension);
ProcessorCoorFromRank(wr,coor); // from world rank
int ck = RankFromProcessorCoor(coor);
assert(ck==wr);
if ( wr == WorldRank ) {
for(int j=0;j<coor.size();j++) {
assert(coor[j] == _processor_coor[j]);
}
}
/*
std::cout << GridLogMessage<< " Lexicographic "<<i;
std::cout << " MPI rank "<<wr;
std::cout << " Coor ";
for(int j=0;j<coor.size();j++) std::cout << coor[j];
std::cout<< std::endl;
*/
/////////////////////////////////////////////////////
// Check everyone agrees on everyone elses coords
/////////////////////////////////////////////////////
std::vector<int> mcoor = coor;
this->Broadcast(0,(void *)&mcoor[0],mcoor.size()*sizeof(int));
for(int d = 0 ; d< _ndimension; d++) {
assert(coor[d] == mcoor[d]);
}
}
};
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
@ -510,14 +348,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
@ -537,6 +367,8 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N)
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
@ -545,14 +377,10 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
// unsigned long xcrc = crc32(0L, Z_NULL, 0);
// unsigned long rcrc = crc32(0L, Z_NULL, 0);
// xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
// rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
// printf("proc %d SendToRecvFrom %d bytes %lx %lx\n",_processor,bytes,xcrc,rcrc);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
@ -569,6 +397,7 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
@ -577,29 +406,95 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from,
int bytes)
{
int myrank = _processor;
int ierr;
if ( CommunicatorPolicy == CommunicatorPolicyConcurrent ) {
MPI_Request xrq;
MPI_Request rrq;
ierr =MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
ierr|=MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} else {
// Give the CPU to MPI immediately; can use threads to overlap optionally
ierr=MPI_Sendrecv(xmit,bytes,MPI_CHAR,dest,myrank,
recv,bytes,MPI_CHAR,from, from,
communicator,MPI_STATUS_IGNORE);
assert(ierr==0);
}
#if 0
this->StencilBarrier();
MPI_Request xrq;
MPI_Request rrq;
static int sequence;
int ierr;
int tag;
int check;
assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme   = GroupRanks[_processor];
sequence++;
char *from_ptr = (char *)ShmCommBufs[ShmRank];
int small = (bytes<MAX_MPI_SHM_BYTES);
typedef uint64_t T;
int words = bytes/sizeof(T);
assert(((size_t)bytes &(sizeof(T)-1))==0);
assert(gme == ShmRank);
if ( small && (gdest !=MPI_UNDEFINED) ) {
char *to_ptr = (char *)ShmCommBufs[gdest];
assert(gme != gdest);
T *ip = (T *)xmit;
T *op = (T *)to_ptr;
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
op[w]=ip[w];
}
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
} else {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
this->StencilBarrier();
if (small && (gfrom !=MPI_UNDEFINED) ) {
T *ip = (T *)from_ptr;
T *op = (T *)recv;
PARALLEL_FOR_LOOP
for(int w=0;w<words;w++) {
op[w]=ip[w];
}
bcopy(&from_ptr[bytes] ,&tag  ,sizeof(tag));
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
assert(check==sequence);
assert(tag==from);
} else {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
}
this->StencilBarrier();
#else
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
#endif
}
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
@ -610,63 +505,57 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
MPI_Request rrq;
int ierr;
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme   = GroupRanks[_processor];
assert(dest != _processor);
assert(from != _processor);
assert(gme  == ShmRank);
double off_node_bytes=0.0;
#ifdef FORCE_COMMS
gdest = MPI_UNDEFINED;
gfrom = MPI_UNDEFINED;
#endif
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
off_node_bytes+=bytes;
}
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
off_node_bytes+=bytes;
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list);
}
return off_node_bytes;
MPI_Request rrq;
int ierr;
assert(dest != _processor);
assert(from != _processor);
int gdest = GroupRanks[dest];
int gfrom = GroupRanks[from];
int gme   = GroupRanks[_processor];
assert(gme == ShmRank);
if ( gdest == MPI_UNDEFINED ) {
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
assert(ierr==0);
list.push_back(xrq);
}
if ( gfrom ==MPI_UNDEFINED) {
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(rrq);
}
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
{
SendToRecvFromComplete(waitall);
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
SendToRecvFromComplete(list);
}
void CartesianCommunicator::StencilBarrier(void)
{
MPI_Barrier  (ShmComm);
MPI_Win_sync (ShmWindow);
MPI_Barrier  (ShmComm);
MPI_Win_sync (ShmWindow);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
if (nreq==0) return;
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
list.resize(0);
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
@ -676,11 +565,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
communicator);
assert(ierr==0);
}
int CartesianCommunicator::RankWorld(void){
int r;
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,

View File

@ -27,7 +27,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include "Grid.h"
#include <mpi.h>
//#include <numaif.h>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// Workarounds:
@ -43,27 +42,19 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
typedef sem_t *Grid_semaphore;
#error /*THis is deprecated*/
#if 0
#define SEM_INIT(S)      S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED );
#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED );
#define SEM_POST(S) assert ( sem_post(S) == 0 );
#define SEM_WAIT(S) assert ( sem_wait(S) == 0 );
#else
#define SEM_INIT(S) ;
#define SEM_INIT_EXCL(S) ;
#define SEM_POST(S) ;
#define SEM_WAIT(S) ;
#endif
#include <sys/mman.h>
namespace Grid {
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL, COMMAND_SENDRECV };
enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL };
struct Descriptor {
uint64_t buf;
@ -71,12 +62,6 @@ struct Descriptor {
int rank;
int tag;
int command;
uint64_t xbuf;
uint64_t rbuf;
int xtag;
int rtag;
int src;
int dest;
MPI_Request request;
};
@ -109,14 +94,18 @@ public:
void SemInit(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
// printf("SEM_NAME: %s \n",sem_name);
SEM_INIT(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
// printf("SEM_NAME: %s \n",sem_name);
SEM_INIT(sem_tail);
}
void SemInitExcl(void) {
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank);
// printf("SEM_INIT_EXCL: %s \n",sem_name);
SEM_INIT_EXCL(sem_head);
sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank);
// printf("SEM_INIT_EXCL: %s \n",sem_name);
SEM_INIT_EXCL(sem_tail);
}
void WakeUpDMA(void) {
@ -136,13 +125,6 @@ public:
while(1){
WaitForCommand();
// std::cout << "Getting command "<<std::endl;
#if 0
_mm_monitor((void *)&state->head,0,0);
int s=state->start;
if ( s != state->head ) {
_mm_mwait(0,0);
}
#endif
Event();
}
}
@ -150,7 +132,6 @@ public:
int Event (void) ;
uint64_t QueueCommand(int command,void *buf, int bytes, int hashtag, MPI_Comm comm,int u_rank) ;
void QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) ;
void WaitAll() {
// std::cout << "Queueing WAIT command "<<std::endl;
@ -160,7 +141,7 @@ public:
// std::cout << "Waiting from semaphore "<<std::endl;
WaitForComplete();
// std::cout << "Checking FIFO is empty "<<std::endl;
while ( state->tail != state->head );
assert ( state->tail == state->head );
}
};
@ -215,12 +196,6 @@ public:
// std::cout << "Waking up DMA "<< slave<<std::endl;
};
static void QueueSendRecv(int slave,void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
{
Slaves[slave].QueueSendRecv(xbuf,rbuf,bytes,xtag,rtag,comm,dest,src);
Slaves[slave].WakeUpDMA();
}
static void QueueRecv(int slave, void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
// std::cout<< " Queueing recv "<< bytes<< " slave "<< slave << " from comm "<<rank <<std::endl;
Slaves[slave].QueueCommand(COMMAND_IRECV,buf,bytes,tag,comm,rank);
@ -251,28 +226,6 @@ public:
return;
};
static void QueueRoundRobinSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
static int rrp=0;
int procs = VerticalSize-1;
int myoff=0;
int mywork=bytes;
QueueSendRecv(rrp+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
rrp = rrp+1;
if ( rrp == (VerticalSize-1) ) rrp = 0;
}
static void QueueMultiplexedSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src) {
uint8_t * cxbuf = (uint8_t *) xbuf;
uint8_t * crbuf = (uint8_t *) rbuf;
int mywork, myoff, procs;
procs = VerticalSize-1;
for(int s=0;s<procs;s++) {
GetWork(bytes,s,mywork,myoff,procs);
QueueSendRecv(s+1,&cxbuf[myoff],&crbuf[myoff],mywork,xtag,rtag,comm,dest,src);
}
};
static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) {
uint8_t * cbuf = (uint8_t *) buf;
int mywork, myoff, procs;
@ -322,7 +275,6 @@ std::vector<void *> MPIoffloadEngine::VerticalShmBufs;
std::vector<std::vector<int> > MPIoffloadEngine::UniverseRanks;
std::vector<int> MPIoffloadEngine::UserCommunicatorToWorldRanks;
int CartesianCommunicator::NodeCount(void) { return HorizontalSize;};
int MPIoffloadEngine::ShmSetup = 0;
void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
@ -418,22 +370,12 @@ void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
ftruncate(fd, size);
VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if ( VerticalShmBufs[r] == MAP_FAILED ) {
perror("failed mmap");
assert(0);
}
/*
for(uint64_t page=0;page<size;page+=4096){
void *pages = (void *) ( page + (uint64_t)VerticalShmBufs[r] );
int status;
int flags=MPOL_MF_MOVE_ALL;
int nodes=1; // numa domain == MCDRAM
unsigned long count=1;
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
if (ierr && (page==0)) perror("numa relocate command failed");
}
*/
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
check[0] = WorldRank;
check[1] = r;
@ -462,7 +404,7 @@ void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world,
uint64_t * check = (uint64_t *) VerticalShmBufs[r];
assert(check[0]== WorldRank);
assert(check[1]== r);
// std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
std::cerr<<"SHM "<<r<<" " <<VerticalShmBufs[r]<<std::endl;
}
}
#endif
@@ -600,8 +542,6 @@ int Slave::Event (void) {
  static int head_last;
  static int start_last;
  int ierr;
-  MPI_Status stat;
-  static int i=0;
  ////////////////////////////////////////////////////
  // Try to advance the start pointers
@@ -610,6 +550,11 @@ int Slave::Event (void) {
  if ( s != state->head ) {
    switch ( state->Descrs[s].command ) {
    case COMMAND_ISEND:
+      /*
+      std::cout<< " Send "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
+               << " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
+               << " Comm " << MPIoffloadEngine::communicator_universe<< " me " <<universe_rank<< std::endl;
+      */
      ierr = MPI_Isend((void *)(state->Descrs[s].buf+base),
                       state->Descrs[s].bytes,
                       MPI_CHAR,
@@ -623,6 +568,11 @@ int Slave::Event (void) {
      break;
    case COMMAND_IRECV:
+      /*
+      std::cout<< " Recv "<<s << " ptr "<< state<<" "<< state->Descrs[s].buf<< "["<<state->Descrs[s].bytes<<"]"
+               << " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag
+               << " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl;
+      */
      ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base),
                     state->Descrs[s].bytes,
                     MPI_CHAR,
@@ -638,32 +588,10 @@ int Slave::Event (void) {
      return 1;
      break;
-    case COMMAND_SENDRECV:
-      // fprintf(stderr,"Sendrecv ->%d %d : <-%d %d \n",state->Descrs[s].dest, state->Descrs[s].xtag+i*10,state->Descrs[s].src, state->Descrs[s].rtag+i*10);
-      ierr=MPI_Sendrecv((void *)(state->Descrs[s].xbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].dest, state->Descrs[s].xtag+i*10,
-                        (void *)(state->Descrs[s].rbuf+base), state->Descrs[s].bytes, MPI_CHAR, state->Descrs[s].src , state->Descrs[s].rtag+i*10,
-                        MPIoffloadEngine::communicator_universe,MPI_STATUS_IGNORE);
-      assert(ierr==0);
-      // fprintf(stderr,"Sendrecv done %d %d\n",ierr,i);
-      // MPI_Barrier(MPIoffloadEngine::HorizontalComm);
-      // fprintf(stderr,"Barrier\n");
-      i++;
-      state->start = PERI_PLUS(s);
-      return 1;
-      break;
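
The deleted COMMAND_SENDRECV case folded both directions into one blocking MPI_Sendrecv and bumped each tag by i*10 per invocation, so back-to-back exchanges between the same pair of ranks cannot cross-match. Schematically (the wrapper and its name are illustrative only):

    #include <mpi.h>
    #include <cassert>

    // One combined exchange; `seq` plays the role of the deleted static counter i.
    void exchange_once(void *xbuf, void *rbuf, int bytes,
                       int dest, int xtag, int src, int rtag,
                       int seq, MPI_Comm comm)
    {
      int ierr = MPI_Sendrecv(xbuf, bytes, MPI_CHAR, dest, xtag + seq * 10,
                              rbuf, bytes, MPI_CHAR, src,  rtag + seq * 10,
                              comm, MPI_STATUS_IGNORE);
      assert(ierr == MPI_SUCCESS);
    }
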
    case COMMAND_WAITALL:
      for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){
-        if ( state->Descrs[t].command != COMMAND_SENDRECV ) {
-          MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
-        }
+        MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE);
      };
      s=PERI_PLUS(s);
      state->start = s;
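
The start/head/tail bookkeeping in Event() is one side of a single-producer, single-consumer ring. A stripped-down sketch of the protocol (the depth of 48 and the member names are assumptions):

    // Producer publishes at head; the slave issues from start and retires at
    // tail, so COMMAND_WAITALL only has to wait on the tail..start window.
    struct RingSketch {
      static const int pool = 48;                 // assumed queue depth
      volatile int head = 0, start = 0, tail = 0;
      static int peri_plus(int a) { return (a + 1) % pool; }
      void push() {                               // producer side
        int next = peri_plus(head);
        while (tail == next) ;                    // spin while the ring is full
        head = next;                              // publish the descriptor
      }
    };

Only head is written by the producer and only tail by the consumer, so no lock is needed; a weakly ordered machine would additionally need the msync/fence the original comments allude to.
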
@@ -685,45 +613,6 @@ int Slave::Event (void) {
// External interaction with the queue
//////////////////////////////////////////////////////////////////////////////
-void Slave::QueueSendRecv(void *xbuf, void *rbuf, int bytes, int xtag, int rtag, MPI_Comm comm,int dest,int src)
-{
-  int head =state->head;
-  int next = PERI_PLUS(head);
-  // Set up descriptor
-  int worldrank;
-  int hashtag;
-  MPI_Comm communicator;
-  MPI_Request request;
-  uint64_t relative;
-  relative = (uint64_t)xbuf - base;
-  state->Descrs[head].xbuf = relative;
-  relative= (uint64_t)rbuf - base;
-  state->Descrs[head].rbuf = relative;
-  state->Descrs[head].bytes = bytes;
-  MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,xtag,comm,dest);
-  state->Descrs[head].dest = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
-  state->Descrs[head].xtag = hashtag;
-  MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,rtag,comm,src);
-  state->Descrs[head].src = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank];
-  state->Descrs[head].rtag = hashtag;
-  state->Descrs[head].command= COMMAND_SENDRECV;
-  // Block until FIFO has space
-  while( state->tail==next );
-  // Msync on weak order architectures
-  // Advance pointer
-  state->head = next;
-};
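
Note what the deleted QueueSendRecv stored in its descriptor: buffer offsets relative to base, not raw pointers, because the shared window is mapped at a different virtual address in each process. The convention in miniature (helper names invented):

    #include <cstdint>

    // Pointers are only meaningful inside one process; offsets travel.
    inline uint64_t to_offset(const void *p, uint64_t base) {
      return (uint64_t)p - base;
    }
    inline void *from_offset(uint64_t off, uint64_t base) {
      return (void *)(base + off);
    }
    // Producer:  Descrs[head].xbuf = to_offset(xbuf, base);
    // Consumer:  MPI_Isend(from_offset(Descrs[s].buf, base), ...);
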
uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank)
{
  /////////////////////////////////////////
@@ -923,22 +812,19 @@ void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_
  assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) );
  assert(from!=_processor);
  assert(dest!=_processor);
-  MPIoffloadEngine::QueueMultiplexedSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
-  //MPIoffloadEngine::QueueRoundRobinSendRecv(xmit,recv,bytes,_processor,from,communicator,dest,from);
-  //MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
-  //MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
+  MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest);
+  MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from);
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
  MPIoffloadEngine::WaitAll();
-  //this->Barrier();
}
-void CartesianCommunicator::StencilBarrier(void) { }
+void CartesianCommunicator::StencilBarrier(void)
+{
+}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{

Some files were not shown because too many files have changed in this diff.