diff --git a/.gitignore b/.gitignore index 80ea8e86..e82ecf9c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ *.o *.obj - # Editor files # ################ *~ @@ -48,6 +47,7 @@ Config.h.in config.log config.status .deps +*.inc # http://www.gnu.org/software/autoconf # ######################################## @@ -63,19 +63,7 @@ config.sub config.guess INSTALL .dirstamp - -# Packages # -############ -# it's better to unpack these files and commit the raw source -# git has its own built in compression methods -*.7z -*.dmg -*.gz -*.iso -*.jar -*.rar -*.tar -*.zip +ltmain.sh # Logs and databases # ###################### @@ -101,3 +89,16 @@ build*/* ##################### *.xcodeproj/* build.sh + +# Eigen source # +################ +lib/Eigen/* + +# FFTW source # +################ +lib/fftw/* + +# libtool macros # +################## +m4/lt* +m4/libtool.m4 \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index a2154ead..ae3efda8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,10 +9,6 @@ matrix: - os: osx osx_image: xcode7.2 compiler: clang - - os: osx - osx_image: xcode7.2 - compiler: gcc - env: VERSION=-5 - compiler: gcc addons: apt: @@ -23,6 +19,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: VERSION=-4.9 - compiler: gcc @@ -35,6 +33,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: VERSION=-5 - compiler: clang @@ -47,6 +47,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz - compiler: clang @@ -59,6 +61,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz @@ -69,6 +73,7 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "linux" ]] 
&& [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi install: @@ -82,13 +87,20 @@ install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi script: - - ./scripts/reconfigure_script + - ./bootstrap.sh - mkdir build - cd build - - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none + - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none + - make -j4 + - ./benchmarks/Benchmark_dwf --threads 1 + - echo make clean + - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - make -j4 - ./benchmarks/Benchmark_dwf --threads 1 - - make clean - - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=double --enable-simd=SSE4 --enable-comms=none + - echo make clean + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi + - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto - make -j4 - - ./benchmarks/Benchmark_dwf --threads 1 + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi + diff --git a/Makefile.am b/Makefile.am index 3b1d5690..18b3ddc3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,10 @@ # additional include paths necessary to compile the C++ library -AM_CXXFLAGS = -I$(top_srcdir)/ -SUBDIRS = lib tests benchmarks +SUBDIRS = lib benchmarks tests 
-filelist: $(SUBDIRS) \ No newline at end of file +.PHONY: tests + +tests: all + $(MAKE) -C tests tests + +AM_CXXFLAGS += -I$(top_builddir)/include +ACLOCAL_AMFLAGS = -I m4 diff --git a/README b/README deleted file mode 100644 index 17e92fa0..00000000 --- a/README +++ /dev/null @@ -1,44 +0,0 @@ -This library provides data parallel C++ container classes with internal memory layout -that is transformed to map efficiently to SIMD architectures. CSHIFT facilities -are provided, similar to HPF and cmfortran, and user control is given over the mapping of -array indices to both MPI tasks and SIMD processing elements. - -* Identically shaped arrays then be processed with perfect data parallelisation. -* Such identically shapped arrays are called conformable arrays. - -The transformation is based on the observation that Cartesian array processing involves -identical processing to be performed on different regions of the Cartesian array. - -The library will (eventually) both geometrically decompose into MPI tasks and across SIMD lanes. - -Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but -optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification -for most programmers. - -The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE2 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported. - -These are presented as - - vRealF, vRealD, vComplexF, vComplexD - -internal vector data types. These may be useful in themselves for other programmers. -The corresponding scalar types are named - - RealF, RealD, ComplexF, ComplexD - -MPI parallelism is UNIMPLEMENTED and for now only OpenMP and SIMD parallelism is present in the library. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. 
Here -is are examples: - - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4 - - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX1 - - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 - - ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none - - diff --git a/README b/README new file mode 120000 index 00000000..42061c01 --- /dev/null +++ b/README @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/README.md b/README.md index 0a17bd45..f4a376f1 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,51 @@ -# Grid [![Build Status](https://travis-ci.org/paboyle/Grid.svg?branch=master)](https://travis-ci.org/paboyle/Grid) -Data parallel C++ mathematical object library +# Grid + + + + + + + + + +
Last stable release + +
Development branch + +
-Last update 2015/7/30 +**Data parallel C++ mathematical object library.** +License: GPL v2. + +Last update Nov 2016. + +_Please do not send pull requests to the `master` branch which is reserved for releases._ + +### Bug report + +_To help us track and solve issues with Grid more efficiently, please report problems using the issue system of GitHub rather than sending emails to Grid developers._ + +When you file an issue, please go through the following checklist: + +1. Check that the code is pointing to the `HEAD` of `develop` or any commit in `master` which is tagged with a version number. +2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. +3. Give the exact `configure` command used. +4. Attach `config.log`. +5. Attach `config.summary`. +6. Attach the output of `make V=1`. +7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. + + + +### Description This library provides data parallel C++ container classes with internal memory layout that is transformed to map efficiently to SIMD architectures. CSHIFT facilities are provided, similar to HPF and cmfortran, and user control is given over the mapping of array indices to both MPI tasks and SIMD processing elements. * Identically shaped arrays then be processed with perfect data parallelisation. -* Such identically shapped arrays are called conformable arrays. +* Such identically shaped arrays are called conformable arrays. The transformation is based on the observation that Cartesian array processing involves identical processing to be performed on different regions of the Cartesian array. @@ -22,37 +58,136 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. 
This is a signifi for most programmers. The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way). +Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). -These are presented as - - vRealF, vRealD, vComplexF, vComplexD - -internal vector data types. These may be useful in themselves for other programmers. -The corresponding scalar types are named - - RealF, RealD, ComplexF, ComplexD +These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. +The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. MPI, OpenMP, and SIMD parallelism are present in the library. +Please see https://arxiv.org/abs/1512.03487 for more detail. - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. 
Here -are examples: +### Quick start +First, start by cloning the repository: - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4 +``` bash +git clone https://github.com/paboyle/Grid.git +``` - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX +Then enter the cloned directory and set up the build system: - ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2 +``` bash +cd Grid +./bootstrap.sh +``` - ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none - -Note: Before running configure it could be necessary to execute the script - - script/filelist +Now you can execute the `configure` script to generate makefiles (here from a build directory): +``` bash +mkdir build; cd build +../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path> +``` - -For developers: -Use reconfigure_script in the scripts/ directory to create the autotools environment +where `--enable-precision=` sets the default precision, +`--enable-simd=` sets the SIMD type, `--enable-comms=` sets the +communication interface, and `<path>` should be replaced by the prefix path where you want to +install Grid. Other options are detailed in the next section; you can also use `configure +--help` to display them. As with any other program using GNU autotools, the +`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to +customise the build. +Finally, you can build and install Grid: + +``` bash +make; make install +``` + +To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: + +``` bash +make -C tests/<subdir> tests +``` +If you want to build all the tests at once just use `make tests`. + +### Build configuration options + +- `--prefix=<path>`: installation prefix for Grid. 
+- `--with-gmp=<path>`: look for GMP in the UNIX prefix `<path>` +- `--with-mpfr=<path>`: look for MPFR in the UNIX prefix `<path>` +- `--with-fftw=<path>`: look for FFTW in the UNIX prefix `<path>` +- `--enable-lapack[=<path>]`: enable LAPACK support in Lanczos eigensolver. A UNIX prefix containing the library can be specified (optional). +- `--enable-mkl[=<path>]`: use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional). +- `--enable-numa`: ??? +- `--enable-simd=<code>`: set up Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. +- `--enable-precision={single|double}`: set the default precision (default: `double`). +- `--enable-comms=<comm>`: use `<comm>` for message passing (default: `none`). A list of possible communication interfaces is detailed in a section below. +- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48`). +- `--disable-timers`: disable system dependent high-resolution timers. +- `--enable-chroma`: enable Chroma regression tests. + +### Possible communication interfaces + +The following options can be used with the `--enable-comms=` option to target different communication interfaces: + +| `<comm>` | Description | +| -------------- | ------------------------------------------------------------- | +| `none` | no communications | +| `mpi[-auto]` | MPI communications | +| `mpi3[-auto]` | MPI communications using MPI 3 shared memory | +| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | +| `shmem` | Cray SHMEM communications | + +For the MPI interfaces the optional `-auto` suffix instructs the `configure` script to determine all the necessary compilation and linking flags. This is done by extracting the information from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan through a list of default names). 
+ +### Possible SIMD types + +The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets: + +| `<code>` | Description | +| ----------- | -------------------------------------- | +| `GEN` | generic portable vector code | +| `SSE4` | SSE 4.2 (128 bit) | +| `AVX` | AVX (256 bit) | +| `AVXFMA` | AVX (256 bit) + FMA | +| `AVXFMA4` | AVX (256 bit) + FMA4 | +| `AVX2` | AVX 2 (256 bit) | +| `AVX512` | AVX 512 bit | +| `QPX` | QPX (256 bit) | + +Alternatively, some CPU codenames can be directly used: + +| `<code>` | Description | +| ----------- | -------------------------------------- | +| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) | +| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | +| `BGQ` | Blue Gene/Q | + +#### Notes: +- We currently support AVX512 only for the Intel compiler. Support for GCC and clang will appear in future versions of Grid when the AVX512 support within GCC and clang is more advanced. +- For BG/Q only [bgclang](http://trac.alcf.anl.gov/projects/llvm-bgq) is supported. We do not presently plan to support more compilers for this platform. +- BG/Q performance is currently rather poor. This is being investigated for future versions. + +### Build setup for Intel Knights Landing platform + +The following configuration is recommended for the Intel Knights Landing platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi-auto \ + --with-gmp=<path> \ + --with-mpfr=<path> \ + --enable-mkl \ + CXX=icpc MPICXX=mpiicpc +``` + +where `<path>` is the UNIX prefix where GMP and MPFR are installed. 
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi \ + --with-gmp= \ + --with-mpfr= \ + --enable-mkl \ + CXX=CC CC=cc +``` \ No newline at end of file diff --git a/VERSION b/VERSION index c12f9497..e7abbba7 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,6 @@ -Version : 0.5.0 +Version : 0.6.0 - AVX512, AVX2, AVX, SSE good - Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above +- MPI and MPI3 +- HiRep, Smearing, Generic gauge group diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index 21b0dd0f..969a2a42 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -25,7 +25,7 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace std; using namespace Grid; @@ -42,15 +42,14 @@ int main (int argc, char ** argv) int Nloop=10; int nmu=0; - for(int mu=0;mu<4;mu++) if (mpi_layout[mu]>1) nmu++; + for(int mu=0;mu1) nmu++; + std::cout< latt_size ({lat*mpi_layout[0], @@ -125,7 +124,7 @@ int main (int argc, char ** argv) std::cout< latt_size ({lat,lat,lat,lat}); @@ -195,6 +194,168 @@ int main (int argc, char ** argv) } + Nloop=100; + std::cout< latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + + std::vector xbuf(8); + std::vector rbuf(8); + Grid.ShmBufferFreeAll(); + for(int d=0;d<8;d++){ + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + } + + int ncomm; + int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + + double start=usecond(); + 
for(int i=0;i requests; + + ncomm=0; + for(int mu=0;mu<4;mu++){ + + if (mpi_layout[mu]>1 ) { + + ncomm++; + int comm_proc=1; + int xmit_to_rank; + int recv_from_rank; + + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); + + comm_proc = mpi_layout[mu]-1; + + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu+4][0], + xmit_to_rank, + (void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); + + } + } + Grid.StencilSendToRecvFromComplete(requests); + Grid.Barrier(); + + } + double stop=usecond(); + + double dbytes = bytes; + double xbytes = Nloop*dbytes*2.0*ncomm; + double rbytes = xbytes; + double bidibytes = xbytes+rbytes; + + double time = stop-start; // microseconds + + std::cout< latt_size ({lat*mpi_layout[0], + lat*mpi_layout[1], + lat*mpi_layout[2], + lat*mpi_layout[3]}); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + + std::vector xbuf(8); + std::vector rbuf(8); + Grid.ShmBufferFreeAll(); + for(int d=0;d<8;d++){ + xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + } + + int ncomm; + int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + + double start=usecond(); + for(int i=0;i requests; + + ncomm=0; + for(int mu=0;mu<4;mu++){ + + if (mpi_layout[mu]>1 ) { + + ncomm++; + int comm_proc=1; + int xmit_to_rank; + int recv_from_rank; + + Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); + // Grid.StencilSendToRecvFromComplete(requests); + // requests.resize(0); + + comm_proc = mpi_layout[mu]-1; + + 
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu+4][0], + xmit_to_rank, + (void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); + Grid.StencilSendToRecvFromComplete(requests); + requests.resize(0); + + } + } + Grid.Barrier(); + + } + double stop=usecond(); + + double dbytes = bytes; + double xbytes = Nloop*dbytes*2.0*ncomm; + double rbytes = xbytes; + double bidibytes = xbytes+rbytes; + + double time = stop-start; // microseconds + + std::cout< See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include -#include +#include using namespace std; using namespace Grid; @@ -45,25 +44,20 @@ struct scal { Gamma::GammaT }; -bool overlapComms = false; -typedef WilsonFermion5D WilsonFermion5DR; -typedef WilsonFermion5D WilsonFermion5DF; -typedef WilsonFermion5D WilsonFermion5DD; +typedef WilsonFermion5D WilsonFermion5DR; +typedef WilsonFermion5D WilsonFermion5DF; +typedef WilsonFermion5D WilsonFermion5DD; int main (int argc, char ** argv) { Grid_init(&argc,&argv); - if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ - overlapComms = true; - } - int threads = GridThread::GetThreads(); std::cout< latt4 = GridDefaultLatt(); - const int Ls=16; + const int Ls=8; GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); @@ -71,8 +65,8 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << "Making s innermost grids"< seeds4({1,2,3,4}); @@ -87,8 +81,6 @@ int main (int argc, char ** argv) LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - ColourMatrix cm = Complex(1.0,0.0); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); @@ -127,21 +119,27 
@@ int main (int argc, char ** argv) RealD mass=0.1; RealD M5 =1.8; - typename DomainWallFermionR::ImplParams params; - params.overlapCommsCompute = overlapComms; - RealD NP = UGrid->_Nprocessors; - for(int doasm=1;doasm<2;doasm++){ + DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - QCD::WilsonKernelsStatic::AsmOpt=doasm; + std::cout << GridLogMessage<< "*****************************************************************" <Barrier(); + Dw.ZeroCounters(); double t0=usecond(); for(int i=0;iBarrier(); double volume=Ls; for(int mu=0;mu WilsonFermion5DR; + + std::cout << GridLogMessage<< "*********************************************************" <::Dhop "< WilsonFermion5DR; LatticeFermion ssrc(sFGrid); LatticeFermion sref(sFGrid); LatticeFermion sresult(sFGrid); - WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params); + + WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); for(int x=0;xBarrier(); double t0=usecond(); + sDw.ZeroCounters(); for(int i=0;iBarrier(); double volume=Ls; for(int mu=0;mu 1.0e-6 ) { + std::cout << "site "<::DhopEO "<Barrier(); + sDw.ZeroCounters(); + sDw.stat.init("DhopEO"); double t0=usecond(); - for(int i=0;iBarrier(); + sDw.stat.print(); double volume=Ls; for(int mu=0;mu1.0e-4) { + setCheckerboard(ssrc,ssrc_o); + setCheckerboard(ssrc,ssrc_e); + std::cout<< ssrc << std::endl; + } } @@ -284,24 +324,25 @@ int main (int argc, char ** argv) // ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x tmp = U[mu]*Cshift(src,mu+1,1); for(int i=0;iBarrier(); double t0=usecond(); for(int i=0;iBarrier(); double volume=Ls; for(int mu=0;mu -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include - -using namespace std; -using namespace Grid; -using namespace Grid::QCD; - -template -struct scal { - d internal; -}; - - Gamma::GammaMatrix Gmu [] = { - Gamma::GammaX, - Gamma::GammaY, - Gamma::GammaZ, - Gamma::GammaT - }; - -bool overlapComms = false; - - -int main (int argc, char ** argv) -{ - Grid_init(&argc,&argv); - - if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ - overlapComms = true; - } - - int threads = GridThread::GetThreads(); - std::cout< latt4 = GridDefaultLatt(); - const int Ls=16; - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); - GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); - GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); - GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - - std::vector seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - - LatticeFermion src (FGrid); random(RNG5,src); - LatticeFermion result(FGrid); result=zero; - LatticeFermion ref(FGrid); ref=zero; - LatticeFermion tmp(FGrid); - LatticeFermion err(FGrid); - - ColourMatrix cm = Complex(1.0,0.0); - - 
LatticeGaugeField Umu(UGrid); - random(RNG4,Umu); - - LatticeGaugeField Umu5d(FGrid); - - // replicate across fifth dimension - for(int ss=0;ssoSites();ss++){ - for(int s=0;s U(4,FGrid); - for(int mu=0;mu(Umu5d,mu); - } - - if (1) - { - ref = zero; - for(int mu=0;mu_Nprocessors; - - - QCD::WilsonKernelsStatic::AsmOpt=1; - - DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params); - - std::cout< See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include -#include +#include using namespace std; using namespace Grid; @@ -52,22 +51,26 @@ int main (int argc, char ** argv) { Grid_init(&argc,&argv); - const int Ls=16; + std::cout << GridLogMessage<< "*****************************************************************" < & latt4, int Ls, int threads,int report ) ColourMatrix cm = Complex(1.0,0.0); - LatticeGaugeField Umu5d(FGrid); // replicate across fifth dimension @@ -145,11 +147,10 @@ void benchDw(std::vector & latt4, int Ls, int threads,int report ) } #ifdef CHECK - if (1) - { + if (1) { + ref = zero; for(int mu=0;mu & latt4, int Ls, int threads,int report ) Counter.Report(); } - if ( ! 
report ) - { - double volume=Ls; for(int mu=0;mu 1.0e-4 ) { - std::cout< 1.0e-4 ) { + std::cout< & latt4, int Ls, int threads,int report ) std::cout<< flops/(t1-t0); } } - } -#undef CHECK_SDW +#define CHECK_SDW void benchsDw(std::vector & latt4, int Ls, int threads, int report ) { @@ -243,7 +242,9 @@ void benchsDw(std::vector & latt4, int Ls, int threads, int report ) GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); + GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); + GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid); GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); @@ -277,93 +278,89 @@ void benchsDw(std::vector & latt4, int Ls, int threads, int report ) } } - RealD mass=0.1; RealD M5 =1.8; - typedef WilsonFermion5D WilsonFermion5DR; - LatticeFermion ssrc(sFGrid); - LatticeFermion sref(sFGrid); - LatticeFermion sresult(sFGrid); - WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5); + typedef WilsonFermion5D WilsonFermion5DR; + LatticeFermion ssrc(sFGrid); + LatticeFermion sref(sFGrid); + LatticeFermion sresult(sFGrid); + WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); - for(int x=0;x site({s,x,y,z,t}); - SpinColourVector tmp; - peekSite(tmp,src,site); - pokeSite(tmp,ssrc,site); - }}}}} + for(int x=0;x site({s,x,y,z,t}); + SpinColourVector tmp; + peekSite(tmp,src,site); + pokeSite(tmp,ssrc,site); + }}}}} - double t0=usecond(); - sDw.Dhop(ssrc,sresult,0); - double t1=usecond(); + double t0=usecond(); + sDw.Dhop(ssrc,sresult,0); + double t1=usecond(); #ifdef TIMERS_OFF - int ncall =10; + int ncall =10; #else - int ncall =1+(int) ((5.0*1000*1000)/(t1-t0)); + int ncall =1+(int) 
((5.0*1000*1000)/(t1-t0)); #endif - PerformanceCounter Counter(8); - Counter.Start(); - t0=usecond(); - for(int i=0;i See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace std; using namespace Grid; diff --git a/benchmarks/Benchmark_memory_bandwidth.cc b/benchmarks/Benchmark_memory_bandwidth.cc index 1fc5cbc4..435af7f4 100644 --- a/benchmarks/Benchmark_memory_bandwidth.cc +++ b/benchmarks/Benchmark_memory_bandwidth.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace std; using namespace Grid; diff --git a/benchmarks/Benchmark_su3.cc b/benchmarks/Benchmark_su3.cc index a5f71299..b6d1d303 100644 --- a/benchmarks/Benchmark_su3.cc +++ b/benchmarks/Benchmark_su3.cc @@ -26,7 +26,7 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace std; using namespace Grid; diff --git a/benchmarks/Benchmark_wilson.cc b/benchmarks/Benchmark_wilson.cc index 10aa7009..4930713c 100644 --- a/benchmarks/Benchmark_wilson.cc +++ b/benchmarks/Benchmark_wilson.cc @@ -26,7 +26,7 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ -#include +#include using namespace std; using namespace Grid; diff --git a/benchmarks/Benchmark_wilson_sweep.cc b/benchmarks/Benchmark_wilson_sweep.cc new file mode 100644 index 00000000..96e5b5e4 --- /dev/null +++ b/benchmarks/Benchmark_wilson_sweep.cc @@ -0,0 
+1,130 @@ +/************************************************************************************* + Grid physics library, www.github.com/paboyle/Grid + Source file: ./benchmarks/Benchmark_wilson.cc + Copyright (C) 2015 +Author: Peter Boyle +Author: paboyle +Author: Richard Rollins + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +template +struct scal { + d internal; +}; + +Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT +}; + +bool overlapComms = false; + +void bench_wilson ( + LatticeFermion & src, + LatticeFermion & result, + WilsonFermionR & Dw, + double const volume, + int const dag ); + +int main (int argc, char ** argv) +{ + Grid_init(&argc,&argv); + if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; } + typename WilsonFermionR::ImplParams params; + params.overlapCommsCompute = overlapComms; + + std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector seeds({1,2,3,4}); + RealD mass = 0.1; + + std::cout << 
GridLogMessage<< "*****************************************************************" < latt_size = std::vector(4,L); + for(int d=4; d>dmin; d--) + { + if ( d<=3 ) { latt_size[d] *= 2; } + + std::cout << GridLogMessage; + std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator( std::cout, std::string("x").c_str() ) ); + std::cout << latt_size.back() << "\t\t"; + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout); + + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); + LatticeGaugeField Umu(&Grid); random(pRNG,Umu); + LatticeFermion src(&Grid); random(pRNG,src); + LatticeFermion result(&Grid); result=zero; + + double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies()); + + WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params); + + bench_wilson(src,result,Dw,volume,DaggerNo); + bench_wilson(src,result,Dw,volume,DaggerYes); + std::cout << std::endl; + } + } + + std::cout< - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#include -#include - - -using namespace Grid; -using namespace Grid::QCD; - - -int bench(std::ofstream &os, std::vector &latt4,int Ls); - -int main(int argc,char **argv) -{ - Grid_init(&argc,&argv); - std::ofstream os("zmm.dat"); - - os << "#V Ls Lxy Lzt C++ Asm OMP L1 " < grid({L,L,m*L,m*L}); - for(int i=0;i<4;i++) { - std::cout << grid[i]<<"x"; - } - std::cout << Ls< &latt4,int Ls) -{ - - GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt4, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); - GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); - GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); - GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); - - std::vector simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); - std::vector mpi_layout = GridDefaultMpi(); - int threads = GridThread::GetThreads(); - - std::vector seeds4({1,2,3,4}); - std::vector seeds5({5,6,7,8}); - - GridSerialRNG sRNG; sRNG.SeedFixedIntegers(seeds4); - - LatticeFermion src (FGrid); - LatticeFermion tmp (FGrid); - LatticeFermion srce(FrbGrid); - - LatticeFermion resulto(FrbGrid); resulto=zero; - LatticeFermion resulta(FrbGrid); resulta=zero; - LatticeFermion junk(FrbGrid); junk=zero; - LatticeFermion diff(FrbGrid); - LatticeGaugeField Umu(UGrid); - - double mfc, mfa, mfo, mfl1; - - GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); - GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); - random(RNG5,src); -#if 1 - random(RNG4,Umu); -#else - int mmu=2; - std::vector U(4,UGrid); - for(int mu=0;mu(Umu,mu); - if ( mu!=mmu ) U[mu] = zero; - if ( mu==mmu ) U[mu] = 1.0; - PokeIndex(Umu,U[mu],mu); - } -#endif - pickCheckerboard(Even,srce,src); - - RealD mass=0.1; - RealD M5 =1.8; - DomainWallFermionR 
Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - - std::cout< - AC_PREREQ([2.63]) -AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk]) -AC_CANONICAL_SYSTEM +AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) +AC_CANONICAL_BUILD +AC_CANONICAL_HOST +AC_CANONICAL_TARGET AM_INIT_AUTOMAKE(subdir-objects) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_HEADERS([lib/Config.h]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) -AC_MSG_NOTICE([ - -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -Configuring $PACKAGE v$VERSION for $host -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -]) - -# Checks for programs. -AC_LANG(C++) +############### Checks for programs +CXXFLAGS="-O3 $CXXFLAGS" AC_PROG_CXX -AC_OPENMP AC_PROG_RANLIB -#AX_CXX_COMPILE_STDCXX_11(noext, mandatory) -AX_EXT -# Checks for libraries. -#AX_GCC_VAR_ATTRIBUTE(aligned) +############### Get compiler informations +AC_LANG([C++]) +AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) +AX_COMPILER_VENDOR +AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"], + [vendor of C++ compiler that will compile the code]) +AX_GXX_VERSION +AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], + [version of g++ that will compile the code]) -# Checks for header files. 
+############### Checks for typedefs, structures, and compiler characteristics +AC_TYPE_SIZE_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T + +############### OpenMP +AC_OPENMP +ac_openmp=no +if test "${OPENMP_CXXFLAGS}X" != "X"; then + ac_openmp=yes + AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" + AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" +fi + +############### Checks for header files AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(mm_malloc.h) AC_CHECK_HEADERS(malloc/malloc.h) AC_CHECK_HEADERS(malloc.h) AC_CHECK_HEADERS(endian.h) AC_CHECK_HEADERS(execinfo.h) -AC_CHECK_HEADERS(gmp.h) AC_CHECK_DECLS([ntohll],[], [], [[#include ]]) AC_CHECK_DECLS([be64toh],[], [], [[#include ]]) -# Checks for typedefs, structures, and compiler characteristics. -AC_TYPE_SIZE_T -AC_TYPE_UINT32_T -AC_TYPE_UINT64_T +############### GMP and MPFR +AC_ARG_WITH([gmp], + [AS_HELP_STRING([--with-gmp=prefix], + [try this for a non-standard install prefix of the GMP library])], + [AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"] + [AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"]) +AC_ARG_WITH([mpfr], + [AS_HELP_STRING([--with-mpfr=prefix], + [try this for a non-standard install prefix of the MPFR library])], + [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] + [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) -# Checks for library functions. 
-echo -echo Checking libraries -echo ::::::::::::::::::::::::::::::::::::::::::: +############### FFTW3 +AC_ARG_WITH([fftw], + [AS_HELP_STRING([--with-fftw=prefix], + [try this for a non-standard install prefix of the FFTW3 library])], + [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] + [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) + +############### lapack +AC_ARG_ENABLE([lapack], + [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], + [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) + +case ${ac_LAPACK} in + no) + ;; + yes) + AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; + *) + AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS" + AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS" + AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; +esac + +############### MKL +AC_ARG_ENABLE([mkl], + [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], + [ac_MKL=${enable_mkl}], [ac_MKL=no]) + +case ${ac_MKL} in + no) + ;; + yes) + AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; + *) + AM_CXXFLAGS="-I$ac_MKL/include $AM_CXXFLAGS" + AM_LDFLAGS="-L$ac_MKL/lib $AM_LDFLAGS" + AC_DEFINE([USE_MKL], [1], [Define to 1 if you use the Intel MKL]);; +esac + +############### first-touch +AC_ARG_ENABLE([numa], + [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], + [ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) + +case ${ac_NUMA} in + no) + ;; + yes) + AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);; + *) + AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);; +esac + +############### Checks for library functions +CXXFLAGS_CPY=$CXXFLAGS +LDFLAGS_CPY=$LDFLAGS +CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" +LDFLAGS="$AM_LDFLAGS $LDFLAGS" AC_CHECK_FUNCS([gettimeofday]) -#AC_CHECK_LIB([gmp],[__gmpf_init],, -# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system. 
-#Please install or provide the correct path to your installation -#Info at: http://www.gmplib.org)]) +if test "${ac_MKL}x" != "nox"; then + AC_SEARCH_LIBS([mkl_set_interface_layer], [mkl_rt], [], + [AC_MSG_ERROR("MKL enabled but library not found")]) +fi -#AC_CHECK_LIB([mpfr],[mpfr_init],, -# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system. -#Please install or provide the correct path to your installation -#Info at: http://www.mpfr.org/)]) +AC_SEARCH_LIBS([__gmpf_init], [gmp], + [AC_SEARCH_LIBS([mpfr_init], [mpfr], + [AC_DEFINE([HAVE_LIBMPFR], [1], + [Define to 1 if you have the `MPFR' library])] + [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] + [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] + [have_gmp=true]) -# -# SIMD instructions selection -# +if test "${ac_LAPACK}x" != "nox"; then + AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], + [AC_MSG_ERROR("LAPACK enabled but library not found")]) +fi -AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\ - [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ - [ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG]) +AC_SEARCH_LIBS([fftw_execute], [fftw3], + [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], + [AC_MSG_ERROR("single precision FFTW library not found")])] + [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] + [have_fftw=true]) -supported=no +CXXFLAGS=$CXXFLAGS_CPY +LDFLAGS=$LDFLAGS_CPY -ac_ZMM=no; +############### SIMD instruction selection +AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=], + [select SIMD target (cf. 
README.md)])], [ac_SIMD=${enable_simd}], [ac_SIMD=GEN]) + +case ${ax_cv_cxx_compiler_vendor} in + clang|gnu) + case ${ac_SIMD} in + SSE4) + AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) + SIMD_FLAGS='-msse4.2';; + AVX) + AC_DEFINE([AVX1],[1],[AVX intrinsics]) + SIMD_FLAGS='-mavx';; + AVXFMA4) + AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) + SIMD_FLAGS='-mavx -mfma4';; + AVXFMA) + AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) + SIMD_FLAGS='-mavx -mfma';; + AVX2) + AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) + SIMD_FLAGS='-mavx2 -mfma';; + AVX512) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; + KNC) + AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner]) + SIMD_FLAGS='';; + KNL) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + SIMD_FLAGS='-march=knl';; + GEN) + AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) + SIMD_FLAGS='';; + QPX|BGQ) + AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) + SIMD_FLAGS='';; + *) + AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);; + esac;; + intel) + case ${ac_SIMD} in + SSE4) + AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) + SIMD_FLAGS='-msse4.2 -xsse4.2';; + AVX) + AC_DEFINE([AVX1],[1],[AVX intrinsics]) + SIMD_FLAGS='-mavx -xavx';; + AVXFMA) + AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4]) + SIMD_FLAGS='-mavx -mfma';; + AVX2) + AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) + SIMD_FLAGS='-march=core-avx2 -xcore-avx2';; + AVX512) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + SIMD_FLAGS='-xcore-avx512';; + KNC) + AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) + SIMD_FLAGS='';; + KNL) + AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) + SIMD_FLAGS='-xmic-avx512';; + GEN) + AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) + SIMD_FLAGS='';; + *) + AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);; + esac;; + *) + AC_MSG_WARN([Compiler unknown, using generic vector code]) + 
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);; +esac +AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS" +AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS" case ${ac_SIMD} in - SSE4) - echo Configuring for SSE4 - AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] ) - if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4 - supported=yes - else - AC_MSG_WARN([Your processor does not support SSE4 instructions]) - fi - ;; - AVX) - echo Configuring for AVX - AC_DEFINE([AVX1],[1],[AVX Intrinsics] ) - if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX - supported=yes - else - AC_MSG_WARN([Your processor does not support AVX instructions]) - fi - ;; - AVXFMA4) - echo Configuring for AVX - AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] ) - if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX - supported=yes - else - AC_MSG_WARN([Your processor does not support AVX instructions]) - fi - ;; - AVX2) - echo Configuring for AVX2 - AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] ) - if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2 - supported=yes - else - AC_MSG_WARN([Your processor does not support AVX2 instructions]) - fi - ;; - AVX512) - echo Configuring for AVX512 - AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] ) - supported="cross compilation" - ac_ZMM=yes; - ;; - IMCI) - echo Configuring for IMCI - AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] ) - supported="cross compilation" - ac_ZMM=no; - ;; - NEONv8) - echo Configuring for experimental ARMv8a support - AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] ) - supported="cross compilation" - ;; - DEBUG) - echo Configuring without SIMD support - only for compiler DEBUGGING! 
- AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] ) - ;; - *) - AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]); - ;; + AVX512|KNL) + AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);; + *) + ;; esac -case ${ac_ZMM} in -yes) - echo Enabling ZMM source code -;; -no) - echo Disabling ZMM source code -;; -esac +############### Precision selection +AC_ARG_ENABLE([precision], + [AC_HELP_STRING([--enable-precision=single|double], + [Select default word size of Real])], + [ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) -AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ]) - -AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) case ${ac_PRECISION} in single) - echo default precision is single AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] ) ;; double) - echo default precision is double AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) ;; esac -# -# Comms selection -# - -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) +############### communication type selection +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|mpi3|mpi3-auto|shmem], + [Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in none) - echo Configuring for NO communications - AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) + AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) + comms_type='none' ;; - mpi) - echo Configuring for MPI communications - AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) + mpi3l*) + AC_DEFINE([GRID_COMMS_MPI3L],[1],[GRID_COMMS_MPI3L] ) + comms_type='mpi3l' + ;; + mpi3*) + AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) + comms_type='mpi3' + ;; + mpi*) + 
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) + comms_type='mpi' ;; shmem) - echo Configuring for SHMEM communications - AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) + AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) + comms_type='shmem' ;; *) - AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); + AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); ;; esac +case ${ac_COMMS} in + *-auto) + LX_FIND_MPI + if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi + AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS" + AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS" + AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS" + LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS";; + *) + ;; +esac -AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) -AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ]) -AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) +AM_CONDITIONAL(BUILD_COMMS_SHMEM, [ test "${comms_type}X" == "shmemX" ]) +AM_CONDITIONAL(BUILD_COMMS_MPI, [ test "${comms_type}X" == "mpiX" ]) +AM_CONDITIONAL(BUILD_COMMS_MPI3, [ test "${comms_type}X" == "mpi3X" ] ) +AM_CONDITIONAL(BUILD_COMMS_MPI3L, [ test "${comms_type}X" == "mpi3lX" ] ) +AM_CONDITIONAL(BUILD_COMMS_NONE, [ test "${comms_type}X" == "noneX" ]) -# -# RNG selection -# +############### RNG selection AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\ - [Select Random Number Generator to be used])],\ - [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) + [Select Random Number Generator to be used])],\ + [ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) + case ${ac_RNG} in ranlux48) - AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] ) + AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] ) ;; mt19937) - AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] ) + AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] ) ;; *) - AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); + AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng 
option]); ;; esac -# -# SDE timing mode -# -AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\ - [Enable system dependent high res timers])],\ - [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) +############### Timer option +AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\ + [Enable system dependent high res timers])],\ + [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) + case ${ac_TIMERS} in yes) - AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] ) + AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] ) ;; no) - AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) + AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) ;; *) - AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); + AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); ;; esac -# -# Chroma regression tests -# -AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) +############### Chroma regression test +AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma], + [Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) case ${ac_CHROMA} in - yes) - echo Enabling tests regressing to Chroma - ;; - no) - echo Disabling tests regressing to Chroma + yes|no) ;; *) - AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); + AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); ;; esac AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ]) -# -# Lapack -# -AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) +############### Doxygen +AC_PROG_DOXYGEN -case ${ac_LAPACK} in - yes) - echo Enabling lapack - ;; - no) - echo Disabling lapack - ;; - *) - echo Enabling lapack at ${ac_LAPACK} - ;; -esac +if test -n "$DOXYGEN" +then +AC_CONFIG_FILES([docs/doxy.cfg]) +fi -AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ]) -AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ]) - 
-################################################################### -# Checks for doxygen support -# if present enables the "make doxyfile" command -#echo -#echo Checking doxygen support -#echo ::::::::::::::::::::::::::::::::::::::::::: -#AC_PROG_DOXYGEN - -#if test -n "$DOXYGEN" -#then -#AC_CONFIG_FILES([docs/doxy.cfg]) -#fi - -echo -echo Creating configuration files -echo ::::::::::::::::::::::::::::::::::::::::::: +############### Ouput +cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} +AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" +AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" +AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" +AC_SUBST([AM_CFLAGS]) +AC_SUBST([AM_CXXFLAGS]) +AC_SUBST([AM_LDFLAGS]) AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(lib/Makefile) AC_CONFIG_FILES(tests/Makefile) +AC_CONFIG_FILES(tests/IO/Makefile) +AC_CONFIG_FILES(tests/core/Makefile) +AC_CONFIG_FILES(tests/debug/Makefile) +AC_CONFIG_FILES(tests/forces/Makefile) +AC_CONFIG_FILES(tests/hmc/Makefile) +AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(benchmarks/Makefile) AC_OUTPUT - -echo " -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Summary of configuration for $PACKAGE v$VERSION ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The following features are enabled: - -- architecture (build) : $build_cpu -- os (build) : $build_os -- architecture (target) : $target_cpu -- os (target) : $target_os -- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` -- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` -- Supported SIMD flags : $SIMD_FLAGS ----------------------------------------------------------- -- enabled simd support : ${ac_SIMD} (config macro says supported: $supported ) -- communications type : ${ac_COMMS} -- default precision : ${ac_PRECISION} -- RNG choice : ${ac_RNG} -- 
LAPACK : ${ac_LAPACK} - - -" +----- PLATFORM ---------------------------------------- +architecture (build) : $build_cpu +os (build) : $build_os +architecture (target) : $target_cpu +os (target) : $target_os +compiler vendor : ${ax_cv_cxx_compiler_vendor} +compiler version : ${ax_cv_gxx_version} +----- BUILD OPTIONS ----------------------------------- +SIMD : ${ac_SIMD} +Threading : ${ac_openmp} +Communications type : ${comms_type} +Default precision : ${ac_PRECISION} +RNG choice : ${ac_RNG} +GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` +LAPACK : ${ac_LAPACK} +FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi` +build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` +graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` +----- BUILD FLAGS ------------------------------------- +CXXFLAGS: +`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` +LDFLAGS: +`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` +LIBS: +`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'` +-------------------------------------------------------" > config.summary +echo "" +cat config.summary +echo "" diff --git a/include/Grid b/include/Grid new file mode 120000 index 00000000..dc598c56 --- /dev/null +++ b/include/Grid @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/lib/Algorithms.h b/lib/Algorithms.h index 0a3d34ce..67eb11c3 100644 --- a/lib/Algorithms.h +++ b/lib/Algorithms.h @@ -29,27 +29,28 @@ Author: Peter Boyle #ifndef GRID_ALGORITHMS_H #define GRID_ALGORITHMS_H -#include -#include -#include +#include +#include +#include -#include -#include -#include -#include +#include +#include +#include +#include -#include -#include -#include -#include +#include +#include +#include +#include -#include +#include +#include // Lanczos support -#include -#include +#include +#include -#include +#include // Eigen/lanczos // EigCg diff --git 
a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index 2cd8263d..a8b9c53b 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -40,14 +40,6 @@ Author: Peter Boyle #include #endif -#ifdef GRID_COMMS_SHMEM -extern "C" { -#include -extern void * shmem_align(size_t, size_t); -extern void shmem_free(void *); -} -#endif - namespace Grid { //////////////////////////////////////////////////////////////////// @@ -65,28 +57,85 @@ public: typedef _Tp value_type; template struct rebind { typedef alignedAllocator<_Tp1> other; }; - alignedAllocator() throw() { } - alignedAllocator(const alignedAllocator&) throw() { } - template alignedAllocator(const alignedAllocator<_Tp1>&) throw() { } - ~alignedAllocator() throw() { } - pointer address(reference __x) const { return &__x; } - // const_pointer address(const_reference __x) const { return &__x; } - size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } pointer allocate(size_type __n, const void* _p= 0) { +#ifdef HAVE_MM_MALLOC_H + _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); +#else + _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); +#endif + + _Tp tmp; +#ifdef GRID_NUMA +#pragma omp parallel for schedule(static) + for(int i=0;i<__n;i++){ + ptr[i]=tmp; + } +#endif + return ptr; + } + + void deallocate(pointer __p, size_type) { +#ifdef HAVE_MM_MALLOC_H + _mm_free((void *)__p); +#else + free((void *)__p); +#endif + } + void construct(pointer __p, const _Tp& __val) { }; + void construct(pointer __p) { }; + void destroy(pointer __p) { }; +}; +template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } +template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } + +////////////////////////////////////////////////////////////////////////////////////////// +// MPI3 : comms must use shm region +// SHMEM: comms must use symmetric heap 
+////////////////////////////////////////////////////////////////////////////////////////// #ifdef GRID_COMMS_SHMEM - - _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); - - +extern "C" { +#include +extern void * shmem_align(size_t, size_t); +extern void shmem_free(void *); +} #define PARANOID_SYMMETRIC_HEAP +#endif + +template +class commAllocator { +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef _Tp* pointer; + typedef const _Tp* const_pointer; + typedef _Tp& reference; + typedef const _Tp& const_reference; + typedef _Tp value_type; + + template struct rebind { typedef commAllocator<_Tp1> other; }; + commAllocator() throw() { } + commAllocator(const commAllocator&) throw() { } + template commAllocator(const commAllocator<_Tp1>&) throw() { } + ~commAllocator() throw() { } + pointer address(reference __x) const { return &__x; } + size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } + +#ifdef GRID_COMMS_SHMEM + pointer allocate(size_type __n, const void* _p= 0) + { +#ifdef CRAY + _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); +#else + _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp)); +#endif #ifdef PARANOID_SYMMETRIC_HEAP static void * bcast; static long psync[_SHMEM_REDUCE_SYNC_SIZE]; @@ -96,55 +145,47 @@ public: if ( bcast != ptr ) { std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout); - BACKTRACEFILE(); + // BACKTRACEFILE(); exit(0); } - assert( bcast == (void *) ptr); - #endif + return ptr; + } + void deallocate(pointer __p, size_type) { + shmem_free((void *)__p); + } #else - + pointer allocate(size_type __n, const void* _p= 0) + { #ifdef HAVE_MM_MALLOC_H _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); #else _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); #endif - -#endif - _Tp tmp; -#undef FIRST_TOUCH_OPTIMISE -#ifdef FIRST_TOUCH_OPTIMISE -#pragma omp parallel for - for(int i=0;i<__n;i++){ - ptr[i]=tmp; - } -#endif return ptr; } - void 
deallocate(pointer __p, size_type) { -#ifdef GRID_COMMS_SHMEM - shmem_free((void *)__p); -#else #ifdef HAVE_MM_MALLOC_H _mm_free((void *)__p); #else free((void *)__p); -#endif #endif } +#endif void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; - void destroy(pointer __p) { }; }; +template inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; } +template inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; } -template inline bool -operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } - -template inline bool -operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } +//////////////////////////////////////////////////////////////////////////////// +// Template typedefs +//////////////////////////////////////////////////////////////////////////////// +template using Vector = std::vector >; +template using commVector = std::vector >; +template using Matrix = std::vector > >; }; // namespace Grid #endif diff --git a/lib/Cartesian.h b/lib/Cartesian.h index aeffe331..f3710a48 100644 --- a/lib/Cartesian.h +++ b/lib/Cartesian.h @@ -28,8 +28,8 @@ Author: Peter Boyle #ifndef GRID_CARTESIAN_H #define GRID_CARTESIAN_H -#include -#include -#include +#include +#include +#include #endif diff --git a/lib/Communicator.h b/lib/Communicator.h index bc3ae166..09ce50dc 100644 --- a/lib/Communicator.h +++ b/lib/Communicator.h @@ -28,6 +28,6 @@ Author: Peter Boyle #ifndef GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H -#include +#include #endif diff --git a/lib/Cshift.h b/lib/Cshift.h index 675544e2..cd162e35 100644 --- a/lib/Cshift.h +++ b/lib/Cshift.h @@ -28,17 +28,25 @@ Author: Peter Boyle #ifndef _GRID_CSHIFT_H_ #define _GRID_CSHIFT_H_ -#include +#include #ifdef GRID_COMMS_NONE -#include +#include #endif #ifdef GRID_COMMS_MPI -#include +#include +#endif + +#ifdef GRID_COMMS_MPI3 +#include +#endif + +#ifdef 
GRID_COMMS_MPI3L +#include #endif #ifdef GRID_COMMS_SHMEM -#include // uses same implementation of communicator +#include // uses same implementation of communicator #endif #endif diff --git a/lib/FFT.h b/lib/FFT.h new file mode 100644 index 00000000..b5b31d82 --- /dev/null +++ b/lib/FFT.h @@ -0,0 +1,302 @@ + + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/Cshift.h + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef _GRID_FFT_H_ +#define _GRID_FFT_H_ + +#ifdef HAVE_FFTW +#ifdef USE_MKL +#include +#else +#include +#endif +#endif + + +namespace Grid { + + template struct FFTW { }; + +#ifdef HAVE_FFTW + template<> struct FFTW { + public: + + typedef fftw_complex FFTW_scalar; + typedef fftw_plan FFTW_plan; + + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } + + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftw_flops(p,add,mul,fmas); + } + + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftw_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftw_destroy_plan(p); + } + }; + + template<> struct FFTW { + public: + + typedef fftwf_complex FFTW_scalar; + typedef fftwf_plan FFTW_plan; + + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } + + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftwf_flops(p,add,mul,fmas); + } + + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftwf_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftwf_destroy_plan(p); + } + 
}; + +#endif + +#ifndef FFTW_FORWARD +#define FFTW_FORWARD (-1) +#define FFTW_BACKWARD (+1) +#endif + + class FFT { + private: + + GridCartesian *vgrid; + GridCartesian *sgrid; + + int Nd; + double flops; + double flops_call; + uint64_t usec; + + std::vector dimensions; + std::vector processors; + std::vector processor_coor; + + public: + + static const int forward=FFTW_FORWARD; + static const int backward=FFTW_BACKWARD; + + double Flops(void) {return flops;} + double MFlops(void) {return flops/usec;} + double USec(void) {return (double)usec;} + + FFT ( GridCartesian * grid ) : + vgrid(grid), + Nd(grid->_ndimension), + dimensions(grid->_fdimensions), + processors(grid->_processors), + processor_coor(grid->_processor_coor) + { + flops=0; + usec =0; + std::vector layout(Nd,1); + sgrid = new GridCartesian(dimensions,layout,processors); + }; + + ~FFT ( void) { + delete sgrid; + } + + template + void FFT_dim_mask(Lattice &result,const Lattice &source,std::vector mask,int sign){ + + conformable(result._grid,vgrid); + conformable(source._grid,vgrid); + Lattice tmp(vgrid); + tmp = source; + for(int d=0;d + void FFT_all_dim(Lattice &result,const Lattice &source,int sign){ + std::vector mask(Nd,1); + FFT_dim_mask(result,source,mask,sign); + } + + + template + void FFT_dim(Lattice &result,const Lattice &source,int dim, int sign){ +#ifndef HAVE_FFTW + assert(0); +#else + conformable(result._grid,vgrid); + conformable(source._grid,vgrid); + + int L = vgrid->_ldimensions[dim]; + int G = vgrid->_fdimensions[dim]; + + std::vector layout(Nd,1); + std::vector pencil_gd(vgrid->_fdimensions); + + pencil_gd[dim] = G*processors[dim]; + + // Pencil global vol LxLxGxLxL per node + GridCartesian pencil_g(pencil_gd,layout,processors); + + // Construct pencils + typedef typename vobj::scalar_object sobj; + typedef typename sobj::scalar_type scalar; + + Lattice pgbuf(&pencil_g); + + + typedef typename FFTW::FFTW_scalar FFTW_scalar; + typedef typename FFTW::FFTW_plan FFTW_plan; + + int Ncomp = 
sizeof(sobj)/sizeof(scalar); + int Nlow = 1; + for(int d=0;d_ldimensions[d]; + } + + int rank = 1; /* 1d transforms */ + int n[] = {G}; /* 1d transforms of length G */ + int howmany = Ncomp; + int odist,idist,istride,ostride; + idist = odist = 1; /* Distance between consecutive FT's */ + istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */ + int *inembed = n, *onembed = n; + + scalar div; + if ( sign == backward ) div = 1.0/G; + else if ( sign == forward ) div = 1.0; + else assert(0); + + FFTW_plan p; + { + FFTW_scalar *in = (FFTW_scalar *)&pgbuf._odata[0]; + FFTW_scalar *out= (FFTW_scalar *)&pgbuf._odata[0]; + p = FFTW::fftw_plan_many_dft(rank,n,howmany, + in,inembed, + istride,idist, + out,onembed, + ostride, odist, + sign,FFTW_ESTIMATE); + } + + // Barrel shift and collect global pencil + std::vector lcoor(Nd), gcoor(Nd); + result = source; + for(int p=0;p cbuf(Nd); + sobj s; + + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idxlSites();idx++) { + sgrid->LocalIndexToLocalCoor(idx,cbuf); + peekLocalSite(s,result,cbuf); + cbuf[dim]+=p*L; + pokeLocalSite(s,pgbuf,cbuf); + } + } + result = Cshift(result,dim,L); + } + + // Loop over orthog coords + int NN=pencil_g.lSites(); + GridStopWatch timer; + timer.Start(); + PARALLEL_REGION + { + std::vector cbuf(Nd); + + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idx::fftw_execute_dft(p,in,out); + } + } + } + timer.Stop(); + + // performance counting + double add,mul,fma; + FFTW::fftw_flops(p,&add,&mul,&fma); + flops_call = add+mul+2.0*fma; + usec += timer.useconds(); + flops+= flops_call*NN; + + // writing out result + int pc = processor_coor[dim]; + PARALLEL_REGION + { + std::vector clbuf(Nd), cgbuf(Nd); + sobj s; + + PARALLEL_FOR_LOOP_INTERN + for(int idx=0;idxlSites();idx++) { + sgrid->LocalIndexToLocalCoor(idx,clbuf); + cgbuf = clbuf; + cgbuf[dim] = clbuf[dim]+L*pc; + peekLocalSite(s,pgbuf,cgbuf); + s = s * div; + pokeLocalSite(s,result,clbuf); + } + } + + // destroying plan + 
FFTW::fftw_destroy_plan(p); +#endif + } + }; +} + +#endif diff --git a/lib/Grid.h b/lib/Grid.h index eb2be1d1..0c5983f3 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -59,29 +59,30 @@ Author: paboyle /////////////////// // Grid headers /////////////////// -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include "Config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include -#include +#include +#include +#include +#include diff --git a/lib/Init.cc b/lib/Init.cc index 6ab8e7b7..d6d6b9f8 100644 --- a/lib/Init.cc +++ b/lib/Init.cc @@ -44,9 +44,33 @@ Author: paboyle #include #include #include +#include +#include + + +#include +#ifdef __APPLE__ +static int +feenableexcept (unsigned int excepts) +{ + static fenv_t fenv; + unsigned int new_excepts = excepts & FE_ALL_EXCEPT, + old_excepts; // previous masks + + if ( fegetenv (&fenv) ) return -1; + old_excepts = fenv.__control & FE_ALL_EXCEPT; + + // unmask + fenv.__control &= ~new_excepts; + fenv.__mxcsr &= ~(new_excepts << 7); + + return ( fesetenv (&fenv) ? 
-1 : old_excepts ); +} +#endif namespace Grid { + ////////////////////////////////////////////////////// // Convenience functions to access stadard command line arg // driven parallelism controls @@ -123,6 +147,13 @@ void GridCmdOptionIntVector(std::string &str,std::vector & vec) return; } +void GridCmdOptionInt(std::string &str,int & val) +{ + std::stringstream ss(str); + ss>>val; + return; +} + void GridParseLayout(char **argv,int argc, std::vector &latt, @@ -154,12 +185,11 @@ void GridParseLayout(char **argv,int argc, GridThread::SetThreads(ompthreads[0]); } if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ - std::vector cores(0); + int cores; arg= GridCmdOptionPayload(argv,argv+argc,"--cores"); - GridCmdOptionIntVector(arg,cores); - GridThread::SetCores(cores[0]); + GridCmdOptionInt(arg,cores); + GridThread::SetCores(cores); } - } std::string GridCmdVectorIntToString(const std::vector & vec){ @@ -168,33 +198,40 @@ std::string GridCmdVectorIntToString(const std::vector & vec){ return oss.str(); } ///////////////////////////////////////////////////////// -// +// Reinit guard ///////////////////////////////////////////////////////// +static int Grid_is_initialised = 0; + + void Grid_init(int *argc,char ***argv) { - CartesianCommunicator::Init(argc,argv); - - // Parse command line args. 
- GridLogger::StopWatch.Start(); std::string arg; + + //////////////////////////////////// + // Shared memory block size + //////////////////////////////////// + if( GridCmdOptionExists(*argv,*argv+*argc,"--shm") ){ + int MB; + arg= GridCmdOptionPayload(*argv,*argv+*argc,"--shm"); + GridCmdOptionInt(arg,MB); + CartesianCommunicator::MAX_MPI_SHM_BYTES = MB*1024*1024; + } + + CartesianCommunicator::Init(argc,argv); + + //////////////////////////////////// + // Logging + //////////////////////////////////// + std::vector logstreams; std::string defaultLog("Error,Warning,Message,Performance"); - GridCmdOptionCSL(defaultLog,logstreams); GridLogConfigure(logstreams); - if( GridCmdOptionExists(*argv,*argv+*argc,"--help") ){ - std::cout< -#endif + void Grid_debug_handler_init(void) { struct sigaction sa,osa; @@ -329,9 +425,9 @@ void Grid_debug_handler_init(void) sa.sa_flags = SA_SIGINFO; sigaction(SIGSEGV,&sa,NULL); sigaction(SIGTRAP,&sa,NULL); -#ifdef GRID_FPE + feenableexcept( FE_INVALID|FE_OVERFLOW|FE_DIVBYZERO); + sigaction(SIGFPE,&sa,NULL); -#endif } } diff --git a/lib/Init.h b/lib/Init.h index 25fda569..6b70d42d 100644 --- a/lib/Init.h +++ b/lib/Init.h @@ -33,6 +33,7 @@ namespace Grid { void Grid_init(int *argc,char ***argv); void Grid_finalize(void); + // internal, controled with --handle void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr); void Grid_debug_handler_init(void); @@ -44,6 +45,7 @@ namespace Grid { const std::vector &GridDefaultMpi(void); const int &GridThreads(void) ; void GridSetThreads(int t) ; + void GridLogTimestamp(int); // Common parsing chores std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option); @@ -52,6 +54,7 @@ namespace Grid { void GridCmdOptionCSL(std::string str,std::vector & vec); void GridCmdOptionIntVector(std::string &str,std::vector & vec); + void GridParseLayout(char **argv,int argc, std::vector &latt, std::vector &simd, diff --git a/lib/Lattice.h b/lib/Lattice.h index 5171e999..e2bb2a82 
100644 --- a/lib/Lattice.h +++ b/lib/Lattice.h @@ -28,6 +28,6 @@ Author: Peter Boyle #ifndef GRID_LATTICE_H #define GRID_LATTICE_H -#include +#include #endif diff --git a/lib/Log.cc b/lib/Log.cc index 06724cab..7521657b 100644 --- a/lib/Log.cc +++ b/lib/Log.cc @@ -1,126 +1,112 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/Log.cc +Source file: ./lib/Log.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Antonin Portelli Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include +#include + namespace Grid { + std::string demangle(const char* name) { + + int status = -4; // some arbitrary value to eliminate the compiler warning + + // enable c++11 by passing the flag -std=c++11 to g++ + std::unique_ptr res { + abi::__cxa_demangle(name, NULL, NULL, &status), + std::free + }; + + return (status==0) ? res.get() : name ; + } + GridStopWatch Logger::StopWatch; -std::ostream Logger::devnull(0); -std::string Logger::BLACK("\033[30m"); -std::string Logger::RED("\033[31m"); -std::string Logger::GREEN("\033[32m"); -std::string Logger::YELLOW("\033[33m"); -std::string Logger::BLUE("\033[34m"); -std::string Logger::PURPLE("\033[35m"); -std::string Logger::CYAN("\033[36m"); -std::string Logger::WHITE("\033[37m"); -std::string Logger::NORMAL("\033[0;39m"); -std::string EMPTY(""); +int Logger::timestamp; +std::ostream Logger::devnull(0); -#if 0 - GridLogger GridLogError (1,"Error",Logger::RED); - GridLogger GridLogWarning (1,"Warning",Logger::YELLOW); - GridLogger GridLogMessage (1,"Message",Logger::BLACK); - GridLogger GridLogDebug (1,"Debug",Logger::PURPLE); - GridLogger GridLogPerformance(1,"Performance",Logger::GREEN); - GridLogger GridLogIterative (1,"Iterative",Logger::BLUE); - GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE); -#else - GridLogger GridLogError (1,"Error",EMPTY); - GridLogger GridLogWarning (1,"Warning",EMPTY); - GridLogger GridLogMessage 
(1,"Message",EMPTY); - GridLogger GridLogDebug (1,"Debug",EMPTY); - GridLogger GridLogPerformance(1,"Performance",EMPTY); - GridLogger GridLogIterative (1,"Iterative",EMPTY); - GridLogger GridLogIntegrator (1,"Integrator",EMPTY); -#endif +void GridLogTimestamp(int on){ + Logger::Timestamp(on); +} -void GridLogConfigure(std::vector &logstreams) -{ +Colours GridLogColours(0); +GridLogger GridLogError(1, "Error", GridLogColours, "RED"); +GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); +GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); +GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE"); +GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); +GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE"); +GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE"); + +void GridLogConfigure(std::vector &logstreams) { GridLogError.Active(0); GridLogWarning.Active(0); - GridLogMessage.Active(0); + GridLogMessage.Active(1); // at least the messages should be always on GridLogIterative.Active(0); GridLogDebug.Active(0); GridLogPerformance.Active(0); GridLogIntegrator.Active(0); + GridLogColours.Active(0); - int blackAndWhite = 1; - if(blackAndWhite){ - Logger::BLACK = std::string(""); - Logger::RED =Logger::BLACK; - Logger::GREEN =Logger::BLACK; - Logger::YELLOW =Logger::BLACK; - Logger::BLUE =Logger::BLACK; - Logger::PURPLE =Logger::BLACK; - Logger::CYAN =Logger::BLACK; - Logger::WHITE =Logger::BLACK; - Logger::NORMAL =Logger::BLACK; - } - - for(int i=0;i -Author: Azusa Yamaguchi -Author: Peter Boyle + Author: Antonin Portelli + Author: Azusa Yamaguchi + Author: Peter Boyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +27,9 @@ Author: Peter Boyle See the full license in the file "LICENSE" in the top level distribution directory 
*************************************************************************************/ /* END LEGAL */ + +#include + #ifndef GRID_LOG_H #define GRID_LOG_H @@ -36,54 +39,98 @@ Author: Peter Boyle namespace Grid { +////////////////////////////////////////////////////////////////////////////////////////////////// // Dress the output; use std::chrono for time stamping via the StopWatch class -int Rank(void); // used for early stage debug before library init +////////////////////////////////////////////////////////////////////////////////////////////////// + + +class Colours{ +protected: + bool is_active; +public: + std::map colour; + + Colours(bool activate=false){ + Active(activate); + }; + + void Active(bool activate){ + is_active=activate; + if (is_active){ + colour["BLACK"] ="\033[30m"; + colour["RED"] ="\033[31m"; + colour["GREEN"] ="\033[32m"; + colour["YELLOW"] ="\033[33m"; + colour["BLUE"] ="\033[34m"; + colour["PURPLE"] ="\033[35m"; + colour["CYAN"] ="\033[36m"; + colour["WHITE"] ="\033[37m"; + colour["NORMAL"] ="\033[0;39m"; + } else { + colour["BLACK"] =""; + colour["RED"] =""; + colour["GREEN"] =""; + colour["YELLOW"]=""; + colour["BLUE"] =""; + colour["PURPLE"]=""; + colour["CYAN"] =""; + colour["WHITE"] =""; + colour["NORMAL"]=""; + } + }; +}; class Logger { protected: - int active; - std::string name, topName, COLOUR; -public: - static GridStopWatch StopWatch; - static std::ostream devnull; + Colours &Painter; + int active; + static int timestamp; + std::string name, topName; + std::string COLOUR; - static std::string BLACK; - static std::string RED ; - static std::string GREEN; - static std::string YELLOW; - static std::string BLUE ; - static std::string PURPLE; - static std::string CYAN ; - static std::string WHITE ; - static std::string NORMAL; - - Logger(std::string topNm, int on, std::string nm,std::string col) - : active(on), name(nm), topName(topNm), COLOUR(col) {}; - - void Active(int on) {active = on;}; - int isActive(void) {return active;}; - - 
friend std::ostream& operator<< (std::ostream& stream, const Logger& log){ - if ( log.active ) { - StopWatch.Stop(); - GridTime now = StopWatch.Elapsed(); - StopWatch.Start(); - stream << BLACK < &logstreams); @@ -95,38 +142,41 @@ extern GridLogger GridLogDebug ; extern GridLogger GridLogPerformance; extern GridLogger GridLogIterative ; extern GridLogger GridLogIntegrator ; +extern Colours GridLogColours; + std::string demangle(const char* name) ; #define _NBACKTRACE (256) extern void * Grid_backtrace_buffer[_NBACKTRACE]; #define BACKTRACEFILE() {\ - char string[20]; \ - std::sprintf(string,"backtrace.%d",Rank()); \ - std::FILE * fp = std::fopen(string,"w"); \ - BACKTRACEFP(fp)\ - std::fclose(fp); \ +char string[20]; \ +std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \ +std::FILE * fp = std::fopen(string,"w"); \ +BACKTRACEFP(fp)\ +std::fclose(fp); \ } #ifdef HAVE_EXECINFO_H #define BACKTRACEFP(fp) { \ - int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\ - char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ - for (int i = 0; i < symbols; i++){\ - std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \ - }\ +int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\ +char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ +for (int i = 0; i < symbols; i++){\ + std::fprintf (fp,"BackTrace Strings: %d %s\n",i, demangle(strings[i]).c_str()); std::fflush(fp); \ +}\ } #else #define BACKTRACEFP(fp) { \ - std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ - std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ - std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ - std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ +std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ +std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); 
std::fflush(fp); \ +std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ +std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ } #endif #define BACKTRACE() BACKTRACEFP(stdout) + } #endif diff --git a/lib/Make.inc b/lib/Make.inc deleted file mode 100644 index 8763692a..00000000 --- a/lib/Make.inc +++ /dev/null @@ -1,4 +0,0 @@ - -HFILES=./Algorithms.h ./AlignedAllocator.h ./Cartesian.h ./Communicator.h ./Cshift.h ./Grid.h ./Init.h ./Lattice.h ./Lexicographic.h ./Log.h ./Old/Tensor_peek.h ./Old/Tensor_poke.h ./PerfCount.h ./Simd.h ./Stencil.h ./Tensors.h ./Threads.h ./Timer.h ./algorithms/CoarsenedMatrix.h ./algorithms/LinearOperator.h ./algorithms/Preconditioner.h ./algorithms/SparseMatrix.h ./algorithms/approx/Chebyshev.h ./algorithms/approx/MultiShiftFunction.h ./algorithms/approx/Remez.h ./algorithms/approx/Zolotarev.h ./algorithms/approx/bigfloat.h ./algorithms/approx/bigfloat_double.h ./algorithms/iterative/AdefGeneric.h ./algorithms/iterative/ConjugateGradient.h ./algorithms/iterative/ConjugateGradientMultiShift.h ./algorithms/iterative/ConjugateResidual.h ./algorithms/iterative/DenseMatrix.h ./algorithms/iterative/EigenSort.h ./algorithms/iterative/Francis.h ./algorithms/iterative/Householder.h ./algorithms/iterative/ImplicitlyRestartedLanczos.h ./algorithms/iterative/Matrix.h ./algorithms/iterative/MatrixUtils.h ./algorithms/iterative/NormalEquations.h ./algorithms/iterative/PrecConjugateResidual.h ./algorithms/iterative/PrecGeneralisedConjugateResidual.h ./algorithms/iterative/SchurRedBlack.h ./cartesian/Cartesian_base.h ./cartesian/Cartesian_full.h ./cartesian/Cartesian_red_black.h ./communicator/Communicator_base.h ./cshift/Cshift_common.h ./cshift/Cshift_mpi.h ./cshift/Cshift_none.h ./lattice/Lattice_ET.h ./lattice/Lattice_arith.h ./lattice/Lattice_base.h ./lattice/Lattice_comparison.h ./lattice/Lattice_comparison_utils.h ./lattice/Lattice_conformable.h ./lattice/Lattice_coordinate.h 
./lattice/Lattice_local.h ./lattice/Lattice_overload.h ./lattice/Lattice_peekpoke.h ./lattice/Lattice_reality.h ./lattice/Lattice_reduction.h ./lattice/Lattice_rng.h ./lattice/Lattice_trace.h ./lattice/Lattice_transfer.h ./lattice/Lattice_transpose.h ./lattice/Lattice_unary.h ./lattice/Lattice_where.h ./parallelIO/BinaryIO.h ./parallelIO/NerscIO.h ./pugixml/pugixml.h ./qcd/QCD.h ./qcd/action/ActionBase.h ./qcd/action/ActionParams.h ./qcd/action/Actions.h ./qcd/action/fermion/CayleyFermion5D.h ./qcd/action/fermion/ContinuedFractionFermion5D.h ./qcd/action/fermion/DomainWallFermion.h ./qcd/action/fermion/FermionOperator.h ./qcd/action/fermion/FermionOperatorImpl.h ./qcd/action/fermion/MobiusFermion.h ./qcd/action/fermion/MobiusZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h ./qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonContfracTanhFermion.h ./qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h ./qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h ./qcd/action/fermion/PartialFractionFermion5D.h ./qcd/action/fermion/ScaledShamirFermion.h ./qcd/action/fermion/ShamirZolotarevFermion.h ./qcd/action/fermion/WilsonCompressor.h ./qcd/action/fermion/WilsonFermion.h ./qcd/action/fermion/WilsonFermion5D.h ./qcd/action/fermion/WilsonKernels.h ./qcd/action/fermion/WilsonTMFermion.h ./qcd/action/fermion/g5HermitianLinop.h ./qcd/action/fermion/WilsonKernelsAsmBody.h ./qcd/action/gauge/GaugeImpl.h ./qcd/action/gauge/PlaqPlusRectangleAction.h ./qcd/action/gauge/WilsonGaugeAction.h ./qcd/action/pseudofermion/EvenOddSchurDifferentiable.h ./qcd/action/pseudofermion/OneFlavourEvenOddRational.h ./qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h ./qcd/action/pseudofermion/OneFlavourRational.h ./qcd/action/pseudofermion/OneFlavourRationalRatio.h ./qcd/action/pseudofermion/TwoFlavour.h 
./qcd/action/pseudofermion/TwoFlavourEvenOdd.h ./qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h ./qcd/action/pseudofermion/TwoFlavourRatio.h ./qcd/hmc/HMC.h ./qcd/hmc/HmcRunner.h ./qcd/hmc/NerscCheckpointer.h ./qcd/hmc/integrators/Integrator.h ./qcd/hmc/integrators/Integrator_algorithm.h ./qcd/spin/Dirac.h ./qcd/spin/TwoSpinor.h ./qcd/utils/CovariantCshift.h ./qcd/utils/LinalgUtils.h ./qcd/utils/SUn.h ./qcd/utils/SpaceTimeGrid.h ./qcd/utils/WilsonLoops.h ./serialisation/BaseIO.h ./serialisation/BinaryIO.h ./serialisation/MacroMagic.h ./serialisation/Serialisation.h ./serialisation/TextIO.h ./serialisation/XmlIO.h ./simd/Grid_avx.h ./simd/Grid_avx512.h ./simd/Grid_empty.h ./simd/Grid_imci.h ./simd/Grid_neon.h ./simd/Grid_qpx.h ./simd/Grid_sse4.h ./simd/Grid_vector_types.h ./simd/Grid_vector_unops.h ./simd/Intel512avx.h ./simd/Intel512wilson.h ./simd/Intel512common.h ./simd/Intel512double.h ./simd/Intel512imci.h ./simd/Intel512single.h ./stencil/Lebesgue.h ./tensors/Tensor_Ta.h ./tensors/Tensor_arith.h ./tensors/Tensor_arith_add.h ./tensors/Tensor_arith_mac.h ./tensors/Tensor_arith_mul.h ./tensors/Tensor_arith_scalar.h ./tensors/Tensor_arith_sub.h ./tensors/Tensor_class.h ./tensors/Tensor_determinant.h ./tensors/Tensor_exp.h ./tensors/Tensor_extract_merge.h ./tensors/Tensor_index.h ./tensors/Tensor_inner.h ./tensors/Tensor_logical.h ./tensors/Tensor_outer.h ./tensors/Tensor_reality.h ./tensors/Tensor_trace.h ./tensors/Tensor_traits.h ./tensors/Tensor_transpose.h ./tensors/Tensor_unary.h - -CCFILES=./Init.cc ./Log.cc ./PerfCount.cc ./algorithms/approx/MultiShiftFunction.cc ./algorithms/approx/Remez.cc ./algorithms/approx/Zolotarev.cc ./pugixml/pugixml.cc ./qcd/action/fermion/CayleyFermion5D.cc ./qcd/action/fermion/ContinuedFractionFermion5D.cc ./qcd/action/fermion/PartialFractionFermion5D.cc ./qcd/action/fermion/WilsonFermion.cc ./qcd/action/fermion/WilsonFermion5D.cc ./qcd/action/fermion/WilsonKernels.cc ./qcd/action/fermion/WilsonKernelsAsm.cc 
./qcd/action/fermion/WilsonKernelsHand.cc ./qcd/action/fermion/WilsonTMFermion.cc ./qcd/hmc/HMC.cc ./qcd/spin/Dirac.cc ./qcd/utils/SpaceTimeGrid.cc ./serialisation/BinaryIO.cc ./serialisation/TextIO.cc ./serialisation/XmlIO.cc ./stencil/Lebesgue.cc ./stencil/Stencil_common.cc diff --git a/lib/Makefile.am b/lib/Makefile.am index e626ee22..a779135f 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,32 +1,37 @@ -# additional include paths necessary to compile the C++ library -AM_CXXFLAGS = -I$(top_srcdir)/ - extra_sources= if BUILD_COMMS_MPI extra_sources+=communicator/Communicator_mpi.cc + extra_sources+=communicator/Communicator_base.cc +endif + +if BUILD_COMMS_MPI3 + extra_sources+=communicator/Communicator_mpi3.cc + extra_sources+=communicator/Communicator_base.cc +endif + +if BUILD_COMMS_MPI3L + extra_sources+=communicator/Communicator_mpi3_leader.cc + extra_sources+=communicator/Communicator_base.cc endif if BUILD_COMMS_SHMEM extra_sources+=communicator/Communicator_shmem.cc + extra_sources+=communicator/Communicator_base.cc endif if BUILD_COMMS_NONE extra_sources+=communicator/Communicator_none.cc + extra_sources+=communicator/Communicator_base.cc endif # # Libraries # - include Make.inc +include Eigen.inc lib_LIBRARIES = libGrid.a -libGrid_a_SOURCES = $(CCFILES) $(extra_sources) - - -# qcd/action/fermion/PartialFractionFermion5D.cc\ \ -# -# Include files -# -nobase_include_HEADERS=$(HFILES) +libGrid_a_SOURCES = $(CCFILES) $(extra_sources) +libGrid_adir = $(pkgincludedir) +nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h diff --git a/lib/PerfCount.h b/lib/PerfCount.h index 9ac58883..5ab07c02 100644 --- a/lib/PerfCount.h +++ b/lib/PerfCount.h @@ -43,6 +43,9 @@ Author: paboyle #else #include #endif +#ifdef __x86_64__ +#include +#endif namespace Grid { @@ -86,7 +89,6 @@ inline uint64_t cyclecount(void){ return tmp; } #elif defined __x86_64__ -#include inline uint64_t cyclecount(void){ return __rdtsc(); // unsigned int dummy; diff --git 
a/lib/Simd.h b/lib/Simd.h index de49cca7..adc2849d 100644 --- a/lib/Simd.h +++ b/lib/Simd.h @@ -1,32 +1,33 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/Simd.h +Source file: ./lib/Simd.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_SIMD_H #define GRID_SIMD_H @@ -118,6 +119,14 @@ namespace Grid { inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));} inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));} inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));} + + // define projections to real and imaginay parts + inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));} + inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));} + inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));} + inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));} + + // define auxiliary functions for complex computations inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);} inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);} inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);} @@ -163,8 +172,8 @@ namespace Grid { }; -#include -#include +#include "simd/Grid_vector_types.h" +#include "simd/Grid_vector_unops.h" namespace Grid { // Default precision @@ -228,6 +237,18 @@ namespace Grid { stream<<">"; return stream; } + inline std::ostream& operator<< (std::ostream& stream, const vInteger &o){ + int nn=vInteger::Nsimd(); + std::vector > buf(nn); + vstore(o,&buf[0]); + stream<<"<"; + for(int i=0;i"; + return stream; + } } diff --git a/lib/Stat.cc b/lib/Stat.cc new file mode 100644 index 00000000..7f2e4086 --- /dev/null +++ b/lib/Stat.cc @@ -0,0 +1,247 @@ +#include +#include +#include + + +namespace 
Grid { + + +bool PmuStat::pmu_initialized=false; + + +void PmuStat::init(const char *regname) +{ +#ifdef __x86_64__ + name = regname; + if (!pmu_initialized) + { + std::cout<<"initialising pmu"< - #include // subdir aggregate + #include // subdir aggregate ////////////////////////////////////////////////////////////////////////////////////////// // Must not lose sight that goal is to be able to construct really efficient @@ -68,827 +68,894 @@ // ////////////////////////////////////////////////////////////////////////////////////////// - namespace Grid { +namespace Grid { - struct StencilEntry { - uint32_t _offset; - uint32_t _byte_offset; - uint16_t _is_local; - uint16_t _permute; - uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline - }; +inline void Gather_plane_simple_table_compute (GridBase *grid,int dimension,int plane,int cbmask, + int off,std::vector > & table) +{ + table.resize(0); + int rd = grid->_rdimensions[dimension]; - template - class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. 
- public: - - typedef uint32_t StencilInteger; - typedef typename cobj::vector_type vector_type; - typedef typename cobj::scalar_type scalar_type; - typedef typename cobj::scalar_object scalar_object; - - ////////////////////////////////////////// - // Comms packet queue for asynch thread - ////////////////////////////////////////// - - struct Packet { - void * send_buf; - void * recv_buf; - Integer to_rank; - Integer from_rank; - Integer bytes; - volatile Integer done; - }; - - std::vector Packets; - - #define SEND_IMMEDIATE - #define SERIAL_SENDS - - void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ - comms_bytes+=2.0*bytes; - #ifdef SEND_IMMEDIATE - commtime-=usecond(); - _grid->SendToRecvFrom(xmit,to,rcv,from,bytes); - commtime+=usecond(); - #endif - Packet p; - p.send_buf = xmit; - p.recv_buf = rcv; - p.to_rank = to; - p.from_rank= from; - p.bytes = bytes; - p.done = 0; - comms_bytes+=2.0*bytes; - Packets.push_back(p); - - } - - #ifdef SERIAL_SENDS - void Communicate(void ) { - commtime-=usecond(); - for(int i=0;iSendToRecvFrom( - Packets[i].send_buf, - Packets[i].to_rank, - Packets[i].recv_buf, - Packets[i].from_rank, - Packets[i].bytes); - #endif - Packets[i].done = 1; - } - commtime+=usecond(); - } - #else - void Communicate(void ) { - typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; - std::vector > reqs(Packets.size()); - commtime-=usecond(); - const int concurrency=2; - for(int i=0;iSendToRecvFromBegin(reqs[j], - Packets[j].send_buf, - Packets[j].to_rank, - Packets[j].recv_buf, - Packets[j].from_rank, - Packets[j].bytes); - #endif - } - } - for(int ii=0;iiSendToRecvFromComplete(reqs[i]); - #endif - } - } - for(int ii=0;ii rpointers; - Integer buffer_size; - Integer packet_id; - }; - - std::vector Mergers; - - void AddMerge(cobj *merge_p,std::vector &rpointers,Integer buffer_size,Integer packet_id) { - Merge m; - m.mpointer = merge_p; - m.rpointers= rpointers; - m.buffer_size = buffer_size; - m.packet_id = 
packet_id; - #ifdef SEND_IMMEDIATE - mergetime-=usecond(); - PARALLEL_FOR_LOOP - for(int o=0;oCheckerBoarded(dimension) ) { + cbmask = 0x3; + } + int so= plane*grid->_ostride[dimension]; // base offset for start of plane + int e1=grid->_slice_nblock[dimension]; + int e2=grid->_slice_block[dimension]; + int stride=grid->_slice_stride[dimension]; + if ( cbmask == 0x3 ) { + table.resize(e1*e2); + for(int n=0;n(bo+b,o+b); + } + } + } else { + int bo=0; + table.resize(e1*e2/2); + for(int n=0;nCheckerBoardFromOindexTable(o+b); + if ( ocb &cbmask ) { + table[bo]=std::pair(bo,o+b); bo++; } } + } + } +} - //////////////////////////////////////// - // Basic Grid and stencil info - //////////////////////////////////////// +template void +Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,cobj *buffer,compressor &compress, int off,int so) +{ +PARALLEL_FOR_LOOP + for(int i=0;i _directions; - std::vector _distances; - std::vector _comm_buf_size; - std::vector _permute_type; +template +class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal fill in. + public: - // npoints x Osites() of these - // Flat vector, change layout for cache friendly. 
- Vector _entries; + typedef CartesianCommunicator::CommsRequest_t CommsRequest_t; + typedef uint32_t StencilInteger; + typedef typename cobj::vector_type vector_type; + typedef typename cobj::scalar_type scalar_type; + typedef typename cobj::scalar_object scalar_object; + + ////////////////////////////////////////// + // Comms packet queue for asynch thread + ////////////////////////////////////////// - inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; } + struct Packet { + void * send_buf; + void * recv_buf; + Integer to_rank; + Integer from_rank; + Integer bytes; + }; - void PrecomputeByteOffsets(void){ - for(int i=0;i<_entries.size();i++){ - if( _entries[i]._is_local ) { - _entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj); - } else { - _entries[i]._byte_offset =(uint64_t)&comm_buf[0]+ _entries[i]._offset*sizeof(cobj); - } - } - }; + std::vector Packets; - inline uint64_t Touch(int ent) { - // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); - } - inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { - _mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); - local = _entries[ent]._is_local; - perm = _entries[ent]._permute; - if (perm) ptype = _permute_type[point]; - if (local) return base + _entries[ent]._byte_offset; - else return _entries[ent]._byte_offset; - } - inline uint64_t GetPFInfo(int ent,uint64_t base) { - int local = _entries[ent]._is_local; - if (local) return base + _entries[ent]._byte_offset; - else return _entries[ent]._byte_offset; - } + int face_table_computed; + std::vector > > face_table ; + + void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ + Packet p; + p.send_buf = xmit; + p.recv_buf = rcv; + p.to_rank = to; + p.from_rank= from; + p.bytes = bytes; + comms_bytes+=2.0*bytes; + Packets.push_back(p); + } - // Comms buffers - std::vector > u_simd_send_buf; - std::vector > 
u_simd_recv_buf; - Vector u_send_buf; - Vector comm_buf; - int u_comm_offset; - int _unified_buffer_size; + void CommunicateBegin(std::vector > &reqs) + { + reqs.resize(Packets.size()); + commtime-=usecond(); + for(int i=0;iStencilSendToRecvFromBegin(reqs[i], + Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes); + /* + }else{ + _grid->SendToRecvFromBegin(reqs[i], + Packets[i].send_buf, + Packets[i].to_rank, + Packets[i].recv_buf, + Packets[i].from_rank, + Packets[i].bytes); + } + */ + } + commtime+=usecond(); + } + void CommunicateComplete(std::vector > &reqs) + { + commtime-=usecond(); - ///////////////////////////////////////// - // Timing info; ugly; possibly temporary - ///////////////////////////////////////// + for(int i=0;iStencilSendToRecvFromComplete(reqs[i]); + // else + // _grid->SendToRecvFromComplete(reqs[i]); + } + commtime+=usecond(); + } + + /////////////////////////////////////////// + // Simd merge queue for asynch comms + /////////////////////////////////////////// + struct Merge { + cobj * mpointer; + std::vector rpointers; + Integer buffer_size; + Integer packet_id; + }; + + std::vector Mergers; + + void AddMerge(cobj *merge_p,std::vector &rpointers,Integer buffer_size,Integer packet_id) { + Merge m; + m.mpointer = merge_p; + m.rpointers= rpointers; + m.buffer_size = buffer_size; + m.packet_id = packet_id; + Mergers.push_back(m); + } + + void CommsMerge(void ) { + + for(int i=0;i _directions; + std::vector _distances; + std::vector _comm_buf_size; + std::vector _permute_type; + + // npoints x Osites() of these + // Flat vector, change layout for cache friendly. 
+ Vector _entries; + + void PrecomputeByteOffsets(void){ + for(int i=0;i<_entries.size();i++){ + if( _entries[i]._is_local ) { + _entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj); + } else { + _entries[i]._byte_offset = _entries[i]._offset*sizeof(cobj); + } + } + }; + + inline StencilEntry * GetEntry(int &ptype,int point,int osite) { ptype = _permute_type[point]; return & _entries[point+_npoints*osite]; } + inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { + uint64_t cbase = (uint64_t)&u_recv_buf_p[0]; + local = _entries[ent]._is_local; + perm = _entries[ent]._permute; + if (perm) ptype = _permute_type[point]; + if (local) { + return base + _entries[ent]._byte_offset; + } else { + return cbase + _entries[ent]._byte_offset; + } + } + inline uint64_t GetPFInfo(int ent,uint64_t base) { + uint64_t cbase = (uint64_t)&u_recv_buf_p[0]; + int local = _entries[ent]._is_local; + if (local) return base + _entries[ent]._byte_offset; + else return cbase + _entries[ent]._byte_offset; + } + + /////////////////////////////////////////////////////////// + // Unified Comms buffers for all directions + /////////////////////////////////////////////////////////// + // Vectors that live on the symmetric heap in case of SHMEM + // std::vector > u_simd_send_buf_hide; + // std::vector > u_simd_recv_buf_hide; + // commVector u_send_buf_hide; + // commVector u_recv_buf_hide; + + // These are used; either SHM objects or refs to the above symmetric heap vectors + // depending on comms target + cobj* u_recv_buf_p; + cobj* u_send_buf_p; + std::vector u_simd_send_buf; + std::vector u_simd_recv_buf; + + int u_comm_offset; + int _unified_buffer_size; + + cobj *CommBuf(void) { return u_recv_buf_p; } + + ///////////////////////////////////////// + // Timing info; ugly; possibly temporary + ///////////////////////////////////////// #define TIMING_HACK #ifdef TIMING_HACK - double jointime; - double gathertime; - double commtime; - double halogtime; - 
double mergetime; - double spintime; - double comms_bytes; - double gathermtime; - double splicetime; - double nosplicetime; + double jointime; + double gathertime; + double commtime; + double halogtime; + double mergetime; + double spintime; + double comms_bytes; + double gathermtime; + double splicetime; + double nosplicetime; + double t_data; + double t_table; + double calls; + + void ZeroCounters(void) { + gathertime = 0.; + jointime = 0.; + commtime = 0.; + halogtime = 0.; + mergetime = 0.; + spintime = 0.; + gathermtime = 0.; + splicetime = 0.; + nosplicetime = 0.; + t_data = 0.0; + t_table= 0.0; + comms_bytes = 0.; + calls = 0.; + }; + + void Report(void) { +#define PRINTIT(A) \ + std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls< 0. ) { + std::cout << GridLogMessage << " Stencil calls "<1.0){ + PRINTIT(comms_bytes); + PRINTIT(commtime); + std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s "< &directions, - const std::vector &distances) - : _permute_type(npoints), _comm_buf_size(npoints) - { - #ifdef TIMING_HACK - gathertime=0; - jointime=0; - commtime=0; - halogtime=0; - mergetime=0; - spintime=0; - gathermtime=0; - splicetime=0; - nosplicetime=0; - comms_bytes=0; - #endif - _npoints = npoints; - _grid = grid; - _directions = directions; - _distances = distances; - _unified_buffer_size=0; - - int osites = _grid->oSites(); - - _entries.resize(_npoints* osites); - for(int ii=0;ii_fdimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - _permute_type[point]=_grid->PermuteType(dimension); - - _checkerboard = checkerboard; - - // the permute type - int simd_layout = _grid->_simd_layout[dimension]; - int comm_dim = _grid->_processors[dimension] >1 ; - int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); - int rotate_dim = _grid->_simd_layout[dimension]>2; - - assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported - - int sshift[2]; - - // Underlying approach. 
For each local site build - // up a table containing the npoint "neighbours" and whether they - // live in lattice or a comms buffer. - if ( !comm_dim ) { - sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); - sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); - - if ( sshift[0] == sshift[1] ) { - Local(point,dimension,shift,0x3); - } else { - Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes - Local(point,dimension,shift,0x2);// both with block stride loop iteration - } - } else { // All permute extract done in comms phase prior to Stencil application - // So tables are the same whether comm_dim or splice_dim - sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); - sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); - - if ( sshift[0] == sshift[1] ) { - Comms(point,dimension,shift,0x3); - } else { - Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes - Comms(point,dimension,shift,0x2);// both with block stride loop iteration - } - } - } - u_send_buf.resize(_unified_buffer_size); - comm_buf.resize(_unified_buffer_size); - - PrecomputeByteOffsets(); - - const int Nsimd = grid->Nsimd(); - u_simd_send_buf.resize(Nsimd); - u_simd_recv_buf.resize(Nsimd); - for(int l=0;l_fdimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - int ld = _grid->_ldimensions[dimension]; - int gd = _grid->_gdimensions[dimension]; - int ly = _grid->_simd_layout[dimension]; - - // Map to always positive shift modulo global full dimension. - int shift = (shiftpm+fd)%fd; - - // the permute type - int permute_dim =_grid->PermuteDim(dimension); - - for(int x=0;x_ostride[dimension]; - - int cb= (cbmask==0x2)? 
Odd : Even; - - int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); - int sx = (x+sshift)%rd; - - int wraparound=0; - if ( (shiftpm==-1) && (sx>x) ) { - wraparound = 1; - } - if ( (shiftpm== 1) && (sxNsimd(); - - int fd = _grid->_fdimensions[dimension]; - int ld = _grid->_ldimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - int pd = _grid->_processors[dimension]; - int simd_layout = _grid->_simd_layout[dimension]; - int comm_dim = _grid->_processors[dimension] >1 ; - - assert(comm_dim==1); - int shift = (shiftpm + fd) %fd; - assert(shift>=0); - assert(shift_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored - - _comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and - // send to one or more remote nodes. - - int cb= (cbmask==0x2)? Odd : Even; - int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); - - for(int x=0;xPermuteType(dimension); - - int sx = (x+sshift)%rd; - - int offnode = 0; - if ( simd_layout > 1 ) { - - for(int i=0;i>(permute_type+1)); - int ic= (i&inner_bit)? 
1:0; - int my_coor = rd*ic + x; - int nbr_coor = my_coor+sshift; - int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors - - if ( nbr_proc ) { - offnode =1; - } - } - - } else { - int comm_proc = ((x+sshift)/rd)%pd; - offnode = (comm_proc!= 0); - } - - - int wraparound=0; - if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) { - wraparound = 1; - } - if ( (shiftpm== 1) && (sx_processor_coor[dimension]==grid->_processors[dimension]-1) ) { - wraparound = 1; - } - if (!offnode) { - - int permute_slice=0; - CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); - - } else { - - int words = buffer_size; - if (cbmask != 0x3) words=words>>1; - - int rank = grid->_processor; - int recv_from_rank; - int xmit_to_rank; - - int unified_buffer_offset = _unified_buffer_size; - _unified_buffer_size += words; - - ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase - - } - } - } - // Routine builds up integer table for each site in _offsets, _is_local, _permute - void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap) - { - int rd = _grid->_rdimensions[dimension]; - - if ( !_grid->CheckerBoarded(dimension) ) { - - int o = 0; // relative offset to base within plane - int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane - int lo = lplane*_grid->_ostride[dimension]; // offset in buffer - - // Simple block stride gather of SIMD objects - for(int n=0;n<_grid->_slice_nblock[dimension];n++){ - for(int b=0;b<_grid->_slice_block[dimension];b++){ - int idx=point+(lo+o+b)*_npoints; - _entries[idx]._offset =ro+o+b; - _entries[idx]._permute=permute; - _entries[idx]._is_local=1; - _entries[idx]._around_the_world=wrap; - } - o +=_grid->_slice_stride[dimension]; - } - - } else { - - int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane - int lo = lplane*_grid->_ostride[dimension]; // base offset for 
start of plane - int o = 0; // relative offset to base within plane - - for(int n=0;n<_grid->_slice_nblock[dimension];n++){ - for(int b=0;b<_grid->_slice_block[dimension];b++){ - - int ocb=1<<_grid->CheckerBoardFromOindex(o+b); - - if ( ocb&cbmask ) { - int idx = point+(lo+o+b)*_npoints; - _entries[idx]._offset =ro+o+b; - _entries[idx]._is_local=1; - _entries[idx]._permute=permute; - _entries[idx]._around_the_world=wrap; - } - - } - o +=_grid->_slice_stride[dimension]; - } - - } - } - // Routine builds up integer table for each site in _offsets, _is_local, _permute - void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset, int wrap) - { - int rd = _grid->_rdimensions[dimension]; - - if ( !_grid->CheckerBoarded(dimension) ) { - - int so = plane*_grid->_ostride[dimension]; // base offset for start of plane - int o = 0; // relative offset to base within plane - int bo = 0; // offset in buffer - - // Simple block stride gather of SIMD objects - for(int n=0;n<_grid->_slice_nblock[dimension];n++){ - for(int b=0;b<_grid->_slice_block[dimension];b++){ - int idx=point+(so+o+b)*_npoints; - _entries[idx]._offset =offset+(bo++); - _entries[idx]._is_local=0; - _entries[idx]._permute=0; - _entries[idx]._around_the_world=wrap; - } - o +=_grid->_slice_stride[dimension]; - } - - } else { - - int so = plane*_grid->_ostride[dimension]; // base offset for start of plane - int o = 0; // relative offset to base within plane - int bo = 0; // offset in buffer - - for(int n=0;n<_grid->_slice_nblock[dimension];n++){ - for(int b=0;b<_grid->_slice_block[dimension];b++){ - - int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup - if ( ocb & cbmask ) { - int idx = point+(so+o+b)*_npoints; - _entries[idx]._offset =offset+(bo++); - _entries[idx]._is_local=0; - _entries[idx]._permute =0; - _entries[idx]._around_the_world=wrap; - } - } - o +=_grid->_slice_stride[dimension]; - } - } - } - - - - template - void HaloExchange(const Lattice &source,compressor 
&compress) - { - Mergers.resize(0); - Packets.resize(0); - HaloGather(source,compress); - this->Communicate(); - CommsMerge(); // spins - } -#if 0 - // Overlapping comms and compute typically slows down compute and is useless - // unless memory bandwidth greatly exceeds network - template - std::thread HaloExchangeBegin(const Lattice &source,compressor &compress) { - Mergers.resize(0); - Packets.resize(0); - HaloGather(source,compress); - return std::thread([&] { this->Communicate(); }); - } - void HaloExchangeComplete(std::thread &thr) - { - CommsMerge(); // spins - jointime-=usecond(); - thr.join(); - jointime+=usecond(); - } -#endif - template - void HaloGatherDir(const Lattice &source,compressor &compress,int point) - { - int dimension = _directions[point]; - int displacement = _distances[point]; - - int fd = _grid->_fdimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - - - // Map to always positive shift modulo global full dimension. - int shift = (displacement+fd)%fd; - - // int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift); - assert (source.checkerboard== _checkerboard); - - // the permute type - int simd_layout = _grid->_simd_layout[dimension]; - int comm_dim = _grid->_processors[dimension] >1 ; - int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); - - // Gather phase - int sshift [2]; - if ( comm_dim ) { - sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); - sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); - if ( sshift[0] == sshift[1] ) { - if (splice_dim) { - splicetime-=usecond(); - GatherSimd(source,dimension,shift,0x3,compress); - splicetime+=usecond(); - } else { - nosplicetime-=usecond(); - Gather(source,dimension,shift,0x3,compress); - nosplicetime+=usecond(); - } - } else { - if(splice_dim){ - splicetime-=usecond(); - GatherSimd(source,dimension,shift,0x1,compress);// if checkerboard is unfavourable take two passes - 
GatherSimd(source,dimension,shift,0x2,compress);// both with block stride loop iteration - splicetime+=usecond(); - } else { - nosplicetime-=usecond(); - Gather(source,dimension,shift,0x1,compress); - Gather(source,dimension,shift,0x2,compress); - nosplicetime+=usecond(); - } - } - } - } - - template - void HaloGather(const Lattice &source,compressor &compress) - { - // conformable(source._grid,_grid); - assert(source._grid==_grid); - halogtime-=usecond(); - - assert (comm_buf.size() == _unified_buffer_size ); - u_comm_offset=0; - - // Gather all comms buffers - for(int point = 0 ; point < _npoints; point++) { - compress.Point(point); - HaloGatherDir(source,compress,point); - } - - assert(u_comm_offset==_unified_buffer_size); - halogtime+=usecond(); - } - - template - void Gather(const Lattice &rhs,int dimension,int shift,int cbmask,compressor & compress) - { - typedef typename cobj::vector_type vector_type; - typedef typename cobj::scalar_type scalar_type; - - GridBase *grid=_grid; - assert(rhs._grid==_grid); - // conformable(_grid,rhs._grid); - - int fd = _grid->_fdimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - int pd = _grid->_processors[dimension]; - int simd_layout = _grid->_simd_layout[dimension]; - int comm_dim = _grid->_processors[dimension] >1 ; - assert(simd_layout==1); - assert(comm_dim==1); - assert(shift>=0); - assert(shift_slice_nblock[dimension]*_grid->_slice_block[dimension]; - - int cb= (cbmask==0x2)? 
Odd : Even; - int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); - - for(int x=0;x>1; - - int bytes = words * sizeof(cobj); - - gathertime-=usecond(); - Gather_plane_simple (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset); - gathertime+=usecond(); - - int rank = _grid->_processor; - int recv_from_rank; - int xmit_to_rank; - _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); - assert (xmit_to_rank != _grid->ThisRank()); - assert (recv_from_rank != _grid->ThisRank()); - - // FIXME Implement asynchronous send & also avoid buffer copy - AddPacket((void *)&u_send_buf[u_comm_offset], - (void *) &comm_buf[u_comm_offset], - xmit_to_rank, - recv_from_rank, - bytes); - - u_comm_offset+=words; - } - } - } - - - template - void GatherSimd(const Lattice &rhs,int dimension,int shift,int cbmask,compressor &compress) - { - const int Nsimd = _grid->Nsimd(); - - int fd = _grid->_fdimensions[dimension]; - int rd = _grid->_rdimensions[dimension]; - int ld = _grid->_ldimensions[dimension]; - int pd = _grid->_processors[dimension]; - int simd_layout = _grid->_simd_layout[dimension]; - int comm_dim = _grid->_processors[dimension] >1 ; - - assert(comm_dim==1); - // This will not work with a rotate dim - assert(simd_layout==2); - assert(shift>=0); - assert(shiftPermuteType(dimension); - - /////////////////////////////////////////////// - // Simd direction uses an extract/merge pair - /////////////////////////////////////////////// - int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; - int words = sizeof(cobj)/sizeof(vector_type); - - assert(cbmask==0x3); // Fixme think there is a latent bug if not true - - int bytes = buffer_size*sizeof(scalar_object); - - std::vector rpointers(Nsimd); - std::vector spointers(Nsimd); - - /////////////////////////////////////////// - // Work out what to send where - /////////////////////////////////////////// - - int cb = (cbmask==0x2)? 
Odd : Even; - int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); - - // loop over outer coord planes orthog to dim - for(int x=0;x= rd ); - - if ( any_offnode ) { - - for(int i=0;i(rhs,spointers,dimension,sx,cbmask,compress); - gathermtime+=usecond(); - - for(int i=0;i2 - // std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<>(permute_type+1)); - int ic= (i&inner_bit)? 1:0; - - int my_coor = rd*ic + x; - int nbr_coor = my_coor+sshift; - int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors - int nbr_lcoor= (nbr_coor%ld); - int nbr_ic = (nbr_lcoor)/rd; // inner coord of peer - int nbr_ox = (nbr_lcoor%rd); // outer coord of peer - int nbr_lane = (i&(~inner_bit)); - - if (nbr_ic) nbr_lane|=inner_bit; - assert (sx == nbr_ox); - - auto rp = &u_simd_recv_buf[i ][u_comm_offset]; - auto sp = &u_simd_send_buf[nbr_lane][u_comm_offset]; - - void *vrp = (void *)rp; - void *vsp = (void *)sp; - - - if(nbr_proc){ - - int recv_from_rank; - int xmit_to_rank; - - _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); - - AddPacket( vsp,vrp,xmit_to_rank,recv_from_rank,bytes); - - rpointers[i] = rp; - - } else { - - rpointers[i] = sp; - - } - } - - AddMerge(&comm_buf[u_comm_offset],rpointers,buffer_size,Packets.size()-1); - - u_comm_offset +=buffer_size; - } - } - } - - }; - } + CartesianStencil(GridBase *grid, + int npoints, + int checkerboard, + const std::vector &directions, + const std::vector &distances) + : _permute_type(npoints), _comm_buf_size(npoints) + { + face_table_computed=0; + _npoints = npoints; + _grid = grid; + _directions = directions; + _distances = distances; + _unified_buffer_size=0; + + int osites = _grid->oSites(); + + _entries.resize(_npoints* osites); + for(int ii=0;ii_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + _permute_type[point]=_grid->PermuteType(dimension); + + _checkerboard = checkerboard; + + // the permute type + int simd_layout = 
_grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); + int rotate_dim = _grid->_simd_layout[dimension]>2; + + assert ( (rotate_dim && comm_dim) == false) ; // Do not think spread out is supported + + int sshift[2]; + + // Underlying approach. For each local site build + // up a table containing the npoint "neighbours" and whether they + // live in lattice or a comms buffer. + if ( !comm_dim ) { + sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); + sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); + + if ( sshift[0] == sshift[1] ) { + Local(point,dimension,shift,0x3); + } else { + Local(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes + Local(point,dimension,shift,0x2);// both with block stride loop iteration + } + } else { // All permute extract done in comms phase prior to Stencil application + // So tables are the same whether comm_dim or splice_dim + sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); + sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); + + if ( sshift[0] == sshift[1] ) { + Comms(point,dimension,shift,0x3); + } else { + Comms(point,dimension,shift,0x1);// if checkerboard is unfavourable take two passes + Comms(point,dimension,shift,0x2);// both with block stride loop iteration + } + } + } + + ///////////////////////////////////////////////////////////////////////////////// + // Try to allocate for receiving in a shared memory region, fall back to buffer + ///////////////////////////////////////////////////////////////////////////////// + const int Nsimd = grid->Nsimd(); + + _grid->ShmBufferFreeAll(); + + u_simd_send_buf.resize(Nsimd); + u_simd_recv_buf.resize(Nsimd); + + u_send_buf_p=(cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + u_recv_buf_p=(cobj 
*)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj)); + for(int l=0;lShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); + u_simd_send_buf[l] = (scalar_object *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(scalar_object)); + } + + PrecomputeByteOffsets(); + } + + void Local (int point, int dimension,int shiftpm,int cbmask) + { + int fd = _grid->_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int ld = _grid->_ldimensions[dimension]; + int gd = _grid->_gdimensions[dimension]; + int ly = _grid->_simd_layout[dimension]; + + // Map to always positive shift modulo global full dimension. + int shift = (shiftpm+fd)%fd; + + // the permute type + int permute_dim =_grid->PermuteDim(dimension); + + for(int x=0;x_ostride[dimension]; + + int cb= (cbmask==0x2)? Odd : Even; + + int sshift = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); + int sx = (x+sshift)%rd; + + int wraparound=0; + if ( (shiftpm==-1) && (sx>x) ) { + wraparound = 1; + } + if ( (shiftpm== 1) && (sxNsimd(); + + int fd = _grid->_fdimensions[dimension]; + int ld = _grid->_ldimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int pd = _grid->_processors[dimension]; + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + + assert(comm_dim==1); + int shift = (shiftpm + fd) %fd; + assert(shift>=0); + assert(shift_slice_nblock[dimension]*_grid->_slice_block[dimension]; // done in reduced dims, so SIMD factored + + _comm_buf_size[point] = buffer_size; // Size of _one_ plane. Multiple planes may be gathered and + // send to one or more remote nodes. + + int cb= (cbmask==0x2)? Odd : Even; + int sshift= _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,cb); + + for(int x=0;xPermuteType(dimension); + + int sx = (x+sshift)%rd; + + int offnode = 0; + if ( simd_layout > 1 ) { + + for(int i=0;i>(permute_type+1)); + int ic= (i&inner_bit)? 
1:0; + int my_coor = rd*ic + x; + int nbr_coor = my_coor+sshift; + int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors + + if ( nbr_proc ) { + offnode =1; + } + } + + } else { + int comm_proc = ((x+sshift)/rd)%pd; + offnode = (comm_proc!= 0); + } + + + int wraparound=0; + if ( (shiftpm==-1) && (sx>x) && (grid->_processor_coor[dimension]==0) ) { + wraparound = 1; + } + if ( (shiftpm== 1) && (sx_processor_coor[dimension]==grid->_processors[dimension]-1) ) { + wraparound = 1; + } + if (!offnode) { + + int permute_slice=0; + CopyPlane(point,dimension,x,sx,cbmask,permute_slice,wraparound); + + } else { + + int words = buffer_size; + if (cbmask != 0x3) words=words>>1; + + int rank = grid->_processor; + int recv_from_rank; + int xmit_to_rank; + + int unified_buffer_offset = _unified_buffer_size; + _unified_buffer_size += words; + + ScatterPlane(point,dimension,x,cbmask,unified_buffer_offset,wraparound); // permute/extract/merge is done in comms phase + + } + } + } + // Routine builds up integer table for each site in _offsets, _is_local, _permute + void CopyPlane(int point, int dimension,int lplane,int rplane,int cbmask,int permute,int wrap) + { + int rd = _grid->_rdimensions[dimension]; + + if ( !_grid->CheckerBoarded(dimension) ) { + + int o = 0; // relative offset to base within plane + int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane + int lo = lplane*_grid->_ostride[dimension]; // offset in buffer + + // Simple block stride gather of SIMD objects + for(int n=0;n<_grid->_slice_nblock[dimension];n++){ + for(int b=0;b<_grid->_slice_block[dimension];b++){ + int idx=point+(lo+o+b)*_npoints; + _entries[idx]._offset =ro+o+b; + _entries[idx]._permute=permute; + _entries[idx]._is_local=1; + _entries[idx]._around_the_world=wrap; + } + o +=_grid->_slice_stride[dimension]; + } + + } else { + + int ro = rplane*_grid->_ostride[dimension]; // base offset for start of plane + int lo = lplane*_grid->_ostride[dimension]; // base offset for 
start of plane + int o = 0; // relative offset to base within plane + + for(int n=0;n<_grid->_slice_nblock[dimension];n++){ + for(int b=0;b<_grid->_slice_block[dimension];b++){ + + int ocb=1<<_grid->CheckerBoardFromOindex(o+b); + + if ( ocb&cbmask ) { + int idx = point+(lo+o+b)*_npoints; + _entries[idx]._offset =ro+o+b; + _entries[idx]._is_local=1; + _entries[idx]._permute=permute; + _entries[idx]._around_the_world=wrap; + } + + } + o +=_grid->_slice_stride[dimension]; + } + + } + } + // Routine builds up integer table for each site in _offsets, _is_local, _permute + void ScatterPlane (int point,int dimension,int plane,int cbmask,int offset, int wrap) + { + int rd = _grid->_rdimensions[dimension]; + + if ( !_grid->CheckerBoarded(dimension) ) { + + int so = plane*_grid->_ostride[dimension]; // base offset for start of plane + int o = 0; // relative offset to base within plane + int bo = 0; // offset in buffer + + // Simple block stride gather of SIMD objects + for(int n=0;n<_grid->_slice_nblock[dimension];n++){ + for(int b=0;b<_grid->_slice_block[dimension];b++){ + int idx=point+(so+o+b)*_npoints; + _entries[idx]._offset =offset+(bo++); + _entries[idx]._is_local=0; + _entries[idx]._permute=0; + _entries[idx]._around_the_world=wrap; + } + o +=_grid->_slice_stride[dimension]; + } + + } else { + + int so = plane*_grid->_ostride[dimension]; // base offset for start of plane + int o = 0; // relative offset to base within plane + int bo = 0; // offset in buffer + + for(int n=0;n<_grid->_slice_nblock[dimension];n++){ + for(int b=0;b<_grid->_slice_block[dimension];b++){ + + int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup + if ( ocb & cbmask ) { + int idx = point+(so+o+b)*_npoints; + _entries[idx]._offset =offset+(bo++); + _entries[idx]._is_local=0; + _entries[idx]._permute =0; + _entries[idx]._around_the_world=wrap; + } + } + o +=_grid->_slice_stride[dimension]; + } + } + } + + template void HaloExchange(const Lattice &source,compressor 
&compress) + { + std::vector > reqs; + calls++; + Mergers.resize(0); + Packets.resize(0); + _grid->StencilBarrier(); + HaloGather(source,compress); + this->CommunicateBegin(reqs); + _grid->StencilBarrier(); + this->CommunicateComplete(reqs); + _grid->StencilBarrier(); + CommsMerge(); // spins + } + + template void HaloGatherDir(const Lattice &source,compressor &compress,int point,int & face_idx) + { + int dimension = _directions[point]; + int displacement = _distances[point]; + + int fd = _grid->_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + + + // Map to always positive shift modulo global full dimension. + int shift = (displacement+fd)%fd; + + // int checkerboard = _grid->CheckerBoardDestination(source.checkerboard,shift); + assert (source.checkerboard== _checkerboard); + + // the permute type + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + int splice_dim = _grid->_simd_layout[dimension]>1 && (comm_dim); + + // Gather phase + int sshift [2]; + if ( comm_dim ) { + sshift[0] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Even); + sshift[1] = _grid->CheckerBoardShiftForCB(_checkerboard,dimension,shift,Odd); + if ( sshift[0] == sshift[1] ) { + if (splice_dim) { + splicetime-=usecond(); + GatherSimd(source,dimension,shift,0x3,compress,face_idx); + splicetime+=usecond(); + } else { + nosplicetime-=usecond(); + Gather(source,dimension,shift,0x3,compress,face_idx); + nosplicetime+=usecond(); + } + } else { + if(splice_dim){ + splicetime-=usecond(); + GatherSimd(source,dimension,shift,0x1,compress,face_idx);// if checkerboard is unfavourable take two passes + GatherSimd(source,dimension,shift,0x2,compress,face_idx);// both with block stride loop iteration + splicetime+=usecond(); + } else { + nosplicetime-=usecond(); + Gather(source,dimension,shift,0x1,compress,face_idx); + Gather(source,dimension,shift,0x2,compress,face_idx); + nosplicetime+=usecond(); + } + } + } + } + + 
template + void HaloGather(const Lattice &source,compressor &compress) + { + // conformable(source._grid,_grid); + assert(source._grid==_grid); + halogtime-=usecond(); + + u_comm_offset=0; + + // Gather all comms buffers + int face_idx=0; + for(int point = 0 ; point < _npoints; point++) { + compress.Point(point); + HaloGatherDir(source,compress,point,face_idx); + } + face_table_computed=1; + + assert(u_comm_offset==_unified_buffer_size); + halogtime+=usecond(); + } + + template + void Gather(const Lattice &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx) + { + typedef typename cobj::vector_type vector_type; + typedef typename cobj::scalar_type scalar_type; + + GridBase *grid=_grid; + assert(rhs._grid==_grid); + // conformable(_grid,rhs._grid); + + int fd = _grid->_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int pd = _grid->_processors[dimension]; + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + assert(simd_layout==1); + assert(comm_dim==1); + assert(shift>=0); + assert(shift_slice_nblock[dimension]*_grid->_slice_block[dimension]; + + int cb= (cbmask==0x2)? 
Odd : Even; + int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + + for(int x=0;x>1; + + int bytes = words * sizeof(cobj); + + gathertime-=usecond(); + int so = sx*rhs._grid->_ostride[dimension]; // base offset for start of plane + if ( !face_table_computed ) { + t_table-=usecond(); + face_table.resize(face_idx+1); + Gather_plane_simple_table_compute ((GridBase *)_grid,dimension,sx,cbmask,u_comm_offset, + face_table[face_idx]); + t_table+=usecond(); + } + + + int rank = _grid->_processor; + int recv_from_rank; + int xmit_to_rank; + _grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); + + assert (xmit_to_rank != _grid->ThisRank()); + assert (recv_from_rank != _grid->ThisRank()); + + ///////////////////////////////////////////////////////// + // try the direct copy if possible + ///////////////////////////////////////////////////////// + + + cobj *send_buf = (cobj *)_grid->ShmBufferTranslate(xmit_to_rank,u_recv_buf_p); + if ( send_buf==NULL ) { + send_buf = u_send_buf_p; + } + // std::cout << " send_bufs "< + void GatherSimd(const Lattice &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx) + { + const int Nsimd = _grid->Nsimd(); + + int fd = _grid->_fdimensions[dimension]; + int rd = _grid->_rdimensions[dimension]; + int ld = _grid->_ldimensions[dimension]; + int pd = _grid->_processors[dimension]; + int simd_layout = _grid->_simd_layout[dimension]; + int comm_dim = _grid->_processors[dimension] >1 ; + + assert(comm_dim==1); + // This will not work with a rotate dim + assert(simd_layout==2); + assert(shift>=0); + assert(shiftPermuteType(dimension); + + /////////////////////////////////////////////// + // Simd direction uses an extract/merge pair + /////////////////////////////////////////////// + int buffer_size = _grid->_slice_nblock[dimension]*_grid->_slice_block[dimension]; + int words = sizeof(cobj)/sizeof(vector_type); + + assert(cbmask==0x3); // Fixme think there is a latent bug if not true 
+ + int bytes = buffer_size*sizeof(scalar_object); + + std::vector rpointers(Nsimd); + std::vector spointers(Nsimd); + + /////////////////////////////////////////// + // Work out what to send where + /////////////////////////////////////////// + + int cb = (cbmask==0x2)? Odd : Even; + int sshift= _grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); + + // loop over outer coord planes orthog to dim + for(int x=0;x= rd ); + + if ( any_offnode ) { + + for(int i=0;i(rhs,spointers,dimension,sx,cbmask,compress); + gathermtime+=usecond(); + + for(int i=0;i2 + // std::cout << "GatherSimd : lane 1st elem " << i << u_simd_send_buf[i ][u_comm_offset]<>(permute_type+1)); + int ic= (i&inner_bit)? 1:0; + + int my_coor = rd*ic + x; + int nbr_coor = my_coor+sshift; + int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors + int nbr_lcoor= (nbr_coor%ld); + int nbr_ic = (nbr_lcoor)/rd; // inner coord of peer + int nbr_ox = (nbr_lcoor%rd); // outer coord of peer + int nbr_lane = (i&(~inner_bit)); + + if (nbr_ic) nbr_lane|=inner_bit; + assert (sx == nbr_ox); + + auto rp = &u_simd_recv_buf[i ][u_comm_offset]; + auto sp = &u_simd_send_buf[nbr_lane][u_comm_offset]; + + if(nbr_proc){ + + int recv_from_rank; + int xmit_to_rank; + + _grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank); + + scalar_object *shm = (scalar_object *) _grid->ShmBufferTranslate(recv_from_rank,sp); + // if ((ShmDirectCopy==0)||(shm==NULL)) { + if (shm==NULL) { + shm = rp; + } + + // if Direct, StencilSendToRecvFrom will suppress copy to a peer on node + // assuming above pointer flip + AddPacket((void *)sp,(void *)rp,xmit_to_rank,recv_from_rank,bytes); + + rpointers[i] = shm; + + } else { + + rpointers[i] = sp; + + } + } + + AddMerge(&u_recv_buf_p[u_comm_offset],rpointers,buffer_size,Packets.size()-1); + + u_comm_offset +=buffer_size; + } + } + } + +}; +} #endif diff --git a/lib/Tensors.h b/lib/Tensors.h index cd7a68b2..b94f22e3 100644 --- a/lib/Tensors.h +++ b/lib/Tensors.h 
@@ -30,22 +30,22 @@ Author: neo #ifndef GRID_MATH_H #define GRID_MATH_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -//#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +//#include +#include +#include +#include +#include #endif diff --git a/lib/Threads.h b/lib/Threads.h index 9d1295e5..2f270b73 100644 --- a/lib/Threads.h +++ b/lib/Threads.h @@ -37,11 +37,20 @@ Author: paboyle #ifdef GRID_OMP #include -#define PARALLEL_FOR_LOOP _Pragma("omp parallel for ") -#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#ifdef GRID_NUMA +#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)") +#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") #else -#define PARALLEL_FOR_LOOP +#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") +#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") +#endif +#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") +#define PARALLEL_REGION _Pragma("omp parallel") +#else +#define PARALLEL_FOR_LOOP +#define PARALLEL_FOR_LOOP_INTERN #define PARALLEL_NESTED_LOOP2 +#define PARALLEL_REGION #endif namespace Grid { @@ -123,6 +132,22 @@ class GridThread { ThreadBarrier(); }; + static void bcopy(const void *src, void *dst, size_t len) { +#ifdef GRID_OMP +#pragma omp parallel + { + const char *c_src =(char *) src; + char *c_dest=(char *) dst; + int me,mywork,myoff; + GridThread::GetWorkBarrier(len,me, mywork,myoff); + bcopy(&c_src[myoff],&c_dest[myoff],mywork); + } +#else + bcopy(src,dst,len); +#endif + } + + }; } diff --git a/lib/algorithms/CoarsenedMatrix.h b/lib/algorithms/CoarsenedMatrix.h index a757e258..fd9acc91 100644 --- a/lib/algorithms/CoarsenedMatrix.h +++ b/lib/algorithms/CoarsenedMatrix.h @@ -31,7 +31,6 @@ Author: paboyle #ifndef 
GRID_ALGORITHM_COARSENED_MATRIX_H #define GRID_ALGORITHM_COARSENED_MATRIX_H -#include namespace Grid { @@ -283,7 +282,7 @@ PARALLEL_FOR_LOOP } else if(SE->_is_local) { nbr = in._odata[SE->_offset]; } else { - nbr = Stencil.comm_buf[SE->_offset]; + nbr = Stencil.CommBuf()[SE->_offset]; } res = res + A[point]._odata[ss]*nbr; } diff --git a/lib/algorithms/SparseMatrix.h b/lib/algorithms/SparseMatrix.h index 4fc1a3ad..1611a6f4 100644 --- a/lib/algorithms/SparseMatrix.h +++ b/lib/algorithms/SparseMatrix.h @@ -28,7 +28,6 @@ Author: Peter Boyle #ifndef GRID_ALGORITHM_SPARSE_MATRIX_H #define GRID_ALGORITHM_SPARSE_MATRIX_H -#include namespace Grid { diff --git a/lib/algorithms/approx/Chebyshev.h b/lib/algorithms/approx/Chebyshev.h index 96a75a92..6837ae99 100644 --- a/lib/algorithms/approx/Chebyshev.h +++ b/lib/algorithms/approx/Chebyshev.h @@ -29,8 +29,7 @@ Author: paboyle #ifndef GRID_CHEBYSHEV_H #define GRID_CHEBYSHEV_H -#include -#include +#include namespace Grid { diff --git a/lib/algorithms/approx/Remez.h b/lib/algorithms/approx/Remez.h index 4a56d5d2..31938779 100644 --- a/lib/algorithms/approx/Remez.h +++ b/lib/algorithms/approx/Remez.h @@ -18,10 +18,10 @@ #include #include -#ifdef HAVE_GMP_H -#include +#ifdef HAVE_LIBGMP +#include "bigfloat.h" #else -#include +#include "bigfloat_double.h" #endif #define JMAX 10000 //Maximum number of iterations of Newton's approximation diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index e0431a53..cf3872c8 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -1,150 +1,168 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/algorithms/iterative/ConjugateGradient.h 
+Source file: ./lib/algorithms/iterative/ConjugateGradient.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_H #define GRID_CONJUGATE_GRADIENT_H namespace Grid { - ///////////////////////////////////////////////////////////// - // Base classes for iterative processes based on operators - // single input vec, single output vec. - ///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +// Base classes for iterative processes based on operators +// single input vec, single output vec. +///////////////////////////////////////////////////////////// - template - class ConjugateGradient : public OperatorFunction { -public: - RealD Tolerance; - Integer MaxIterations; - ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) { - }; +template +class ConjugateGradient : public OperatorFunction { + public: + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. + // Defaults true. 
+ RealD Tolerance; + Integer MaxIterations; + ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) + : Tolerance(tol), + MaxIterations(maxit), + ErrorOnNoConverge(err_on_no_conv){}; + void operator()(LinearOperatorBase &Linop, const Field &src, + Field &psi) { + psi.checkerboard = src.checkerboard; + conformable(psi, src); - void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + RealD cp, c, a, d, b, ssq, qq, b_pred; - psi.checkerboard = src.checkerboard; - conformable(psi,src); + Field p(src); + Field mmp(src); + Field r(src); - RealD cp,c,a,d,b,ssq,qq,b_pred; - - Field p(src); - Field mmp(src); - Field r(src); - - //Initial residual computation & set up - RealD guess = norm2(psi); - assert(std::isnan(guess)==0); + // Initial residual computation & set up + RealD guess = norm2(psi); + assert(std::isnan(guess) == 0); - Linop.HermOpAndNorm(psi,mmp,d,b); - - r= src-mmp; - p= r; - - a =norm2(p); - cp =a; - ssq=norm2(src); + + Linop.HermOpAndNorm(psi, mmp, d, b); + - std::cout< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H +#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H + +namespace Grid { + + //Mixed precision restarted defect correction CG + template::value == 2, int>::type = 0,typename std::enable_if< getPrecision::value == 1, int>::type = 0> + class MixedPrecisionConjugateGradient : public LinearFunction { + public: + RealD Tolerance; + Integer MaxInnerIterations; + Integer MaxOuterIterations; + GridBase* SinglePrecGrid; //Grid for single-precision fields + RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance + LinearOperatorBase &Linop_f; + LinearOperatorBase &Linop_d; + + //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess + LinearFunction *guesser; + + MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase &_Linop_f, LinearOperatorBase &_Linop_d) : + Linop_f(_Linop_f), Linop_d(_Linop_d), + Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid), + OuterLoopNormMult(100.), guesser(NULL){ }; + + void useGuesser(LinearFunction &g){ + guesser = &g; + } + + void operator() (const FieldD &src_d_in, FieldD &sol_d){ + GridStopWatch TotalTimer; + TotalTimer.Start(); + + int cb = src_d_in.checkerboard; + sol_d.checkerboard = cb; + + RealD src_norm = norm2(src_d_in); + RealD stop = src_norm * Tolerance*Tolerance; + + GridBase* DoublePrecGrid = src_d_in._grid; + FieldD tmp_d(DoublePrecGrid); + tmp_d.checkerboard = cb; + + FieldD tmp2_d(DoublePrecGrid); + tmp2_d.checkerboard = cb; + + FieldD src_d(DoublePrecGrid); + src_d = src_d_in; //source for next inner iteration, computed from residual during operation + + 
RealD inner_tol = Tolerance; + + FieldF src_f(SinglePrecGrid); + src_f.checkerboard = cb; + + FieldF sol_f(SinglePrecGrid); + sol_f.checkerboard = cb; + + ConjugateGradient CG_f(inner_tol, MaxInnerIterations); + CG_f.ErrorOnNoConverge = false; + + GridStopWatch InnerCGtimer; + + GridStopWatch PrecChangeTimer; + + for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){ + //Compute double precision rsd and also new RHS vector. + Linop_d.HermOp(sol_d, tmp_d); + RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector + + std::cout< CG_d(Tolerance, MaxInnerIterations); + CG_d(Linop_d, src_d_in, sol_d); + + TotalTimer.Stop(); + std::cout< GetSubMtx(DenseMatrix &A,int row_st, int row_end, int col_st, } -#include -#include +#include "Householder.h" +#include "Francis.h" #endif diff --git a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h index c53d9318..5d6deae0 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h @@ -31,10 +31,14 @@ Author: paboyle #include //memset #ifdef USE_LAPACK -#include +void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, + double *vl, double *vu, int *il, int *iu, double *abstol, + int *m, double *w, double *z, int *ldz, int *isuppz, + double *work, int *lwork, int *iwork, int *liwork, + int *info); #endif -#include -#include +#include "DenseMatrix.h" +#include "EigenSort.h" namespace Grid { diff --git a/lib/cartesian/Cartesian_base.h b/lib/cartesian/Cartesian_base.h index 8272ac71..72b21ee3 100644 --- a/lib/cartesian/Cartesian_base.h +++ b/lib/cartesian/Cartesian_base.h @@ -29,7 +29,6 @@ Author: paboyle #ifndef GRID_CARTESIAN_BASE_H #define GRID_CARTESIAN_BASE_H -#include namespace Grid{ @@ -78,15 +77,12 @@ public: // GridCartesian / GridRedBlackCartesian //////////////////////////////////////////////////////////////// virtual int CheckerBoarded(int 
dim)=0; - virtual int CheckerBoard(std::vector site)=0; + virtual int CheckerBoard(std::vector &site)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; - int CheckerBoardFromOindex (int Oindex){ - std::vector ocoor; - oCoorFromOindex(ocoor,Oindex); - return CheckerBoard(ocoor); - } + virtual int CheckerBoardFromOindex (int Oindex)=0; + virtual int CheckerBoardFromOindexTable (int Oindex)=0; ////////////////////////////////////////////////////////////////////////////////////////////// // Local layout calculations @@ -107,6 +103,12 @@ public: for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); return idx; } + virtual int iIndex(std::vector &lcoor) + { + int idx=0; + for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); + return idx; + } inline int oIndexReduced(std::vector &ocoor) { int idx=0; @@ -123,12 +125,6 @@ public: ////////////////////////////////////////////////////////// // SIMD lane addressing ////////////////////////////////////////////////////////// - inline int iIndex(std::vector &lcoor) - { - int idx=0; - for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); - return idx; - } inline void iCoorFromIindex(std::vector &coor,int lane) { Lexicographic::CoorFromIndex(coor,lane,_simd_layout); @@ -220,7 +216,7 @@ public: } i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim - o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim + o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim } void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector &gcoor) diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 1f8f7514..b0d20441 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -39,10 +39,17 @@ class 
GridCartesian: public GridBase { public: + virtual int CheckerBoardFromOindexTable (int Oindex) { + return 0; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + return 0; + } virtual int CheckerBoarded(int dim){ return 0; } - virtual int CheckerBoard(std::vector site){ + virtual int CheckerBoard(std::vector &site){ return 0; } virtual int CheckerBoardDestination(int cb,int shift,int dim){ diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 2424d8dc..6a4300d7 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -32,29 +32,24 @@ Author: Peter Boyle namespace Grid { - static const int CbRed =0; - static const int CbBlack=1; - static const int Even =CbRed; - static const int Odd =CbBlack; - - // Perhaps these are misplaced and - // should be in sparse matrix. - // Also should make these a named enum type - static const int DaggerNo=0; - static const int DaggerYes=1; - + static const int CbRed =0; + static const int CbBlack=1; + static const int Even =CbRed; + static const int Odd =CbBlack; + // Specialise this for red black grids storing half the data like a chess board. class GridRedBlackCartesian : public GridBase { public: std::vector _checker_dim_mask; int _checker_dim; + std::vector _checker_board; virtual int CheckerBoarded(int dim){ if( dim==_checker_dim) return 1; else return 0; } - virtual int CheckerBoard(std::vector site){ + virtual int CheckerBoard(std::vector &site){ int linear=0; assert(site.size()==_ndimension); for(int d=0;d<_ndimension;d++){ @@ -78,12 +73,20 @@ public: // or by looping over x,y,z and multiply rather than computing checkerboard. 
if ( (source_cb+ocb)&1 ) { - return (shift)/2; } else { return (shift+1)/2; } } + virtual int CheckerBoardFromOindexTable (int Oindex) { + return _checker_board[Oindex]; + } + virtual int CheckerBoardFromOindex (int Oindex) + { + std::vector ocoor; + oCoorFromOindex(ocoor,Oindex); + return CheckerBoard(ocoor); + } virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ if(dim != _checker_dim) return shift; @@ -175,7 +178,7 @@ public: // all elements of a simd vector must have same checkerboard. // If Ls vectorised, this must still be the case; e.g. dwf rb5d if ( _simd_layout[d]>1 ) { - if ( d != _checker_dim ) { + if ( checker_dim_mask[d] ) { assert( (_rdimensions[d]&0x1) == 0 ); } } @@ -191,6 +194,8 @@ public: _ostride[d] = _ostride[d-1]*_rdimensions[d-1]; _istride[d] = _istride[d-1]*_simd_layout[d-1]; } + + } //////////////////////////////////////////////////////////////////////////////////////////// @@ -211,6 +216,18 @@ public: _slice_nblock[d]=nblock; block = block*_rdimensions[d]; } + + //////////////////////////////////////////////// + // Create a checkerboard lookup table + //////////////////////////////////////////////// + int rvol = 1; + for(int d=0;d<_ndimension;d++){ + rvol=rvol * _rdimensions[d]; + } + _checker_board.resize(rvol); + for(int osite=0;osite<_osites;osite++){ + _checker_board[osite] = CheckerBoardFromOindex (osite); + } }; protected: @@ -224,9 +241,21 @@ protected: idx+=_ostride[d]*(coor[d]%_rdimensions[d]); } } - return idx; + return idx; }; + virtual int iIndex(std::vector &lcoor) + { + int idx=0; + for(int d=0;d<_ndimension;d++) { + if( d==_checker_dim ) { + idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d])); + } else { + idx+=_istride[d]*(lcoor[d]/_rdimensions[d]); + } + } + return idx; + } }; } diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc new file mode 100644 index 00000000..b003d867 --- /dev/null +++ b/lib/communicator/Communicator_base.cc @@ -0,0 +1,124 @@ + 
/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_none.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include "Grid.h" +namespace Grid { + +/////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////// +void * CartesianCommunicator::ShmCommBuf; +uint64_t CartesianCommunicator::MAX_MPI_SHM_BYTES = 128*1024*1024; + +///////////////////////////////// +// Alloc, free shmem region +///////////////////////////////// +void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){ + // bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes + void *ptr = (void *)heap_top; + heap_top += bytes; + heap_bytes+= bytes; + if (heap_bytes >= MAX_MPI_SHM_BYTES) { + std::cout<< " ShmBufferMalloc exceeded shared heap size -- try increasing with --shm flag" < & CartesianCommunicator::ThisProcessorCoor(void) { return 
_processor_coor; }; +const std::vector & CartesianCommunicator::ProcessorGrid(void) { return _processors; }; +int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; }; + +//////////////////////////////////////////////////////////////////////////////// +// very VERY rarely (Log, serial RNG) we need world without a grid +//////////////////////////////////////////////////////////////////////////////// + +void CartesianCommunicator::GlobalSum(ComplexF &c) +{ + GlobalSumVector((float *)&c,2); +} +void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N) +{ + GlobalSumVector((float *)c,2*N); +} +void CartesianCommunicator::GlobalSum(ComplexD &c) +{ + GlobalSumVector((double *)&c,2); +} +void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) +{ + GlobalSumVector((double *)c,2*N); +} + +#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) + +void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes) +{ + SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); +} +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &waitall) +{ + SendToRecvFromComplete(waitall); +} +void CartesianCommunicator::StencilBarrier(void){}; + +commVector CartesianCommunicator::ShmBufStorageVector; + +void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } + +void *CartesianCommunicator::ShmBuffer(int rank) { + return NULL; +} +void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { + return NULL; +} +void CartesianCommunicator::ShmInitGeneric(void){ + ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES); + ShmCommBuf=(void *)&ShmBufStorageVector[0]; +} + +#endif + +} + diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 94d277e9..94ad1093 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -1,3 +1,4 @@ + 
/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -34,123 +35,196 @@ Author: Peter Boyle #ifdef GRID_COMMS_MPI #include #endif +#ifdef GRID_COMMS_MPI3 +#include +#endif +#ifdef GRID_COMMS_MPI3L +#include +#endif #ifdef GRID_COMMS_SHMEM #include #endif + namespace Grid { + class CartesianCommunicator { public: + // 65536 ranks per node adequate for now + // 128MB shared memory for comms enought for 48^4 local vol comms + // Give external control (command line override?) of this + + static const int MAXLOG2RANKSPERNODE = 16; + static uint64_t MAX_MPI_SHM_BYTES; + // Communicator should know nothing of the physics grid, only processor grid. + int _Nprocessors; // How many in all + std::vector _processors; // Which dimensions get relayed out over processors lanes. + int _processor; // linear processor rank + std::vector _processor_coor; // linear processor coordinate + unsigned long _ndimension; - int _Nprocessors; // How many in all - std::vector _processors; // Which dimensions get relayed out over processors lanes. 
- int _processor; // linear processor rank - std::vector _processor_coor; // linear processor coordinate - unsigned long _ndimension; - -#ifdef GRID_COMMS_MPI - MPI_Comm communicator; - typedef MPI_Request CommsRequest_t; +#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3) || defined (GRID_COMMS_MPI3L) + static MPI_Comm communicator_world; + MPI_Comm communicator; + typedef MPI_Request CommsRequest_t; #else - typedef int CommsRequest_t; + typedef int CommsRequest_t; #endif - static void Init(int *argc, char ***argv); + //////////////////////////////////////////////////////////////////// + // Helper functionality for SHM Windows common to all other impls + //////////////////////////////////////////////////////////////////// + // Longer term; drop this in favour of a master / slave model with + // cartesian communicator on a subset of ranks, slave ranks controlled + // by group leader with data xfer via shared memory + //////////////////////////////////////////////////////////////////// +#ifdef GRID_COMMS_MPI3 - // Constructor - CartesianCommunicator(const std::vector &pdimensions_in); + static int ShmRank; + static int ShmSize; + static int GroupRank; + static int GroupSize; + static int WorldRank; + static int WorldSize; - // Wraps MPI_Cart routines - void ShiftedRanks(int dim,int shift,int & source, int & dest); - int RankFromProcessorCoor(std::vector &coor); - void ProcessorCoorFromRank(int rank,std::vector &coor); + std::vector WorldDims; + std::vector GroupDims; + std::vector ShmDims; + + std::vector GroupCoor; + std::vector ShmCoor; + std::vector WorldCoor; - ///////////////////////////////// - // Grid information queries - ///////////////////////////////// - int IsBoss(void) { return _processor==0; }; - int BossRank(void) { return 0; }; - int ThisRank(void) { return _processor; }; - const std::vector & ThisProcessorCoor(void) { return _processor_coor; }; - const std::vector & ProcessorGrid(void) { return _processors; }; - int ProcessorCount(void) { 
return _Nprocessors; }; + static std::vector GroupRanks; + static std::vector MyGroup; + static int ShmSetup; + static MPI_Win ShmWindow; + static MPI_Comm ShmComm; + + std::vector LexicographicToWorldRank; + + static std::vector ShmCommBufs; - //////////////////////////////////////////////////////////// - // Reduction - //////////////////////////////////////////////////////////// - void GlobalSum(RealF &); - void GlobalSumVector(RealF *,int N); +#else + static void ShmInitGeneric(void); + static commVector ShmBufStorageVector; +#endif - void GlobalSum(RealD &); - void GlobalSumVector(RealD *,int N); + ///////////////////////////////// + // Grid information and queries + // Implemented in Communicator_base.C + ///////////////////////////////// + static void * ShmCommBuf; + size_t heap_top; + size_t heap_bytes; - void GlobalSum(uint32_t &); - void GlobalSum(uint64_t &); + void *ShmBufferSelf(void); + void *ShmBuffer(int rank); + void *ShmBufferTranslate(int rank,void * local_p); + void *ShmBufferMalloc(size_t bytes); + void ShmBufferFreeAll(void) ; + + //////////////////////////////////////////////// + // Must call in Grid startup + //////////////////////////////////////////////// + static void Init(int *argc, char ***argv); + + //////////////////////////////////////////////// + // Constructor of any given grid + //////////////////////////////////////////////// + CartesianCommunicator(const std::vector &pdimensions_in); + + //////////////////////////////////////////////////////////////////////////////////////// + // Wraps MPI_Cart routines, or implements equivalent on other impls + //////////////////////////////////////////////////////////////////////////////////////// + void ShiftedRanks(int dim,int shift,int & source, int & dest); + int RankFromProcessorCoor(std::vector &coor); + void ProcessorCoorFromRank(int rank,std::vector &coor); + + int IsBoss(void) ; + int BossRank(void) ; + int ThisRank(void) ; + const std::vector & ThisProcessorCoor(void) ; + const 
std::vector & ProcessorGrid(void) ; + int ProcessorCount(void) ; - void GlobalSum(ComplexF &c) - { - GlobalSumVector((float *)&c,2); - } - void GlobalSumVector(ComplexF *c,int N) - { - GlobalSumVector((float *)c,2*N); - } + //////////////////////////////////////////////////////////////////////////////// + // very VERY rarely (Log, serial RNG) we need world without a grid + //////////////////////////////////////////////////////////////////////////////// + static int RankWorld(void) ; + static void BroadcastWorld(int root,void* data, int bytes); + + //////////////////////////////////////////////////////////// + // Reduction + //////////////////////////////////////////////////////////// + void GlobalSum(RealF &); + void GlobalSumVector(RealF *,int N); + void GlobalSum(RealD &); + void GlobalSumVector(RealD *,int N); + void GlobalSum(uint32_t &); + void GlobalSum(uint64_t &); + void GlobalSum(ComplexF &c); + void GlobalSumVector(ComplexF *c,int N); + void GlobalSum(ComplexD &c); + void GlobalSumVector(ComplexD *c,int N); + + template void GlobalSum(obj &o){ + typedef typename obj::scalar_type scalar_type; + int words = sizeof(obj)/sizeof(scalar_type); + scalar_type * ptr = (scalar_type *)& o; + GlobalSumVector(ptr,words); + } + + //////////////////////////////////////////////////////////// + // Face exchange, buffer swap in translational invariant way + //////////////////////////////////////////////////////////// + void SendToRecvFrom(void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void SendRecvPacket(void *xmit, + void *recv, + int xmit_to_rank, + int recv_from_rank, + int bytes); + + void SendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void SendToRecvFromComplete(std::vector &waitall); - void GlobalSum(ComplexD &c) - { - GlobalSumVector((double *)&c,2); - } - void GlobalSumVector(ComplexD *c,int N) - { - GlobalSumVector((double *)c,2*N); - } - - 
template void GlobalSum(obj &o){ - typedef typename obj::scalar_type scalar_type; - int words = sizeof(obj)/sizeof(scalar_type); - scalar_type * ptr = (scalar_type *)& o; - GlobalSumVector(ptr,words); - } - //////////////////////////////////////////////////////////// - // Face exchange, buffer swap in translational invariant way - //////////////////////////////////////////////////////////// - void SendToRecvFrom(void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); + void StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int xmit_to_rank, + void *recv, + int recv_from_rank, + int bytes); + + void StencilSendToRecvFromComplete(std::vector &waitall); + void StencilBarrier(void); - void SendRecvPacket(void *xmit, - void *recv, - int xmit_to_rank, - int recv_from_rank, - int bytes); - - void SendToRecvFromBegin(std::vector &list, - void *xmit, - int xmit_to_rank, - void *recv, - int recv_from_rank, - int bytes); - void SendToRecvFromComplete(std::vector &waitall); - - //////////////////////////////////////////////////////////// - // Barrier - //////////////////////////////////////////////////////////// - void Barrier(void); - - //////////////////////////////////////////////////////////// - // Broadcast a buffer and composite larger - //////////////////////////////////////////////////////////// - void Broadcast(int root,void* data, int bytes); - template void Broadcast(int root,obj &data) + //////////////////////////////////////////////////////////// + // Barrier + //////////////////////////////////////////////////////////// + void Barrier(void); + + //////////////////////////////////////////////////////////// + // Broadcast a buffer and composite larger + //////////////////////////////////////////////////////////// + void Broadcast(int root,void* data, int bytes); + + template void Broadcast(int root,obj &data) { Broadcast(root,(void *)&data,sizeof(data)); }; - static void BroadcastWorld(int root,void* data, int bytes); - }; } 
diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index dff9811a..65ced9c7 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -30,21 +30,23 @@ Author: Peter Boyle namespace Grid { - // Should error check all MPI calls. + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +MPI_Comm CartesianCommunicator::communicator_world; + +// Should error check all MPI calls. void CartesianCommunicator::Init(int *argc, char ***argv) { int flag; MPI_Initialized(&flag); // needed to coexist with other libs apparently if ( !flag ) { MPI_Init(argc,argv); } + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + ShmInitGeneric(); } - int Rank(void) { - int pe; - MPI_Comm_rank(MPI_COMM_WORLD,&pe); - return pe; - } - CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _ndimension = processors.size(); @@ -54,7 +56,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) _processors = processors; _processor_coor.resize(_ndimension); - MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); @@ -67,7 +69,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector &processors) assert(Size==_Nprocessors); } - void CartesianCommunicator::GlobalSum(uint32_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); assert(ierr==0); @@ -168,7 +169,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & int nreq=list.size(); std::vector status(nreq); int ierr = 
MPI_Waitall(nreq,&list[0],&status[0]); - assert(ierr==0); } @@ -187,14 +187,22 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) communicator); assert(ierr==0); } - + /////////////////////////////////////////////////////// + // Should only be used prior to Grid Init finished. + // Check for this? + /////////////////////////////////////////////////////// +int CartesianCommunicator::RankWorld(void){ + int r; + MPI_Comm_rank(communicator_world,&r); + return r; +} void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { int ierr= MPI_Bcast(data, bytes, MPI_BYTE, root, - MPI_COMM_WORLD); + communicator_world); assert(ierr==0); } diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc new file mode 100644 index 00000000..c707ec1f --- /dev/null +++ b/lib/communicator/Communicator_mpi3.cc @@ -0,0 +1,580 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_mpi.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include "Grid.h" +#include + +namespace Grid { + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// +int CartesianCommunicator::ShmSetup = 0; + +int CartesianCommunicator::ShmRank; +int CartesianCommunicator::ShmSize; +int CartesianCommunicator::GroupRank; +int CartesianCommunicator::GroupSize; +int CartesianCommunicator::WorldRank; +int CartesianCommunicator::WorldSize; + +MPI_Comm CartesianCommunicator::communicator_world; +MPI_Comm CartesianCommunicator::ShmComm; +MPI_Win CartesianCommunicator::ShmWindow; + +std::vector CartesianCommunicator::GroupRanks; +std::vector CartesianCommunicator::MyGroup; +std::vector CartesianCommunicator::ShmCommBufs; + +void *CartesianCommunicator::ShmBufferSelf(void) +{ + return ShmCommBufs[ShmRank]; +} +void *CartesianCommunicator::ShmBuffer(int rank) +{ + int gpeer = GroupRanks[rank]; + if (gpeer == MPI_UNDEFINED){ + return NULL; + } else { + return ShmCommBufs[gpeer]; + } +} +void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) +{ + int gpeer = GroupRanks[rank]; + if (gpeer == MPI_UNDEFINED){ + return NULL; + } else { + uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank]; + uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset; + return (void *) remote; + } +} + +void CartesianCommunicator::Init(int *argc, char ***argv) { + int flag; + MPI_Initialized(&flag); // needed to coexist with other libs apparently + if ( !flag ) { + MPI_Init(argc,argv); + } + + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); + MPI_Comm_rank(communicator_world,&WorldRank); + 
MPI_Comm_size(communicator_world,&WorldSize); + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory + ///////////////////////////////////////////////////////////////////// + MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm); + MPI_Comm_rank(ShmComm ,&ShmRank); + MPI_Comm_size(ShmComm ,&ShmSize); + GroupSize = WorldSize/ShmSize; + + ///////////////////////////////////////////////////////////////////// + // find world ranks in our SHM group (i.e. which ranks are on our node) + ///////////////////////////////////////////////////////////////////// + MPI_Group WorldGroup, ShmGroup; + MPI_Comm_group (communicator_world, &WorldGroup); + MPI_Comm_group (ShmComm, &ShmGroup); + + std::vector world_ranks(WorldSize); + GroupRanks.resize(WorldSize); + for(int r=0;r()); + int myleader = MyGroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(GroupSize,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l + for(uint64_t page=0;page coor = _processor_coor; + + assert(std::abs(shift) <_processors[dim]); + + coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; + Lexicographic::IndexFromCoor(coor,source,_processors); + source = LexicographicToWorldRank[source]; + + coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; + Lexicographic::IndexFromCoor(coor,dest,_processors); + dest = LexicographicToWorldRank[dest]; +} 
+int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) +{ + int rank; + Lexicographic::IndexFromCoor(coor,rank,_processors); + rank = LexicographicToWorldRank[rank]; + return rank; +} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) +{ + Lexicographic::CoorFromIndex(coor,rank,_processors); + rank = LexicographicToWorldRank[rank]; +} + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + int ierr; + + communicator=communicator_world; + + _ndimension = processors.size(); + + //////////////////////////////////////////////////////////////// + // Assert power of two shm_size. + //////////////////////////////////////////////////////////////// + int log2size = -1; + for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){ + if ( (0x1< WorldDims = processors; + + ShmDims.resize(_ndimension,1); + GroupDims.resize(_ndimension); + + ShmCoor.resize(_ndimension); + GroupCoor.resize(_ndimension); + WorldCoor.resize(_ndimension); + + for(int l2=0;l2 reqs(0); + SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); + SendToRecvFromComplete(reqs); +} + +void CartesianCommunicator::SendRecvPacket(void *xmit, + void *recv, + int sender, + int receiver, + int bytes) +{ + MPI_Status stat; + assert(sender != receiver); + int tag = sender; + if ( _processor == sender ) { + MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); + } + if ( _processor == receiver ) { + MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); + } +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ +#if 0 + this->StencilBarrier(); + + MPI_Request xrq; + MPI_Request rrq; + + static int sequence; + + int ierr; + int tag; + int check; + + assert(dest != _processor); + assert(from != _processor); + + int gdest = GroupRanks[dest]; + int gfrom = GroupRanks[from]; + int gme = GroupRanks[_processor]; + + sequence++; + + char *from_ptr 
= (char *)ShmCommBufs[ShmRank]; + + int small = (bytesStencilBarrier(); + + if (small && (gfrom !=MPI_UNDEFINED) ) { + T *ip = (T *)from_ptr; + T *op = (T *)recv; +PARALLEL_FOR_LOOP + for(int w=0;wStencilBarrier(); + +#else + MPI_Request xrq; + MPI_Request rrq; + int rank = _processor; + int ierr; + ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + + assert(ierr==0); + + list.push_back(xrq); + list.push_back(rrq); +#endif +} + +void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + MPI_Request xrq; + MPI_Request rrq; + + int ierr; + + assert(dest != _processor); + assert(from != _processor); + + int gdest = GroupRanks[dest]; + int gfrom = GroupRanks[from]; + int gme = GroupRanks[_processor]; + + assert(gme == ShmRank); + + if ( gdest == MPI_UNDEFINED ) { + ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + assert(ierr==0); + list.push_back(xrq); + } + + if ( gfrom ==MPI_UNDEFINED) { + ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + assert(ierr==0); + list.push_back(rrq); + } + +} + + +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) +{ + SendToRecvFromComplete(list); +} + +void CartesianCommunicator::StencilBarrier(void) +{ + MPI_Win_sync (ShmWindow); + MPI_Barrier (ShmComm); + MPI_Win_sync (ShmWindow); +} + +void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) +{ + int nreq=list.size(); + std::vector status(nreq); + int ierr = MPI_Waitall(nreq,&list[0],&status[0]); + assert(ierr==0); +} + +void CartesianCommunicator::Barrier(void) +{ + int ierr = MPI_Barrier(communicator); + assert(ierr==0); +} + +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) +{ + int ierr=MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator); + assert(ierr==0); +} + +void 
CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) +{ + int ierr= MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator_world); + assert(ierr==0); +} + +} + diff --git a/lib/communicator/Communicator_mpi3_leader.cc b/lib/communicator/Communicator_mpi3_leader.cc new file mode 100644 index 00000000..71f1a913 --- /dev/null +++ b/lib/communicator/Communicator_mpi3_leader.cc @@ -0,0 +1,874 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_mpi.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include "Grid.h" +#include + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// Workarounds: +/// i) bloody mac os doesn't implement unnamed semaphores since it is "optional" posix. +/// darwin dispatch semaphores don't seem to be multiprocess. 
+/// +/// ii) openmpi under --mca shmem posix works with two squadrons per node; +/// openmpi under default mca settings (I think --mca shmem mmap) on MacOS makes two squadrons map the SAME +/// memory as each other, despite their living on different communicators. This appears to be a bug in OpenMPI. +/// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include +#include +#include +#include + +typedef sem_t *Grid_semaphore; + +#define SEM_INIT(S) S = sem_open(sem_name,0,0600,0); assert ( S != SEM_FAILED ); +#define SEM_INIT_EXCL(S) sem_unlink(sem_name); S = sem_open(sem_name,O_CREAT|O_EXCL,0600,0); assert ( S != SEM_FAILED ); +#define SEM_POST(S) assert ( sem_post(S) == 0 ); +#define SEM_WAIT(S) assert ( sem_wait(S) == 0 ); + +#include + +namespace Grid { + +enum { COMMAND_ISEND, COMMAND_IRECV, COMMAND_WAITALL }; + +struct Descriptor { + uint64_t buf; + size_t bytes; + int rank; + int tag; + int command; + MPI_Request request; +}; + +const int pool = 48; + +class SlaveState { +public: + volatile int head; + volatile int start; + volatile int tail; + volatile Descriptor Descrs[pool]; +}; + +class Slave { +public: + Grid_semaphore sem_head; + Grid_semaphore sem_tail; + SlaveState *state; + MPI_Comm squadron; + uint64_t base; + int universe_rank; + int vertical_rank; + char sem_name [NAME_MAX]; + //////////////////////////////////////////////////////////// + // Descriptor circular pointers + //////////////////////////////////////////////////////////// + Slave() {}; + + void Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank); + + void SemInit(void) { + sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); + // printf("SEM_NAME: %s \n",sem_name); + SEM_INIT(sem_head); + sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); + // printf("SEM_NAME: %s \n",sem_name); + SEM_INIT(sem_tail); + } + void SemInitExcl(void) { + 
sprintf(sem_name,"/Grid_mpi3_sem_head_%d",universe_rank); + // printf("SEM_INIT_EXCL: %s \n",sem_name); + SEM_INIT_EXCL(sem_head); + sprintf(sem_name,"/Grid_mpi3_sem_tail_%d",universe_rank); + // printf("SEM_INIT_EXCL: %s \n",sem_name); + SEM_INIT_EXCL(sem_tail); + } + void WakeUpDMA(void) { + SEM_POST(sem_head); + }; + void WakeUpCompute(void) { + SEM_POST(sem_tail); + }; + void WaitForCommand(void) { + SEM_WAIT(sem_head); + }; + void WaitForComplete(void) { + SEM_WAIT(sem_tail); + }; + void EventLoop (void) { + // std::cout<< " Entering event loop "<tail == state->head ); + } +}; + +//////////////////////////////////////////////////////////////////////// +// One instance of a data mover. +// Master and Slave must agree on location in shared memory +//////////////////////////////////////////////////////////////////////// + +class MPIoffloadEngine { +public: + + static std::vector Slaves; + + static int ShmSetup; + + static int UniverseRank; + static int UniverseSize; + + static MPI_Comm communicator_universe; + static MPI_Comm communicator_cached; + + static MPI_Comm HorizontalComm; + static int HorizontalRank; + static int HorizontalSize; + + static MPI_Comm VerticalComm; + static MPI_Win VerticalWindow; + static int VerticalSize; + static int VerticalRank; + + static std::vector VerticalShmBufs; + static std::vector > UniverseRanks; + static std::vector UserCommunicatorToWorldRanks; + + static MPI_Group WorldGroup, CachedGroup; + + static void CommunicatorInit (MPI_Comm &communicator_world, + MPI_Comm &ShmComm, + void * &ShmCommBuf); + + static void MapCommRankToWorldRank(int &hashtag, int & comm_world_peer,int tag, MPI_Comm comm,int commrank); + + ///////////////////////////////////////////////////////// + // routines for master proc must handle any communicator + ///////////////////////////////////////////////////////// + + static void QueueSend(int slave,void *buf, int bytes, int tag, MPI_Comm comm,int rank) { + // std::cout<< " Queueing send "<< bytes<< " 
slave "<< slave << " to comm "<= units ) { + mywork = myoff = 0; + } else { + mywork = (nwork+me)/units; + myoff = basework * me; + if ( me > backfill ) + myoff+= (me-backfill); + } + return; + }; + + static void QueueMultiplexedSend(void *buf, int bytes, int tag, MPI_Comm comm,int rank) { + uint8_t * cbuf = (uint8_t *) buf; + int mywork, myoff, procs; + procs = VerticalSize-1; + for(int s=0;s MPIoffloadEngine::Slaves; + +int MPIoffloadEngine::UniverseRank; +int MPIoffloadEngine::UniverseSize; + +MPI_Comm MPIoffloadEngine::communicator_universe; +MPI_Comm MPIoffloadEngine::communicator_cached; +MPI_Group MPIoffloadEngine::WorldGroup; +MPI_Group MPIoffloadEngine::CachedGroup; + +MPI_Comm MPIoffloadEngine::HorizontalComm; +int MPIoffloadEngine::HorizontalRank; +int MPIoffloadEngine::HorizontalSize; + +MPI_Comm MPIoffloadEngine::VerticalComm; +int MPIoffloadEngine::VerticalSize; +int MPIoffloadEngine::VerticalRank; +MPI_Win MPIoffloadEngine::VerticalWindow; +std::vector MPIoffloadEngine::VerticalShmBufs; +std::vector > MPIoffloadEngine::UniverseRanks; +std::vector MPIoffloadEngine::UserCommunicatorToWorldRanks; + +int MPIoffloadEngine::ShmSetup = 0; + +void MPIoffloadEngine::CommunicatorInit (MPI_Comm &communicator_world, + MPI_Comm &ShmComm, + void * &ShmCommBuf) +{ + int flag; + assert(ShmSetup==0); + + ////////////////////////////////////////////////////////////////////// + // Universe is all nodes prior to squadron grouping + ////////////////////////////////////////////////////////////////////// + MPI_Comm_dup (MPI_COMM_WORLD,&communicator_universe); + MPI_Comm_rank(communicator_universe,&UniverseRank); + MPI_Comm_size(communicator_universe,&UniverseSize); + + ///////////////////////////////////////////////////////////////////// + // Split into groups that can share memory (Verticals) + ///////////////////////////////////////////////////////////////////// +#undef MPI_SHARED_MEM_DEBUG +#ifdef MPI_SHARED_MEM_DEBUG + 
MPI_Comm_split(communicator_universe,(UniverseRank/4),UniverseRank,&VerticalComm); +#else + MPI_Comm_split_type(communicator_universe, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&VerticalComm); +#endif + MPI_Comm_rank(VerticalComm ,&VerticalRank); + MPI_Comm_size(VerticalComm ,&VerticalSize); + + ////////////////////////////////////////////////////////////////////// + // Split into horizontal groups by rank in squadron + ////////////////////////////////////////////////////////////////////// + MPI_Comm_split(communicator_universe,VerticalRank,UniverseRank,&HorizontalComm); + MPI_Comm_rank(HorizontalComm,&HorizontalRank); + MPI_Comm_size(HorizontalComm,&HorizontalSize); + assert(HorizontalSize*VerticalSize==UniverseSize); + + //////////////////////////////////////////////////////////////////////////////// + // What is my place in the world + //////////////////////////////////////////////////////////////////////////////// + int WorldRank=0; + if(VerticalRank==0) WorldRank = HorizontalRank; + int ierr=MPI_Allreduce(MPI_IN_PLACE,&WorldRank,1,MPI_INT,MPI_SUM,VerticalComm); + assert(ierr==0); + + //////////////////////////////////////////////////////////////////////////////// + // Where is the world in the universe? 
+ //////////////////////////////////////////////////////////////////////////////// + UniverseRanks = std::vector >(HorizontalSize,std::vector(VerticalSize,0)); + UniverseRanks[WorldRank][VerticalRank] = UniverseRank; + for(int w=0;w0 ) size = sizeof(SlaveState); + + sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); + + shm_unlink(shm_name); + + int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); + if ( fd < 0 ) { + perror("failed shm_open"); + assert(0); + } + + ftruncate(fd, size); + + VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if ( VerticalShmBufs[r] == MAP_FAILED ) { + perror("failed mmap"); + assert(0); + } + + uint64_t * check = (uint64_t *) VerticalShmBufs[r]; + check[0] = WorldRank; + check[1] = r; + + // std::cout<<"SHM "<0 ) size = sizeof(SlaveState); + + sprintf(shm_name,"/Grid_mpi3_shm_%d_%d",WorldRank,r); + + int fd=shm_open(shm_name,O_RDWR|O_CREAT,0600); + if ( fd<0 ) { + perror("failed shm_open"); + assert(0); + } + VerticalShmBufs[r] = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + uint64_t * check = (uint64_t *) VerticalShmBufs[r]; + assert(check[0]== WorldRank); + assert(check[1]== r); + std::cerr<<"SHM "<"<"< cached_ranks(size); + + for(int r=0;r"<>0 )&0xFFFF)^((icomm>>16)&0xFFFF) + ^ ((icomm>>32)&0xFFFF)^((icomm>>48)&0xFFFF); + + // hashtag = (comm_hash<<15) | tag; + hashtag = tag; + +}; + +void Slave::Init(SlaveState * _state,MPI_Comm _squadron,int _universe_rank,int _vertical_rank) +{ + squadron=_squadron; + universe_rank=_universe_rank; + vertical_rank=_vertical_rank; + state =_state; + // std::cout << "state "<<_state<<" comm "<<_squadron<<" universe_rank"<head = state->tail = state->start = 0; + base = (uint64_t)MPIoffloadEngine::VerticalShmBufs[0]; + int rank; MPI_Comm_rank(_squadron,&rank); +} +#define PERI_PLUS(A) ( (A+1)%pool ) +int Slave::Event (void) { + + static int tail_last; + static int head_last; + static int start_last; + int ierr; + + 
//////////////////////////////////////////////////// + // Try to advance the start pointers + //////////////////////////////////////////////////// + int s=state->start; + if ( s != state->head ) { + switch ( state->Descrs[s].command ) { + case COMMAND_ISEND: + /* + std::cout<< " Send "<Descrs[s].buf<< "["<Descrs[s].bytes<<"]" + << " to " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag + << " Comm " << MPIoffloadEngine::communicator_universe<< " me " <Descrs[s].buf+base), + state->Descrs[s].bytes, + MPI_CHAR, + state->Descrs[s].rank, + state->Descrs[s].tag, + MPIoffloadEngine::communicator_universe, + (MPI_Request *)&state->Descrs[s].request); + assert(ierr==0); + state->start = PERI_PLUS(s); + return 1; + break; + + case COMMAND_IRECV: + /* + std::cout<< " Recv "<Descrs[s].buf<< "["<Descrs[s].bytes<<"]" + << " from " << state->Descrs[s].rank<< " tag" << state->Descrs[s].tag + << " Comm " << MPIoffloadEngine::communicator_universe<< " me "<< universe_rank<< std::endl; + */ + ierr=MPI_Irecv((void *)(state->Descrs[s].buf+base), + state->Descrs[s].bytes, + MPI_CHAR, + state->Descrs[s].rank, + state->Descrs[s].tag, + MPIoffloadEngine::communicator_universe, + (MPI_Request *)&state->Descrs[s].request); + + // std::cout<< " Request is "<Descrs[s].request<Descrs[0].request<start = PERI_PLUS(s); + return 1; + break; + + case COMMAND_WAITALL: + + for(int t=state->tail;t!=s; t=PERI_PLUS(t) ){ + MPI_Wait((MPI_Request *)&state->Descrs[t].request,MPI_STATUS_IGNORE); + }; + s=PERI_PLUS(s); + state->start = s; + state->tail = s; + + WakeUpCompute(); + + return 1; + break; + + default: + assert(0); + break; + } + } + return 0; +} + ////////////////////////////////////////////////////////////////////////////// + // External interaction with the queue + ////////////////////////////////////////////////////////////////////////////// + +uint64_t Slave::QueueCommand(int command,void *buf, int bytes, int tag, MPI_Comm comm,int commrank) +{ + 
///////////////////////////////////////// + // Spin; if FIFO is full until not full + ///////////////////////////////////////// + int head =state->head; + int next = PERI_PLUS(head); + + // Set up descriptor + int worldrank; + int hashtag; + MPI_Comm communicator; + MPI_Request request; + + MPIoffloadEngine::MapCommRankToWorldRank(hashtag,worldrank,tag,comm,commrank); + + uint64_t relative= (uint64_t)buf - base; + state->Descrs[head].buf = relative; + state->Descrs[head].bytes = bytes; + state->Descrs[head].rank = MPIoffloadEngine::UniverseRanks[worldrank][vertical_rank]; + state->Descrs[head].tag = hashtag; + state->Descrs[head].command= command; + + /* + if ( command == COMMAND_ISEND ) { + std::cout << "QueueSend from "<< universe_rank <<" to commrank " << commrank + << " to worldrank " << worldrank <tail==next ); + + // Msync on weak order architectures + // Advance pointer + state->head = next; + + return 0; +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// + +MPI_Comm CartesianCommunicator::communicator_world; + +void CartesianCommunicator::Init(int *argc, char ***argv) +{ + int flag; + MPI_Initialized(&flag); // needed to coexist with other libs apparently + if ( !flag ) { + MPI_Init(argc,argv); + } + communicator_world = MPI_COMM_WORLD; + MPI_Comm ShmComm; + MPIoffloadEngine::CommunicatorInit (communicator_world,ShmComm,ShmCommBuf); +} +void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) +{ + int ierr=MPI_Cart_shift(communicator,dim,shift,&source,&dest); + assert(ierr==0); +} +int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) +{ + int rank; + int ierr=MPI_Cart_rank (communicator, &coor[0], &rank); + assert(ierr==0); + return rank; +} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, 
std::vector &coor) +{ + coor.resize(_ndimension); + int ierr=MPI_Cart_coords (communicator, rank, _ndimension,&coor[0]); + assert(ierr==0); +} + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + _ndimension = processors.size(); + std::vector periodic(_ndimension,1); + + _Nprocessors=1; + _processors = processors; + + for(int i=0;i<_ndimension;i++){ + _Nprocessors*=_processors[i]; + } + + int Size; + MPI_Comm_size(communicator_world,&Size); + assert(Size==_Nprocessors); + + _processor_coor.resize(_ndimension); + MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator); + MPI_Comm_rank (communicator,&_processor); + MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); +}; + +void CartesianCommunicator::GlobalSum(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(float &f){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSumVector(float *f,int N) +{ + int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSum(double &d) +{ + int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalSumVector(double *d,int N) +{ + int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator); + assert(ierr==0); +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFrom(void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + std::vector reqs(0); + SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); + SendToRecvFromComplete(reqs); +} + +void 
CartesianCommunicator::SendRecvPacket(void *xmit, + void *recv, + int sender, + int receiver, + int bytes) +{ + MPI_Status stat; + assert(sender != receiver); + int tag = sender; + if ( _processor == sender ) { + MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); + } + if ( _processor == receiver ) { + MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); + } +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + MPI_Request xrq; + MPI_Request rrq; + int rank = _processor; + int ierr; + ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + + assert(ierr==0); + + list.push_back(xrq); + list.push_back(rrq); +} + +void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + uint64_t xmit_i = (uint64_t) xmit; + uint64_t recv_i = (uint64_t) recv; + uint64_t shm = (uint64_t) ShmCommBuf; + // assert xmit and recv lie in shared memory region + assert( (xmit_i >= shm) && (xmit_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); + assert( (recv_i >= shm) && (recv_i+bytes <= shm+MAX_MPI_SHM_BYTES) ); + assert(from!=_processor); + assert(dest!=_processor); + MPIoffloadEngine::QueueMultiplexedSend(xmit,bytes,_processor,communicator,dest); + MPIoffloadEngine::QueueMultiplexedRecv(recv,bytes,from,communicator,from); +} + + +void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector &list) +{ + MPIoffloadEngine::WaitAll(); +} + +void CartesianCommunicator::StencilBarrier(void) +{ +} + +void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) +{ + int nreq=list.size(); + std::vector status(nreq); + int ierr = MPI_Waitall(nreq,&list[0],&status[0]); + assert(ierr==0); +} + +void CartesianCommunicator::Barrier(void) +{ + int ierr = MPI_Barrier(communicator); + 
assert(ierr==0); +} + +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) +{ + int ierr=MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator); + assert(ierr==0); +} + +void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) +{ + int ierr= MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator_world); + assert(ierr==0); +} + +void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; } + +void *CartesianCommunicator::ShmBuffer(int rank) { + return NULL; +} +void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { + return NULL; +} + + +}; + diff --git a/lib/communicator/Communicator_none.cc b/lib/communicator/Communicator_none.cc index 8601255a..5e91b305 100644 --- a/lib/communicator/Communicator_none.cc +++ b/lib/communicator/Communicator_none.cc @@ -28,12 +28,15 @@ Author: Peter Boyle #include "Grid.h" namespace Grid { +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// + void CartesianCommunicator::Init(int *argc, char *** arv) { + ShmInitGeneric(); } -int Rank(void ){ return 0; }; - CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _processors = processors; @@ -89,30 +92,17 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector & assert(0); } -void CartesianCommunicator::Barrier(void) -{ -} - -void CartesianCommunicator::Broadcast(int root,void* data, int bytes) -{ -} -void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) -{ -} - - +int CartesianCommunicator::RankWorld(void){return 0;} +void CartesianCommunicator::Barrier(void){} +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {} +void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { } +int 
CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) { return 0;} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor){ coor = _processor_coor ;} void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) { source =0; dest=0; } -int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) -{ - return 0; -} -void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) -{ -} } diff --git a/lib/communicator/Communicator_shmem.cc b/lib/communicator/Communicator_shmem.cc index 091e266e..56e03224 100644 --- a/lib/communicator/Communicator_shmem.cc +++ b/lib/communicator/Communicator_shmem.cc @@ -39,14 +39,24 @@ namespace Grid { BACKTRACEFILE(); \ }\ } -int Rank(void) { - return shmem_my_pe(); -} + + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Info that is setup once and indept of cartesian layout +/////////////////////////////////////////////////////////////////////////////////////////////////// + typedef struct HandShake_t { uint64_t seq_local; uint64_t seq_remote; } HandShake; +std::array make_psync_init(void) { + array ret; + ret.fill(SHMEM_SYNC_VALUE); + return ret; +} +static std::array psync_init = make_psync_init(); + static Vector< HandShake > XConnections; static Vector< HandShake > RConnections; @@ -61,7 +71,9 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { RConnections[pe].seq_remote= 0; } shmem_barrier_all(); + ShmInitGeneric(); } + CartesianCommunicator::CartesianCommunicator(const std::vector &processors) { _ndimension = processors.size(); @@ -89,7 +101,7 @@ void CartesianCommunicator::GlobalSum(uint32_t &u){ static long long source ; static long long dest ; static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; // int nreduce=1; // int pestart=0; @@ -105,7 +117,7 @@ void CartesianCommunicator::GlobalSum(uint64_t 
&u){ static long long source ; static long long dest ; static long long llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; // int nreduce=1; // int pestart=0; @@ -121,7 +133,7 @@ void CartesianCommunicator::GlobalSum(float &f){ static float source ; static float dest ; static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; source = f; dest =0.0; @@ -133,7 +145,7 @@ void CartesianCommunicator::GlobalSumVector(float *f,int N) static float source ; static float dest = 0 ; static float llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; if ( shmem_addr_accessible(f,_processor) ){ shmem_float_sum_to_all(f,f,N,0,0,_Nprocessors,llwrk,psync); @@ -152,7 +164,7 @@ void CartesianCommunicator::GlobalSum(double &d) static double source; static double dest ; static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; source = d; dest = 0; @@ -164,7 +176,8 @@ void CartesianCommunicator::GlobalSumVector(double *d,int N) static double source ; static double dest ; static double llwrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; + if ( shmem_addr_accessible(d,_processor) ){ shmem_double_sum_to_all(d,d,N,0,0,_Nprocessors,llwrk,psync); @@ -230,12 +243,9 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, if ( _processor == sender ) { - printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver); // Check he has posted a receive while(SendSeq->seq_remote == SendSeq->seq_local); - printf("Sender receive %d posted\n",sender,receiver); - // Advance our send count seq = ++(SendSeq->seq_local); @@ -244,26 +254,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, shmem_putmem(recv,xmit,bytes,receiver); 
shmem_fence(); - printf("Sender sent payload %d\n",seq); //Notify him we're done shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver); shmem_fence(); - printf("Sender ringing door bell %d\n",seq); } if ( _processor == receiver ) { - printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver); // Post a receive seq = ++(RecvSeq->seq_local); shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender); - printf("Receiver Opening letter box %d\n",seq); - - // Now wait until he has advanced our reception counter while(RecvSeq->seq_remote != RecvSeq->seq_local); - printf("Receiver Got the mail %d\n",seq); } } @@ -291,7 +294,7 @@ void CartesianCommunicator::Barrier(void) } void CartesianCommunicator::Broadcast(int root,void* data, int bytes) { - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; static uint32_t word; uint32_t *array = (uint32_t *) data; assert( (bytes % 4)==0); @@ -314,7 +317,7 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes) } void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { - static long psync[_SHMEM_REDUCE_SYNC_SIZE]; + static std::array psync = psync_init; static uint32_t word; uint32_t *array = (uint32_t *) data; assert( (bytes % 4)==0); diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index b8e1284a..2b146daa 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -1,3 +1,4 @@ + /************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -44,7 +45,7 @@ public: // Gather for when there is no need to SIMD split with compression /////////////////////////////////////////////////////////////////// template void -Gather_plane_simple (const Lattice &rhs,std::vector > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) +Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimension,int plane,int 
cbmask,compressor &compress, int off=0) { int rd = rhs._grid->_rdimensions[dimension]; @@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice &rhs,std::vector_slice_nblock[dimension]; int e2=rhs._grid->_slice_block[dimension]; + int stride=rhs._grid->_slice_stride[dimension]; if ( cbmask == 0x3 ) { PARALLEL_NESTED_LOOP2 @@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2 } } else { int bo=0; + std::vector > table; for(int n=0;nCheckerBoardFromOindex(o+b);// Could easily be a table lookup + int ocb=1<CheckerBoardFromOindexTable(o+b); if ( ocb &cbmask ) { - buffer[off+bo++]=compress(rhs._odata[so+o+b]); + table.push_back(std::pair (bo++,o+b)); } } } +PARALLEL_FOR_LOOP + for(int i=0;i(temp,pointers,offset); } @@ -114,6 +122,7 @@ PARALLEL_NESTED_LOOP2 } else { assert(0); //Fixme think this is buggy + for(int n=0;n_slice_stride[dimension]; @@ -132,7 +141,7 @@ PARALLEL_NESTED_LOOP2 ////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split ////////////////////////////////////////////////////// -template void Gather_plane_simple (const Lattice &rhs,std::vector > &buffer, int dimension,int plane,int cbmask) +template void Gather_plane_simple (const Lattice &rhs,commVector &buffer, int dimension,int plane,int cbmask) { SimpleCompressor dontcompress; Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress); @@ -150,7 +159,7 @@ template void Gather_plane_extract(const Lattice &rhs,std::vec ////////////////////////////////////////////////////// // Scatter for when there is no need to SIMD split ////////////////////////////////////////////////////// -template void Scatter_plane_simple (Lattice &rhs,std::vector > &buffer, int dimension,int plane,int cbmask) +template void Scatter_plane_simple (Lattice &rhs,commVector &buffer, int dimension,int plane,int cbmask) { int rd = rhs._grid->_rdimensions[dimension]; diff --git a/lib/cshift/Cshift_mpi.h b/lib/cshift/Cshift_mpi.h index 704fda34..b3c07cd6 100644 --- a/lib/cshift/Cshift_mpi.h +++ 
b/lib/cshift/Cshift_mpi.h @@ -119,8 +119,8 @@ template void Cshift_comms(Lattice &ret,const Lattice &r assert(shift_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; - std::vector > send_buf(buffer_size); - std::vector > recv_buf(buffer_size); + commVector send_buf(buffer_size); + commVector recv_buf(buffer_size); int cb= (cbmask==0x2)? Odd : Even; int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); @@ -191,8 +191,8 @@ template void Cshift_comms_simd(Lattice &ret,const Lattice_slice_nblock[dimension]*grid->_slice_block[dimension]; int words = sizeof(vobj)/sizeof(vector_type); - std::vector > send_buf_extract(Nsimd,Vector(buffer_size) ); - std::vector > recv_buf_extract(Nsimd,Vector(buffer_size) ); + std::vector > send_buf_extract(Nsimd,commVector(buffer_size) ); + std::vector > recv_buf_extract(Nsimd,commVector(buffer_size) ); int bytes = buffer_size*sizeof(scalar_object); diff --git a/lib/lattice/Lattice_ET.h b/lib/lattice/Lattice_ET.h index 7644f9da..1bb83901 100644 --- a/lib/lattice/Lattice_ET.h +++ b/lib/lattice/Lattice_ET.h @@ -1,73 +1,74 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/lattice/Lattice_ET.h +Source file: ./lib/lattice/Lattice_ET.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
+This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_ET_H #define GRID_LATTICE_ET_H #include -#include #include #include +#include namespace Grid { - //////////////////////////////////////////////////// - // Predicated where support - //////////////////////////////////////////////////// - template - inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) { +//////////////////////////////////////////////////// +// Predicated where support +//////////////////////////////////////////////////// +template +inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, + const robj &iffalse) { + typename std::remove_const::type ret; - typename std::remove_const::type ret; + typedef typename vobj::scalar_object scalar_object; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; - typedef typename vobj::scalar_object scalar_object; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; + const int Nsimd = vobj::vector_type::Nsimd(); + const int words = sizeof(vobj) / sizeof(vector_type); - const int Nsimd = vobj::vector_type::Nsimd(); - const int words = sizeof(vobj)/sizeof(vector_type); + std::vector mask(Nsimd); + std::vector truevals(Nsimd); + std::vector falsevals(Nsimd); - std::vector mask(Nsimd); - std::vector truevals (Nsimd); - std::vector falsevals(Nsimd); + extract(iftrue, truevals); + extract(iffalse, falsevals); + extract(TensorRemove(predicate), mask); - extract(iftrue ,truevals); - extract(iffalse ,falsevals); - extract(TensorRemove(predicate),mask); - - for(int s=0;s +using 
is_lattice = std::is_base_of; -template using is_lattice = std::is_base_of; +template +using is_lattice_expr = std::is_base_of; template using is_lattice_expr = std::is_base_of; +//Specialization of getVectorType for lattices +template +struct getVectorType >{ + typedef typename Lattice::vector_object type; +}; + template inline sobj eval(const unsigned int ss, const sobj &arg) { return arg; } -template -inline const lobj &eval(const unsigned int ss, const Lattice &arg) -{ - return arg._odata[ss]; +template +inline const lobj &eval(const unsigned int ss, const Lattice &arg) { + return arg._odata[ss]; } // handle nodes in syntax tree template -auto inline eval(const unsigned int ss, const LatticeUnaryExpression &expr) // eval one operand - -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)))) -{ - return expr.first.func(eval(ss,std::get<0>(expr.second))); +auto inline eval( + const unsigned int ss, + const LatticeUnaryExpression &expr) // eval one operand + -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) { + return expr.first.func(eval(ss, std::get<0>(expr.second))); } template -auto inline eval(const unsigned int ss, const LatticeBinaryExpression &expr) // eval two operands - -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)))) -{ - return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))); +auto inline eval( + const unsigned int ss, + const LatticeBinaryExpression &expr) // eval two operands + -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), + eval(ss, std::get<1>(expr.second)))) { + return expr.first.func(eval(ss, std::get<0>(expr.second)), + eval(ss, std::get<1>(expr.second))); } template -auto inline eval(const unsigned int ss, const LatticeTrinaryExpression &expr) // eval three operands - -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)))) -{ - return 
expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) ); +auto inline eval(const unsigned int ss, + const LatticeTrinaryExpression + &expr) // eval three operands + -> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), + eval(ss, std::get<1>(expr.second)), + eval(ss, std::get<2>(expr.second)))) { + return expr.first.func(eval(ss, std::get<0>(expr.second)), + eval(ss, std::get<1>(expr.second)), + eval(ss, std::get<2>(expr.second))); } ////////////////////////////////////////////////////////////////////////// -// Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion +// Obtain the grid from an expression, ensuring conformable. This must follow a +// tree recursion ////////////////////////////////////////////////////////////////////////// -template::value, T1>::type * =nullptr > -inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf -{ - if ( grid ) { - conformable(grid,lat._grid); - } - grid=lat._grid; -} -template::value, T1>::type * = nullptr > -inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf +template ::value, T1>::type * = nullptr> +inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf { + if (grid) { + conformable(grid, lat._grid); + } + grid = lat._grid; } +template ::value, T1>::type * = nullptr> +inline void GridFromExpression(GridBase *&grid, + const T1 ¬lat) // non-lattice leaf +{} template -inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression &expr) -{ - GridFromExpression(grid,std::get<0>(expr.second));// recurse +inline void GridFromExpression(GridBase *&grid, + const LatticeUnaryExpression &expr) { + GridFromExpression(grid, std::get<0>(expr.second)); // recurse } template -inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression &expr) -{ - GridFromExpression(grid,std::get<0>(expr.second));// recurse - 
GridFromExpression(grid,std::get<1>(expr.second)); +inline void GridFromExpression( + GridBase *&grid, const LatticeBinaryExpression &expr) { + GridFromExpression(grid, std::get<0>(expr.second)); // recurse + GridFromExpression(grid, std::get<1>(expr.second)); } template -inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression &expr) -{ - GridFromExpression(grid,std::get<0>(expr.second));// recurse - GridFromExpression(grid,std::get<1>(expr.second)); - GridFromExpression(grid,std::get<2>(expr.second)); +inline void GridFromExpression( + GridBase *&grid, const LatticeTrinaryExpression &expr) { + GridFromExpression(grid, std::get<0>(expr.second)); // recurse + GridFromExpression(grid, std::get<1>(expr.second)); + GridFromExpression(grid, std::get<2>(expr.second)); } - ////////////////////////////////////////////////////////////////////////// -// Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion +// Obtain the CB from an expression, ensuring conformable. 
This must follow a +// tree recursion ////////////////////////////////////////////////////////////////////////// -template::value, T1>::type * =nullptr > -inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf +template ::value, T1>::type * = nullptr> +inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf { - if ( (cb==Odd) || (cb==Even) ) { - assert(cb==lat.checkerboard); - } - cb=lat.checkerboard; + if ((cb == Odd) || (cb == Even)) { + assert(cb == lat.checkerboard); + } + cb = lat.checkerboard; // std::cout<::value, T1>::type * = nullptr > -inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf +template ::value, T1>::type * = nullptr> +inline void CBFromExpression(int &cb, const T1 ¬lat) // non-lattice leaf { // std::cout< -inline void CBFromExpression(int &cb,const LatticeUnaryExpression &expr) -{ - CBFromExpression(cb,std::get<0>(expr.second));// recurse +inline void CBFromExpression(int &cb, + const LatticeUnaryExpression &expr) { + CBFromExpression(cb, std::get<0>(expr.second)); // recurse // std::cout< -inline void CBFromExpression(int &cb,const LatticeBinaryExpression &expr) -{ - CBFromExpression(cb,std::get<0>(expr.second));// recurse - CBFromExpression(cb,std::get<1>(expr.second)); +inline void CBFromExpression(int &cb, + const LatticeBinaryExpression &expr) { + CBFromExpression(cb, std::get<0>(expr.second)); // recurse + CBFromExpression(cb, std::get<1>(expr.second)); // std::cout< -inline void CBFromExpression( int &cb,const LatticeTrinaryExpression &expr) -{ - CBFromExpression(cb,std::get<0>(expr.second));// recurse - CBFromExpression(cb,std::get<1>(expr.second)); - CBFromExpression(cb,std::get<2>(expr.second)); +inline void CBFromExpression( + int &cb, const LatticeTrinaryExpression &expr) { + CBFromExpression(cb, std::get<0>(expr.second)); // recurse + CBFromExpression(cb, std::get<1>(expr.second)); + CBFromExpression(cb, std::get<2>(expr.second)); // std::cout< struct name\ -{\ - static auto inline 
func(const arg a)-> decltype(ret) { return ret; } \ -}; +#define GridUnopClass(name, ret) \ + template \ + struct name { \ + static auto inline func(const arg a) -> decltype(ret) { return ret; } \ + }; -GridUnopClass(UnarySub,-a); -GridUnopClass(UnaryNot,Not(a)); -GridUnopClass(UnaryAdj,adj(a)); -GridUnopClass(UnaryConj,conjugate(a)); -GridUnopClass(UnaryTrace,trace(a)); -GridUnopClass(UnaryTranspose,transpose(a)); -GridUnopClass(UnaryTa,Ta(a)); -GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a)); -GridUnopClass(UnaryReal,real(a)); -GridUnopClass(UnaryImag,imag(a)); -GridUnopClass(UnaryToReal,toReal(a)); -GridUnopClass(UnaryToComplex,toComplex(a)); -GridUnopClass(UnaryAbs,abs(a)); -GridUnopClass(UnarySqrt,sqrt(a)); -GridUnopClass(UnaryRsqrt,rsqrt(a)); -GridUnopClass(UnarySin,sin(a)); -GridUnopClass(UnaryCos,cos(a)); -GridUnopClass(UnaryLog,log(a)); -GridUnopClass(UnaryExp,exp(a)); +GridUnopClass(UnarySub, -a); +GridUnopClass(UnaryNot, Not(a)); +GridUnopClass(UnaryAdj, adj(a)); +GridUnopClass(UnaryConj, conjugate(a)); +GridUnopClass(UnaryTrace, trace(a)); +GridUnopClass(UnaryTranspose, transpose(a)); +GridUnopClass(UnaryTa, Ta(a)); +GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a)); +GridUnopClass(UnaryReal, real(a)); +GridUnopClass(UnaryImag, imag(a)); +GridUnopClass(UnaryToReal, toReal(a)); +GridUnopClass(UnaryToComplex, toComplex(a)); +GridUnopClass(UnaryTimesI, timesI(a)); +GridUnopClass(UnaryTimesMinusI, timesMinusI(a)); +GridUnopClass(UnaryAbs, abs(a)); +GridUnopClass(UnarySqrt, sqrt(a)); +GridUnopClass(UnaryRsqrt, rsqrt(a)); +GridUnopClass(UnarySin, sin(a)); +GridUnopClass(UnaryCos, cos(a)); +GridUnopClass(UnaryAsin, asin(a)); +GridUnopClass(UnaryAcos, acos(a)); +GridUnopClass(UnaryLog, log(a)); +GridUnopClass(UnaryExp, exp(a)); //////////////////////////////////////////// // Binary operators //////////////////////////////////////////// -#define GridBinOpClass(name,combination)\ -template \ -struct name\ -{\ - static auto inline func(const left 
&lhs,const right &rhs)-> decltype(combination) const \ - {\ - return combination;\ - }\ -} -GridBinOpClass(BinaryAdd,lhs+rhs); -GridBinOpClass(BinarySub,lhs-rhs); -GridBinOpClass(BinaryMul,lhs*rhs); +#define GridBinOpClass(name, combination) \ + template \ + struct name { \ + static auto inline func(const left &lhs, const right &rhs) \ + -> decltype(combination) const { \ + return combination; \ + } \ + } +GridBinOpClass(BinaryAdd, lhs + rhs); +GridBinOpClass(BinarySub, lhs - rhs); +GridBinOpClass(BinaryMul, lhs *rhs); +GridBinOpClass(BinaryDiv, lhs /rhs); -GridBinOpClass(BinaryAnd ,lhs&rhs); -GridBinOpClass(BinaryOr ,lhs|rhs); -GridBinOpClass(BinaryAndAnd,lhs&&rhs); -GridBinOpClass(BinaryOrOr ,lhs||rhs); +GridBinOpClass(BinaryAnd, lhs &rhs); +GridBinOpClass(BinaryOr, lhs | rhs); +GridBinOpClass(BinaryAndAnd, lhs &&rhs); +GridBinOpClass(BinaryOrOr, lhs || rhs); //////////////////////////////////////////////////// // Trinary conditional op //////////////////////////////////////////////////// -#define GridTrinOpClass(name,combination)\ -template \ -struct name\ -{\ - static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \ - {\ - return combination;\ - }\ -} +#define GridTrinOpClass(name, combination) \ + template \ + struct name { \ + static auto inline func(const predicate &pred, const left &lhs, \ + const right &rhs) -> decltype(combination) const { \ + return combination; \ + } \ + } -GridTrinOpClass(TrinaryWhere,(predicatedWhere::type, \ - typename std::remove_reference::type> (pred,lhs,rhs))); +GridTrinOpClass( + TrinaryWhere, + (predicatedWhere::type, + typename std::remove_reference::type>(pred, lhs, + rhs))); //////////////////////////////////////////// // Operator syntactical glue //////////////////////////////////////////// - -#define GRID_UNOP(name) name -#define GRID_BINOP(name) name -#define GRID_TRINOP(name) name -#define GRID_DEF_UNOP(op, name)\ -template ::value||is_lattice_expr::value, 
T1>::type* = nullptr> inline auto op(const T1 &arg) \ - -> decltype(LatticeUnaryExpression(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \ -{ return LatticeUnaryExpression(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); } +#define GRID_UNOP(name) name +#define GRID_BINOP(name) name +#define GRID_TRINOP(name) \ + name -#define GRID_BINOP_LEFT(op, name)\ -template ::value||is_lattice_expr::value, T1>::type* = nullptr>\ -inline auto op(const T1 &lhs,const T2&rhs) \ - -> decltype(LatticeBinaryExpression(std::make_pair(GRID_BINOP(name)(),\ - std::forward_as_tuple(lhs, rhs)))) \ -{\ - return LatticeBinaryExpression(std::make_pair(GRID_BINOP(name)(),\ - std::forward_as_tuple(lhs, rhs))); \ -} +#define GRID_DEF_UNOP(op, name) \ + template ::value || \ + is_lattice_expr::value, \ + T1>::type * = nullptr> \ + inline auto op(const T1 &arg) \ + ->decltype(LatticeUnaryExpression( \ + std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \ + return LatticeUnaryExpression( \ + std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \ + } -#define GRID_BINOP_RIGHT(op, name)\ - template ::value && !is_lattice_expr::value, T1>::type* = nullptr,\ - typename std::enable_if< is_lattice::value || is_lattice_expr::value, T2>::type* = nullptr> \ -inline auto op(const T1 &lhs,const T2&rhs) \ - -> decltype(LatticeBinaryExpression(std::make_pair(GRID_BINOP(name)(),\ - std::forward_as_tuple(lhs, rhs)))) \ -{\ - return LatticeBinaryExpression(std::make_pair(GRID_BINOP(name)(),\ - std::forward_as_tuple(lhs, rhs))); \ -} +#define GRID_BINOP_LEFT(op, name) \ + template ::value || \ + is_lattice_expr::value, \ + T1>::type * = nullptr> \ + inline auto op(const T1 &lhs, const T2 &rhs) \ + ->decltype( \ + LatticeBinaryExpression( \ + std::make_pair(GRID_BINOP(name)(), \ + std::forward_as_tuple(lhs, rhs)))) { \ + return LatticeBinaryExpression( \ + std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ + } -#define GRID_DEF_BINOP(op, 
name)\ - GRID_BINOP_LEFT(op,name);\ - GRID_BINOP_RIGHT(op,name); +#define GRID_BINOP_RIGHT(op, name) \ + template ::value && \ + !is_lattice_expr::value, \ + T1>::type * = nullptr, \ + typename std::enable_if::value || \ + is_lattice_expr::value, \ + T2>::type * = nullptr> \ + inline auto op(const T1 &lhs, const T2 &rhs) \ + ->decltype( \ + LatticeBinaryExpression( \ + std::make_pair(GRID_BINOP(name)(), \ + std::forward_as_tuple(lhs, rhs)))) { \ + return LatticeBinaryExpression( \ + std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \ + } +#define GRID_DEF_BINOP(op, name) \ + GRID_BINOP_LEFT(op, name); \ + GRID_BINOP_RIGHT(op, name); -#define GRID_DEF_TRINOP(op, name)\ -template inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \ - -> decltype(LatticeTrinaryExpression(std::make_pair(GRID_TRINOP(name)(),\ - std::forward_as_tuple(pred,lhs,rhs)))) \ -{\ - return LatticeTrinaryExpression(std::make_pair(GRID_TRINOP(name)(), \ - std::forward_as_tuple(pred,lhs, rhs))); \ -} +#define GRID_DEF_TRINOP(op, name) \ + template \ + inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \ + ->decltype( \ + LatticeTrinaryExpression(std::make_pair( \ + GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \ + return LatticeTrinaryExpression(std::make_pair( \ + GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \ + } //////////////////////// -//Operator definitions +// Operator definitions //////////////////////// -GRID_DEF_UNOP(operator -,UnarySub); -GRID_DEF_UNOP(Not,UnaryNot); -GRID_DEF_UNOP(operator !,UnaryNot); -GRID_DEF_UNOP(adj,UnaryAdj); -GRID_DEF_UNOP(conjugate,UnaryConj); -GRID_DEF_UNOP(trace,UnaryTrace); -GRID_DEF_UNOP(transpose,UnaryTranspose); -GRID_DEF_UNOP(Ta,UnaryTa); -GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup); -GRID_DEF_UNOP(real,UnaryReal); -GRID_DEF_UNOP(imag,UnaryImag); -GRID_DEF_UNOP(toReal,UnaryToReal); -GRID_DEF_UNOP(toComplex,UnaryToComplex); -GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath 
C++98; DON'T do the abs-fabs-dabs-labs thing -GRID_DEF_UNOP(sqrt ,UnarySqrt); -GRID_DEF_UNOP(rsqrt,UnaryRsqrt); -GRID_DEF_UNOP(sin ,UnarySin); -GRID_DEF_UNOP(cos ,UnaryCos); -GRID_DEF_UNOP(log ,UnaryLog); -GRID_DEF_UNOP(exp ,UnaryExp); +GRID_DEF_UNOP(operator-, UnarySub); +GRID_DEF_UNOP(Not, UnaryNot); +GRID_DEF_UNOP(operator!, UnaryNot); +GRID_DEF_UNOP(adj, UnaryAdj); +GRID_DEF_UNOP(conjugate, UnaryConj); +GRID_DEF_UNOP(trace, UnaryTrace); +GRID_DEF_UNOP(transpose, UnaryTranspose); +GRID_DEF_UNOP(Ta, UnaryTa); +GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup); +GRID_DEF_UNOP(real, UnaryReal); +GRID_DEF_UNOP(imag, UnaryImag); +GRID_DEF_UNOP(toReal, UnaryToReal); +GRID_DEF_UNOP(toComplex, UnaryToComplex); +GRID_DEF_UNOP(timesI, UnaryTimesI); +GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI); +GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the + // abs-fabs-dabs-labs thing +GRID_DEF_UNOP(sqrt, UnarySqrt); +GRID_DEF_UNOP(rsqrt, UnaryRsqrt); +GRID_DEF_UNOP(sin, UnarySin); +GRID_DEF_UNOP(cos, UnaryCos); +GRID_DEF_UNOP(asin, UnaryAsin); +GRID_DEF_UNOP(acos, UnaryAcos); +GRID_DEF_UNOP(log, UnaryLog); +GRID_DEF_UNOP(exp, UnaryExp); -GRID_DEF_BINOP(operator+,BinaryAdd); -GRID_DEF_BINOP(operator-,BinarySub); -GRID_DEF_BINOP(operator*,BinaryMul); +GRID_DEF_BINOP(operator+, BinaryAdd); +GRID_DEF_BINOP(operator-, BinarySub); +GRID_DEF_BINOP(operator*, BinaryMul); +GRID_DEF_BINOP(operator/, BinaryDiv); -GRID_DEF_BINOP(operator&,BinaryAnd); -GRID_DEF_BINOP(operator|,BinaryOr); -GRID_DEF_BINOP(operator&&,BinaryAndAnd); -GRID_DEF_BINOP(operator||,BinaryOrOr); +GRID_DEF_BINOP(operator&, BinaryAnd); +GRID_DEF_BINOP(operator|, BinaryOr); +GRID_DEF_BINOP(operator&&, BinaryAndAnd); +GRID_DEF_BINOP(operator||, BinaryOrOr); -GRID_DEF_TRINOP(where,TrinaryWhere); +GRID_DEF_TRINOP(where, TrinaryWhere); ///////////////////////////////////////////////////////////// // Closure convenience to force expression to evaluate 
///////////////////////////////////////////////////////////// -template - auto closure(const LatticeUnaryExpression & expr) - -> Lattice(expr.second))))> -{ - Lattice(expr.second))))> ret(expr); +template +auto closure(const LatticeUnaryExpression &expr) + -> Lattice(expr.second))))> { + Lattice(expr.second))))> ret( + expr); return ret; } -template - auto closure(const LatticeBinaryExpression & expr) - -> Lattice(expr.second)), - eval(0,std::get<1>(expr.second))))> -{ - Lattice(expr.second)), - eval(0,std::get<1>(expr.second))))> ret(expr); +template +auto closure(const LatticeBinaryExpression &expr) + -> Lattice(expr.second)), + eval(0, std::get<1>(expr.second))))> { + Lattice(expr.second)), + eval(0, std::get<1>(expr.second))))> + ret(expr); return ret; } -template - auto closure(const LatticeTrinaryExpression & expr) - -> Lattice(expr.second)), - eval(0,std::get<1>(expr.second)), - eval(0,std::get<2>(expr.second))))> -{ - Lattice(expr.second)), - eval(0,std::get<1>(expr.second)), - eval(0,std::get<2>(expr.second))))> ret(expr); +template +auto closure(const LatticeTrinaryExpression &expr) + -> Lattice(expr.second)), + eval(0, std::get<1>(expr.second)), + eval(0, std::get<2>(expr.second))))> { + Lattice(expr.second)), + eval(0, std::get<1>(expr.second)), + eval(0, std::get<2>(expr.second))))> + ret(expr); return ret; } @@ -382,12 +433,11 @@ template #undef GRID_DEF_UNOP #undef GRID_DEF_BINOP #undef GRID_DEF_TRINOP - } #if 0 using namespace Grid; - + int main(int argc,char **argv){ Lattice v1(16); @@ -397,7 +447,7 @@ using namespace Grid; BinaryAdd tmp; LatticeBinaryExpression,Lattice &,Lattice &> expr(std::make_pair(tmp, - std::forward_as_tuple(v1,v2))); + std::forward_as_tuple(v1,v2))); tmp.func(eval(0,v1),eval(0,v2)); auto var = v1+v2; diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index d97b1204..e4dc1ca8 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -1,32 +1,33 @@ - 
/************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/lattice/Lattice_base.h +Source file: ./lib/lattice/Lattice_base.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_LATTICE_BASE_H #define GRID_LATTICE_BASE_H @@ -64,9 +65,6 @@ public: class LatticeExpressionBase {}; -template using Vector = std::vector >; // Aligned allocator?? -template using Matrix = std::vector > >; // Aligned allocator?? - template class LatticeUnaryExpression : public std::pair > , public LatticeExpressionBase { public: @@ -101,6 +99,7 @@ public: int begin(void) { return 0;}; int end(void) { return _odata.size(); } vobj & operator[](int i) { return _odata[i]; }; + const vobj & operator[](int i) const { return _odata[i]; }; public: typedef typename vobj::scalar_type scalar_type; @@ -255,6 +254,18 @@ PARALLEL_FOR_LOOP checkerboard=0; } + Lattice(const Lattice& r){ // copy constructor + _grid = r._grid; + checkerboard = r.checkerboard; + _odata.resize(_grid->oSites());// essential + PARALLEL_FOR_LOOP + for(int ss=0;ss<_grid->oSites();ss++){ + _odata[ss]=r._odata[ss]; + } + } + + + virtual ~Lattice(void) = default; template strong_inline Lattice & operator = (const sobj & r){ @@ -267,7 +278,7 @@ PARALLEL_FOR_LOOP template strong_inline Lattice & operator = (const Lattice & r){ this->checkerboard = r.checkerboard; conformable(*this,r); - std::cout< +#include "Lattice_ET.h" #else -#include +#include "Lattice_overload.h" #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "Lattice_arith.h" +#include "Lattice_trace.h" +#include "Lattice_transpose.h" +#include "Lattice_local.h" +#include "Lattice_reduction.h" +#include "Lattice_peekpoke.h" +#include "Lattice_reality.h" 
+#include "Lattice_comparison_utils.h" +#include "Lattice_comparison.h" +#include "Lattice_coordinate.h" +#include "Lattice_where.h" +#include "Lattice_rng.h" +#include "Lattice_unary.h" +#include "Lattice_transfer.h" #endif diff --git a/lib/lattice/Lattice_peekpoke.h b/lib/lattice/Lattice_peekpoke.h index 9bece943..19d349c4 100644 --- a/lib/lattice/Lattice_peekpoke.h +++ b/lib/lattice/Lattice_peekpoke.h @@ -154,7 +154,7 @@ PARALLEL_FOR_LOOP template void peekLocalSite(sobj &s,const Lattice &l,std::vector &site){ - GridBase *grid=l._grid; + GridBase *grid = l._grid; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; @@ -164,16 +164,18 @@ PARALLEL_FOR_LOOP assert( l.checkerboard== l._grid->CheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + static const int words=sizeof(vobj)/sizeof(vector_type); int odx,idx; idx= grid->iIndex(site); odx= grid->oIndex(site); - std::vector buf(Nsimd); - - extract(l._odata[odx],buf); + scalar_type * vp = (scalar_type *)&l._odata[odx]; + scalar_type * pt = (scalar_type *)&s; + + for(int w=0;wCheckerBoard(site)); assert( sizeof(sobj)*Nsimd == sizeof(vobj)); + static const int words=sizeof(vobj)/sizeof(vector_type); int odx,idx; idx= grid->iIndex(site); odx= grid->oIndex(site); - std::vector buf(Nsimd); - - // extract-modify-merge cycle is easiest way and this is not perf critical - extract(l._odata[odx],buf); + scalar_type * vp = (scalar_type *)&l._odata[odx]; + scalar_type * pt = (scalar_type *)&s; - buf[idx] = s; - - merge(l._odata[odx],buf); + for(int w=0;w inline RealD norm2(const Lattice &arg){ ComplexD nrm = innerProduct(arg,arg); - return real(nrm); + return std::real(nrm); } template diff --git a/lib/lattice/Lattice_rng.h b/lib/lattice/Lattice_rng.h index 3254af30..51cc16ec 100644 --- a/lib/lattice/Lattice_rng.h +++ b/lib/lattice/Lattice_rng.h @@ -294,11 +294,12 @@ namespace Grid { int rank,o_idx,i_idx; _grid->GlobalIndexToGlobalCoor(gidx,gcoor); 
_grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - + int l_idx=generator_idx(o_idx,i_idx); - - std::vector site_seeds(4); - for(int i=0;i<4;i++){ + + const int num_rand_seed=16; + std::vector site_seeds(num_rand_seed); + for(int i=0;i &in,Lattice &out) assert(ig->_ldimensions[d] == og->_ldimensions[d]); } -PARALLEL_FOR_LOOP + //PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ std::vector lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); @@ -386,7 +386,7 @@ void InsertSlice(Lattice &lowDim,Lattice & higherDim,int slice, int } // the above should guarantee that the operations are local -PARALLEL_FOR_LOOP + //PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ std::vector lcoor(nl); std::vector hcoor(nh); @@ -420,15 +420,15 @@ void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, in assert(hg->_processors[orthog]==1); int dl; dl = 0; - for(int d=0;d_processors[dl] == hg->_processors[d]); - assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); - dl++; + for(int d=0;d_processors[dl] == hg->_processors[d]); + assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); + dl++; } } // the above should guarantee that the operations are local -PARALLEL_FOR_LOOP + //PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ std::vector lcoor(nl); std::vector hcoor(nh); @@ -446,6 +446,79 @@ PARALLEL_FOR_LOOP } + +template +void InsertSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) +{ + typedef typename vobj::scalar_object sobj; + sobj s; + + GridBase *lg = lowDim._grid; + GridBase *hg = higherDim._grid; + int nl = lg->_ndimension; + int nh = hg->_ndimension; + + assert(nl == nh); + assert(orthog=0); + + for(int d=0;d_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } + + // the above should guarantee that the operations are local + //PARALLEL_FOR_LOOP + for(int idx=0;idxlSites();idx++){ + std::vector lcoor(nl); + std::vector hcoor(nh); + lg->LocalIndexToLocalCoor(idx,lcoor); + if( 
lcoor[orthog] == slice_lo ) { + hcoor=lcoor; + hcoor[orthog] = slice_hi; + peekLocalSite(s,lowDim,lcoor); + pokeLocalSite(s,higherDim,hcoor); + } + } +} + + +template +void ExtractSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) +{ + typedef typename vobj::scalar_object sobj; + sobj s; + + GridBase *lg = lowDim._grid; + GridBase *hg = higherDim._grid; + int nl = lg->_ndimension; + int nh = hg->_ndimension; + + assert(nl == nh); + assert(orthog=0); + + for(int d=0;d_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } + + // the above should guarantee that the operations are local + //PARALLEL_FOR_LOOP + for(int idx=0;idxlSites();idx++){ + std::vector lcoor(nl); + std::vector hcoor(nh); + lg->LocalIndexToLocalCoor(idx,lcoor); + if( lcoor[orthog] == slice_lo ) { + hcoor=lcoor; + hcoor[orthog] = slice_hi; + peekLocalSite(s,higherDim,hcoor); + pokeLocalSite(s,lowDim,lcoor); + } + } +} + + template void Replicate(Lattice &coarse,Lattice & fine) { @@ -482,6 +555,96 @@ void Replicate(Lattice &coarse,Lattice & fine) } +//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order +template +typename std::enable_if::value && !isSIMDvectorized::value, void>::type unvectorizeToLexOrdArray(std::vector &out, const Lattice &in){ + typedef typename vobj::vector_type vtype; + + GridBase* in_grid = in._grid; + out.resize(in_grid->lSites()); + + int ndim = in_grid->Nd(); + int in_nsimd = vtype::Nsimd(); + std::vector > in_icoor(in_nsimd); + + for(int lane=0; lane < in_nsimd; lane++){ + in_icoor[lane].resize(ndim); + in_grid->iCoorFromIindex(in_icoor[lane], lane); + } + +PARALLEL_FOR_LOOP + for(int in_oidx = 0; in_oidx < in_grid->oSites(); in_oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector out_ptrs(in_nsimd); + + std::vector in_ocoor(ndim); + in_grid->oCoorFromOindex(in_ocoor, in_oidx); + + std::vector lcoor(in_grid->Nd()); + + for(int 
lane=0; lane < in_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*in_icoor[lane][mu]; + + int lex; + Lexicographic::IndexFromCoor(lcoor, lex, in_grid->_ldimensions); + out_ptrs[lane] = &out[lex]; + } + + //Unpack into those ptrs + const vobj & in_vobj = in._odata[in_oidx]; + extract1(in_vobj, out_ptrs, 0); + } +} + +//Convert a Lattice from one precision to another +template +void precisionChange(Lattice &out, const Lattice &in){ + assert(out._grid->Nd() == in._grid->Nd()); + out.checkerboard = in.checkerboard; + GridBase *in_grid=in._grid; + GridBase *out_grid = out._grid; + + typedef typename VobjOut::scalar_object SobjOut; + typedef typename VobjIn::scalar_object SobjIn; + + int ndim = out._grid->Nd(); + int out_nsimd = out_grid->Nsimd(); + + std::vector > out_icoor(out_nsimd); + + for(int lane=0; lane < out_nsimd; lane++){ + out_icoor[lane].resize(ndim); + out_grid->iCoorFromIindex(out_icoor[lane], lane); + } + + std::vector in_slex_conv(in_grid->lSites()); + unvectorizeToLexOrdArray(in_slex_conv, in); + + PARALLEL_FOR_LOOP + for(int out_oidx=0;out_oidxoSites();out_oidx++){ + std::vector out_ocoor(ndim); + out_grid->oCoorFromOindex(out_ocoor, out_oidx); + + std::vector ptrs(out_nsimd); + + std::vector lcoor(out_grid->Nd()); + + for(int lane=0; lane < out_nsimd; lane++){ + for(int mu=0;mu_rdimensions[mu]*out_icoor[lane][mu]; + + int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions); + ptrs[lane] = &in_slex_conv[llex]; + } + merge(out._odata[out_oidx], ptrs, 0); + } +} + + + + + } #endif diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 184209dc..e2af0545 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -194,22 +194,22 @@ class BinaryIO { std::vector site({x,y,z,t}); - if ( grid->IsBoss() ) { - fin.read((char *)&file_object,sizeof(file_object)); - bytes += sizeof(file_object); - if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee32) le32toh_v((void 
*)&file_object,sizeof(file_object)); - if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); + if (grid->IsBoss()) { + fin.read((char *)&file_object, sizeof(file_object)); + bytes += sizeof(file_object); + if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); - munge(file_object,munged,csum); + munge(file_object, munged, csum); } // The boss who read the file has their value poked pokeSite(munged,Umu,site); }}}} timer.Stop(); std::cout<IsBoss() ) { - - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object)); - bytes+=sizeof(file_object); + // NB could gather an xstrip as an optimisation. 
+ fout.write((char *)&file_object,sizeof(file_object)); + bytes+=sizeof(file_object); } }}}} timer.Stop(); std::cout<ThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank,(void *)&saved[0],bytes); if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fout.write((char *)&saved[0],bytes); } } @@ -355,14 +355,14 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fin.read((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } grid->Broadcast(0,(void *)&saved[0],bytes); if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); + parallel.SetState(saved,l_idx); } } @@ -415,15 +415,15 @@ class BinaryIO { if ( d == 0 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; } @@ -434,9 +434,9 @@ class BinaryIO { std::cout<< std::dec ; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -457,13 +457,13 @@ class BinaryIO { // available (how short sighted is that?) 
////////////////////////////////////////////////////////// Umu = zero; - static uint32_t csum=0; + static uint32_t csum; csum=0; fobj fileObj; static sobj siteObj; // Static to place in symmetric region for SHMEM // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -472,8 +472,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } ///////////////////////// @@ -487,29 +487,29 @@ class BinaryIO { // iorank reads from the seek //////////////////////////////// if (myrank == iorank) { - - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); - - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); + + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); + + if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); - } + } // Possibly do transport through pt2pt if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + } } // Poke at destination if ( myrank 
== rank ) { - pokeLocalSite(siteObj,Umu,lsite); + pokeLocalSite(siteObj,Umu,lsite); } grid->Barrier(); // necessary? } @@ -520,7 +520,7 @@ class BinaryIO { timer.Stop(); std::cout<_ndimension-1 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; @@ -577,9 +577,9 @@ class BinaryIO { grid->GlobalSum(tmp); std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -610,7 +610,7 @@ class BinaryIO { // should aggregate a whole chunk and then write. 
// need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -619,8 +619,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } @@ -640,26 +640,26 @@ class BinaryIO { // Pair of nodes may need to do pt2pt send if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it + // Send to IOrank + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + } } grid->Barrier(); // necessary? if (myrank == iorank) { - - munge(siteObj,fileObj,csum); + + munge(siteObj,fileObj,csum); - if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); - - fout.seekp(offset+g_idx*sizeof(fileObj)); - fout.write((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); } } @@ -668,7 +668,7 @@ class BinaryIO { timer.Stop(); std::cout< +#include "pugiconfig.hpp" #ifndef HEADER_PUGIXML_HPP #define HEADER_PUGIXML_HPP diff --git a/lib/qcd/QCD.h b/lib/qcd/QCD.h index 
92fa5857..f434bdd9 100644 --- a/lib/qcd/QCD.h +++ b/lib/qcd/QCD.h @@ -55,10 +55,19 @@ namespace QCD { ////////////////////////////////////////////////////////////////////////////// // QCD iMatrix types // Index conventions: Lorentz x Spin x Colour + // note: static const int or constexpr will work for type deductions + // with the intel compiler (up to version 17) ////////////////////////////////////////////////////////////////////////////// - static const int ColourIndex = 2; - static const int SpinIndex = 1; - static const int LorentzIndex= 0; + #define ColourIndex 2 + #define SpinIndex 1 + #define LorentzIndex 0 + + + // Also should make these a named enum type + static const int DaggerNo=0; + static const int DaggerYes=1; + static const int InverseNo=0; + static const int InverseYes=1; // Useful traits is this a spin index //typename std::enable_if,SpinorIndex>::value,iVector >::type *SFINAE; @@ -484,16 +493,27 @@ namespace QCD { } //namespace QCD } // Grid -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include + +// Include representations +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + #endif diff --git a/lib/qcd/action/ActionBase.h b/lib/qcd/action/ActionBase.h index 8b91fb79..56d6b8e0 100644 --- a/lib/qcd/action/ActionBase.h +++ b/lib/qcd/action/ActionBase.h @@ -1,86 +1,153 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/ActionBase.h +Source file: ./lib/qcd/action/ActionBase.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: neo - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU 
General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_ACTION_BASE #define QCD_ACTION_BASE namespace Grid { -namespace QCD{ - -template -class Action { +namespace QCD { +template +class Action { public: + bool is_smeared = false; // Boundary conditions? // Heatbath? 
- virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions - virtual RealD S (const GaugeField &U) = 0; // evaluate the action - virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative - virtual ~Action() {}; + virtual void refresh(const GaugeField& U, + GridParallelRNG& pRNG) = 0; // refresh pseudofermions + virtual RealD S(const GaugeField& U) = 0; // evaluate the action + virtual void deriv(const GaugeField& U, + GaugeField& dSdU) = 0; // evaluate the action derivative + virtual ~Action(){}; +}; + +// Indexing of tuple types +template +struct Index; + +template +struct Index> { + static const std::size_t value = 0; +}; + +template +struct Index> { + static const std::size_t value = 1 + Index>::value; }; -// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh /* -template -class PseudoFermionAction : public Action { +template +struct ActionLevel { public: - FermionField Phi; - GridParallelRNG &pRNG; - GridBase &Grid; + typedef Action* + ActPtr; // now force the same colours as the rest of the code - PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) { - }; + //Add supported representations here - virtual void refresh(const GaugeField &gauge) { - gaussian(Phi,pRNG); - }; -}; -*/ - -template struct ActionLevel{ -public: - - typedef Action* ActPtr; // now force the same colours as the rest of the code - - int multiplier; + unsigned int multiplier; std::vector actions; - ActionLevel(int mul = 1) : multiplier(mul) { - assert (mul > 0); + ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) { + assert(mul >= 1); }; - - void push_back(ActPtr ptr){ - actions.push_back(ptr); + + void push_back(ActPtr ptr) { actions.push_back(ptr); } +}; +*/ + +template +struct ActionLevel { + public: + unsigned int multiplier; + + // Fundamental repr actions separated because of the smearing + typedef Action* 
ActPtr; + + // construct a tuple of vectors of the actions for the corresponding higher + // representation fields + typedef typename AccessTypes::VectorCollection action_collection; + action_collection actions_hirep; + typedef typename AccessTypes::FieldTypeCollection action_hirep_types; + + std::vector& actions; + + // Temporary conversion between ActionLevel and ActionLevelHirep + //ActionLevelHirep(ActionLevel& AL ):actions(AL.actions), multiplier(AL.multiplier){} + + ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) { + // initialize the hirep vectors to zero. + //apply(this->resize, actions_hirep, 0); //need a working resize + assert(mul >= 1); + }; + + //void push_back(ActPtr ptr) { actions.push_back(ptr); } + + + + template < class Field > + void push_back(Action* ptr) { + // insert only in the correct vector + std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr); + }; + + + + template < class ActPtr> + static void resize(ActPtr ap, unsigned int n){ + ap->resize(n); + } + + //template + //auto getRepresentation(Repr& R)->decltype(std::get(R).U) {return std::get(R).U;} + + // Loop on tuple for a callable function + template + inline typename std::enable_if::value, void>::type apply( + Callable, Repr& R,Args&...) const {} + + template + inline typename std::enable_if::value, void>::type apply( + Callable fn, Repr& R, Args&... 
arguments) const { + fn(std::get(actions_hirep), std::get(R.rep), arguments...); + apply(fn, R, arguments...); + } + }; -template using ActionSet = std::vector >; +//template +//using ActionSet = std::vector >; -}} +template +using ActionSet = std::vector >; + +} +} #endif diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index ffa3a4d9..ba6e577d 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -40,25 +40,25 @@ Author: paboyle //////////////////////////////////////////// // Abstract base interface //////////////////////////////////////////// -#include -#include +#include +#include //////////////////////////////////////////// // Utility functions //////////////////////////////////////////// -#include -#include +#include +#include -#include //used by all wilson type fermions -#include -#include -#include //used by all wilson type fermions +#include //used by all wilson type fermions +#include +#include +#include //used by all wilson type fermions //////////////////////////////////////////// // Gauge Actions //////////////////////////////////////////// -#include -#include +#include +#include namespace Grid { namespace QCD { @@ -107,41 +107,64 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction // for EVERY .cc file. 
This define centralises the list and restores global push of impl cases //////////////////////////////////////////////////////////////////////////////////////////////////// -#define FermOpTemplateInstantiate(A) \ + +#define FermOp4dVecTemplateInstantiate(A) \ template class A; \ template class A; \ + template class A; \ + template class A; \ template class A; \ template class A; +#define AdjointFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define TwoIndexFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define FermOp5dVecTemplateInstantiate(A) \ + template class A; \ + template class A; \ + template class A; \ + template class A; + +#define FermOpTemplateInstantiate(A) \ + FermOp4dVecTemplateInstantiate(A) \ + FermOp5dVecTemplateInstantiate(A) + + #define GparityFermOpTemplateInstantiate(A) //////////////////////////////////////////// // Fermion operators / actions //////////////////////////////////////////// -#include // 4d wilson like -#include // 4d wilson like -#include // 5d base used by all 5d overlap types +#include // 4d wilson like +#include // 4d wilson like +#include // 5d base used by all 5d overlap types -//#include +//#include -#include // Cayley types -#include -#include -#include -#include -#include -#include -#include -#include +#include // Cayley types +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include // Continued fraction -#include -#include +#include // Continued fraction +#include +#include -#include // Partial fraction -#include -#include +#include // Partial fraction +#include +#include //////////////////////////////////////////////////////////////////////////////////////////////////// // More maintainable to maintain the following typedef list centrally, as more "impl" targets @@ -157,6 +180,14 @@ typedef WilsonFermion WilsonFermionR; typedef WilsonFermion WilsonFermionF; typedef WilsonFermion WilsonFermionD; +typedef WilsonFermion 
WilsonAdjFermionR; +typedef WilsonFermion WilsonAdjFermionF; +typedef WilsonFermion WilsonAdjFermionD; + +typedef WilsonFermion WilsonTwoIndexSymmetricFermionR; +typedef WilsonFermion WilsonTwoIndexSymmetricFermionF; +typedef WilsonFermion WilsonTwoIndexSymmetricFermionD; + typedef WilsonTMFermion WilsonTMFermionR; typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; @@ -167,6 +198,11 @@ typedef DomainWallFermion DomainWallFermionD; typedef MobiusFermion MobiusFermionR; typedef MobiusFermion MobiusFermionF; typedef MobiusFermion MobiusFermionD; + +typedef ZMobiusFermion ZMobiusFermionR; +typedef ZMobiusFermion ZMobiusFermionF; +typedef ZMobiusFermion ZMobiusFermionD; + typedef ScaledShamirFermion ScaledShamirFermionR; typedef ScaledShamirFermion ScaledShamirFermionF; typedef ScaledShamirFermion ScaledShamirFermionD; @@ -222,21 +258,21 @@ typedef MobiusFermion GparityMobiusFermionD; /////////////////////////////////////////////////////////////////////////////// // G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code /////////////////////////////////////////////////////////////////////////////// -#include +#include //////////////////////////////////////// // Pseudo fermion combinations for HMC //////////////////////////////////////// -#include +#include -#include -#include -#include -#include +#include +#include +#include +#include -#include -#include -#include -#include +#include +#include +#include +#include #endif diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 0ebcfb82..57b047d4 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -28,7 +28,10 @@ Author: paboyle See the full license in the file "LICENSE" in the top level distribution directory *************************************************************************************/ /* END LEGAL */ + #include + + namespace Grid { namespace 
QCD { @@ -45,486 +48,376 @@ namespace QCD { FourDimGrid, FourDimRedBlackGrid,_M5,p), mass(_mass) - { - } + { } - template - void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) - { - // Assemble Din - int Ls=this->Ls; - for(int s=0;s +void CayleyFermion5D::Dminus(const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + FermionField tmp(psi._grid); + + this->DW(psi,tmp,DaggerNo); + + for(int s=0;s - void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField &Din) - { - int Ls=this->Ls; - for(int s=0;s +void CayleyFermion5D::DminusDag(const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + FermionField tmp(psi._grid); + + this->DW(psi,tmp,DaggerYes); + + for(int s=0;s - RealD CayleyFermion5D::M (const FermionField &psi, FermionField &chi) - { - int Ls=this->Ls; - - FermionField Din(psi._grid); - - // Assemble Din - /* - for(int s=0;sDW(Din,chi,DaggerNo); - // ((b D_W + D_w hop terms +1) on s-diag - axpby(chi,1.0,1.0,chi,psi); - - // Call Mooee?? 
- for(int s=0;s +void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + std::vector diag (Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1]=mass; + std::vector lower(Ls,-1.0); lower[0] =mass; + M5D(psi,chi,chi,lower,diag,upper); +} +template +void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) +{ + int Ls=this->Ls; + std::vector diag = bs; + std::vector upper= cs; + std::vector lower= cs; + upper[Ls-1]=-mass*upper[Ls-1]; + lower[0] =-mass*lower[0]; + M5D(psi,psi,Din,lower,diag,upper); +} +template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + std::vector diag = beo; + std::vector upper(Ls); + std::vector lower(Ls); + for(int i=0;i - RealD CayleyFermion5D::Mdag (const FermionField &psi, FermionField &chi) - { - // Under adjoint - //D1+ D1- P- -> D1+^dag P+ D2-^dag - //D2- P+ D2+ P-D1-^dag D2+dag - - FermionField Din(psi._grid); - // Apply Dw - this->DW(psi,Din,DaggerYes); - - MeooeDag5D(Din,chi); - - int Ls=this->Ls; - for(int s=0;s +void CayleyFermion5D::Mooee (const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); + for(int i=0;i - void CayleyFermion5D::Meooe (const FermionField &psi, FermionField &chi) - { - int Ls=this->Ls; +template +void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) +{ + int Ls=this->Ls; + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); - FermionField tmp(psi._grid); + for (int s=0;s + + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. + + // Pminus fowards + // Pplus backwards.. 
+template +void CayleyFermion5D::M5D(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + int Ls =this->Ls; + GridBase *grid=psi._grid; + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard=psi.checkerboard; +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + for(int s=0;s +void CayleyFermion5D::M5Ddag(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + int Ls =this->Ls; + GridBase *grid=psi._grid; + assert(phi.checkerboard == psi.checkerboard); + chi.checkerboard=psi.checkerboard; + +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + auto tmp = psi._odata[0]; + for(int s=0;s +void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) +{ + GridBase *grid=psi._grid; + int Ls=this->Ls; + + chi.checkerboard=psi.checkerboard; + +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + auto tmp = psi._odata[0]; + + // Apply (L^{\prime})^{-1} + chi[ss]=psi[ss]; // chi[0]=psi[0] + for(int s=1;s=0;s--){ + spProj5m(tmp,chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - uee[s]*tmp; + } + } +} + +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) +{ + GridBase *grid=psi._grid; + int Ls=this->Ls; + + assert(psi.checkerboard == psi.checkerboard); + chi.checkerboard=psi.checkerboard; + + +PARALLEL_FOR_LOOP + for(int ss=0;ssoSites();ss+=Ls){ // adds Ls + + auto tmp = psi._odata[0]; + + // Apply (U^{\prime})^{-dagger} + chi[ss]=psi[ss]; + for (int s=1;s=0;s--){ + spProj5p(tmp,chi[ss+s+1]); + chi[ss+s] = chi[ss+s] - lee[s]*tmp; + } + } +} + +#ifdef CAYLEY_DPERP_CACHE + INSTANTIATE_DPERP(WilsonImplF); + INSTANTIATE_DPERP(WilsonImplD); + INSTANTIATE_DPERP(GparityWilsonImplF); + INSTANTIATE_DPERP(GparityWilsonImplD); + INSTANTIATE_DPERP(ZWilsonImplF); + INSTANTIATE_DPERP(ZWilsonImplD); +#endif + +}} diff --git 
a/lib/qcd/action/fermion/CayleyFermion5Ddense.cc b/lib/qcd/action/fermion/CayleyFermion5Ddense.cc new file mode 100644 index 00000000..5fa75b50 --- /dev/null +++ b/lib/qcd/action/fermion/CayleyFermion5Ddense.cc @@ -0,0 +1,133 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +#include +#include + + +namespace Grid { +namespace QCD { + /* + * Dense matrix versions of routines + */ + + /* +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) +{ + this->MooeeInternal(psi,chi,DaggerYes,InverseYes); +} + +template +void CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) +{ + this->MooeeInternal(psi,chi,DaggerNo,InverseYes); +} + */ +template +void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) +{ + int Ls=this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + chi.checkerboard=psi.checkerboard; + + assert(Ls==LLs); + + Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + + for(int s=0;s::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); + +}} diff --git a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc new file mode 100644 index 00000000..ad7daddb --- /dev/null +++ b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc @@ -0,0 +1,149 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you 
can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +#include + + +namespace Grid { +namespace QCD { + + // FIXME -- make a version of these routines with site loop outermost for cache reuse. 
+ + // Pminus fowards + // Pplus backwards +template +void CayleyFermion5D::M5D(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + int Ls=this->Ls; + for(int s=0;s +void CayleyFermion5D::M5Ddag(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + int Ls=this->Ls; + for(int s=0;s +void CayleyFermion5D::MooeeInv (const FermionField &psi, FermionField &chi) +{ + chi.checkerboard=psi.checkerboard; + int Ls=this->Ls; + // Apply (L^{\prime})^{-1} + axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0] + for (int s=1;s=0;s--){ + axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls] + } +} + +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) +{ + chi.checkerboard=psi.checkerboard; + int Ls=this->Ls; + // Apply (U^{\prime})^{-dagger} + axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0] + for (int s=1;s=0;s--){ + axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls] + } +} + + +#ifdef CAYLEY_DPERP_LINALG + INSTANTIATE(WilsonImplF); + INSTANTIATE(WilsonImplD); + INSTANTIATE(GparityWilsonImplF); + INSTANTIATE(GparityWilsonImplD); +#endif + +} +} diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc new file mode 100644 index 00000000..f6569923 --- /dev/null +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -0,0 +1,309 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ + +#include +#include + + +namespace Grid { +namespace QCD { + /* + * Dense matrix versions of routines + */ +template +void CayleyFermion5D::MooeeInvDag (const FermionField &psi, FermionField &chi) +{ + this->MooeeInternal(psi,chi,DaggerYes,InverseYes); +} + +template +void CayleyFermion5D::MooeeInv(const FermionField &psi, FermionField &chi) +{ + this->MooeeInternal(psi,chi,DaggerNo,InverseYes); +} +template +void CayleyFermion5D::M5D(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + GridBase *grid=psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd= Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs==nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard=psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type * u_p = (scalar_type *)&u[0]; + scalar_type * l_p = (scalar_type *)&l[0]; + scalar_type * d_p = (scalar_type *)&d[0]; + + for(int o=0;ooSites();ss+=LLs){ // adds LLs + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; 
+ + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + + hp=hp*0.5; + hm=hm*0.5; + spRecon5m(fp,hp); + spRecon5p(fm,hm); + + chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } + } +} + +template +void CayleyFermion5D::M5Ddag(const FermionField &psi, + const FermionField &phi, + FermionField &chi, + std::vector &lower, + std::vector &diag, + std::vector &upper) +{ + GridBase *grid=psi._grid; + int Ls = this->Ls; + int LLs = grid->_rdimensions[0]; + int nsimd= Simd::Nsimd(); + + Vector > u(LLs); + Vector > l(LLs); + Vector > d(LLs); + + assert(Ls/LLs==nsimd); + assert(phi.checkerboard == psi.checkerboard); + + chi.checkerboard=psi.checkerboard; + + // just directly address via type pun + typedef typename Simd::scalar_type scalar_type; + scalar_type * u_p = (scalar_type *)&u[0]; + scalar_type * l_p = (scalar_type *)&l[0]; + scalar_type * d_p = (scalar_type *)&d[0]; + + for(int o=0;ooSites();ss+=LLs){ // adds LLs + + alignas(64) SiteHalfSpinor hp; + alignas(64) SiteHalfSpinor hm; + alignas(64) SiteSpinor fp; + alignas(64) SiteSpinor fm; + + for(int v=0;v=v ) rotate(hm,hm,nsimd-1); + + hp=hp*0.5; + hm=hm*0.5; + spRecon5p(fp,hp); + spRecon5m(fm,hm); + + chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp; + chi[ss+v] = chi[ss+v] +l[v]*fm; + + } + } +} + +template +void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv) +{ + int Ls=this->Ls; + int LLs = psi._grid->_rdimensions[0]; + int vol = psi._grid->oSites()/LLs; + + chi.checkerboard=psi.checkerboard; + + Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); + Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); + + for(int s=0;s > Matp(Ls*LLs); + Vector > Matm(Ls*LLs); + + for(int s2=0;s2 SitePplus(LLs); + Vector SitePminus(LLs); + Vector SiteChiP(LLs); + Vector SiteChiM(LLs); + Vector SiteChi(LLs); + + SiteHalfSpinor BcastP; + SiteHalfSpinor BcastM; + + for(int s=0;s::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void 
CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); + +}} diff --git a/lib/qcd/action/fermion/DomainWallFermion.h b/lib/qcd/action/fermion/DomainWallFermion.h index 8e41aa63..c0b6b6aa 100644 --- a/lib/qcd/action/fermion/DomainWallFermion.h +++ b/lib/qcd/action/fermion/DomainWallFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_DOMAIN_WALL_FERMION_H #define GRID_QCD_DOMAIN_WALL_FERMION_H -#include +#include namespace Grid { @@ -42,6 +42,10 @@ namespace Grid { INHERIT_IMPL_TYPES(Impl); public: + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) { + this->MomentumSpacePropagatorHt(out,in,_m); + }; + virtual void Instantiatable(void) {}; // Constructors DomainWallFermion(GaugeField &_Umu, @@ -51,6 +55,7 @@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5,const ImplParams &p= ImplParams()) : + CayleyFermion5D(_Umu, FiveDimGrid, FiveDimRedBlackGrid, diff --git a/lib/qcd/action/fermion/FermionOperator.h b/lib/qcd/action/fermion/FermionOperator.h index ea5583eb..742c6e08 100644 --- a/lib/qcd/action/fermion/FermionOperator.h +++ b/lib/qcd/action/fermion/FermionOperator.h @@ -91,6 +91,20 @@ namespace Grid { virtual void Mdiag (const FermionField &in, FermionField &out) { Mooee(in,out);}; // Same as Mooee applied to both CB's virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp)=0; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac + + virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) { assert(0);}; + + virtual void FreePropagator(const FermionField &in,FermionField &out,RealD mass) { + FFT theFFT((GridCartesian *) in._grid); + + FermionField in_k(in._grid); + FermionField 
prop_k(in._grid); + + theFFT.FFT_all_dim(in_k,in,FFT::forward); + this->MomentumSpacePropagator(prop_k,in_k,mass); + theFFT.FFT_all_dim(out,prop_k,FFT::backward); + }; + /////////////////////////////////////////////// // Updates gauge field during HMC /////////////////////////////////////////////// diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 399c780b..0800dea6 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -1,490 +1,532 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h +Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H -#define GRID_QCD_FERMION_OPERATOR_IMPL_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H +#define GRID_QCD_FERMION_OPERATOR_IMPL_H namespace Grid { - - namespace QCD { +namespace QCD { - ////////////////////////////////////////////// - // Template parameter class constructs to package - // externally control Fermion implementations - // in orthogonal directions - // - // Ultimately need Impl to always define types where XXX is opaque - // - // typedef typename XXX Simd; - // typedef typename XXX GaugeLinkField; - // typedef typename XXX GaugeField; - // typedef typename XXX GaugeActField; - // typedef typename XXX FermionField; - // typedef typename XXX DoubledGaugeField; - // typedef typename XXX SiteSpinor; - // typedef typename XXX SiteHalfSpinor; - // typedef typename XXX Compressor; - // - // and Methods: - // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - // void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int 
mu,StencilEntry *SE,StencilImpl &St) - // void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) - // void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) - // - // - // To acquire the typedefs from "Base" (either a base class or template param) use: - // - // INHERIT_GIMPL_TYPES(Base) - // INHERIT_FIMPL_TYPES(Base) - // INHERIT_IMPL_TYPES(Base) - // - // The Fermion operators will do the following: - // - // struct MyOpParams { - // RealD mass; - // }; - // - // - // template - // class MyOp : pubic { - // public: - // - // INHERIT_ALL_IMPL_TYPES(Impl); - // - // MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam) - // { - // - // }; - // - // } - ////////////////////////////////////////////// - - - //////////////////////////////////////////////////////////////////////// - // Implementation dependent fermion types - //////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////// + // Template parameter class constructs to package + // externally control Fermion implementations + // in orthogonal directions + // + // Ultimately need Impl to always define types where XXX is opaque + // + // typedef typename XXX Simd; + // typedef typename XXX GaugeLinkField; + // typedef typename XXX GaugeField; + // typedef typename XXX GaugeActField; + // typedef typename XXX FermionField; + // typedef typename XXX DoubledGaugeField; + // typedef typename XXX SiteSpinor; + // typedef typename XXX SiteHalfSpinor; + // typedef typename XXX Compressor; + // + // and Methods: + // void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) + // void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) + // void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) + // void InsertForce4D(GaugeField &mat,const FermionField 
&Btilde,const FermionField &A,int mu) + // void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu) + // + // + // To acquire the typedefs from "Base" (either a base class or template param) use: + // + // INHERIT_GIMPL_TYPES(Base) + // INHERIT_FIMPL_TYPES(Base) + // INHERIT_IMPL_TYPES(Base) + // + // The Fermion operators will do the following: + // + // struct MyOpParams { + // RealD mass; + // }; + // + // + // template + // class MyOp : public { + // public: + // + // INHERIT_ALL_IMPL_TYPES(Impl); + // + // MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam) + // { + // + // }; + // + // } + ////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////// + // Implementation dependent fermion types + //////////////////////////////////////////////////////////////////////// + #define INHERIT_FIMPL_TYPES(Impl)\ - typedef typename Impl::FermionField FermionField; \ - typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ - typedef typename Impl::SiteSpinor SiteSpinor; \ - typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ - typedef typename Impl::Compressor Compressor; \ - typedef typename Impl::StencilImpl StencilImpl; \ - typedef typename Impl::ImplParams ImplParams; - + typedef typename Impl::FermionField FermionField; \ + typedef typename Impl::DoubledGaugeField DoubledGaugeField; \ + typedef typename Impl::SiteSpinor SiteSpinor; \ + typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ + typedef typename Impl::Compressor Compressor; \ + typedef typename Impl::StencilImpl StencilImpl; \ + typedef typename Impl::ImplParams ImplParams; \ + typedef typename Impl::Coeff_t Coeff_t; + #define INHERIT_IMPL_TYPES(Base) \ - INHERIT_GIMPL_TYPES(Base)\ - INHERIT_FIMPL_TYPES(Base) + INHERIT_GIMPL_TYPES(Base) \ + INHERIT_FIMPL_TYPES(Base) + + ///////////////////////////////////////////////////////////////////////////// + // Single flavour four spinors 
with colour index + ///////////////////////////////////////////////////////////////////////////// + template + class WilsonImpl : public PeriodicGaugeImpl > { - /////// - // Single flavour four spinors with colour index - /////// - template - class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { public: - typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; - - INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplDoubledGaugeField = iVector >, Nds >; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - - typedef Lattice FermionField; - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - - ImplParams Params; - - WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - - bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ - mult(&phi(),&U(mu),&chi()); - } - - template - inline void loadLinkElement(Simd & reg,ref &memory){ - reg = memory; - } - inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - { - conformable(Uds._grid,GaugeGrid); - conformable(Umu._grid,GaugeGrid); - GaugeLinkField U(GaugeGrid); - for(int mu=0;mu(Umu,mu); - PokeIndex(Uds,U,mu); - U = adj(Cshift(U,mu,-1)); - PokeIndex(Uds,U,mu+4); - } - } + static const int Dimension = Representation::Dimension; + typedef PeriodicGaugeImpl > Gimpl; - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ - GaugeLinkField link(mat._grid); - link = TraceIndex(outerProduct(Btilde,A)); - PokeIndex(mat,link,mu); - } + //Necessary? 
+ constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} + + const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - - int Ls=Btilde._grid->_fdimensions[0]; - - GaugeLinkField tmp(mat._grid); - tmp = zero; -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - int sU=sss; - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here - } - } - PokeIndex(mat,tmp,mu); - - } - - }; - - - - /////// - // Single flavour four spinors with colour index, 5d redblack - /////// - template - class DomainWallRedBlack5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { - public: - - typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; - - INHERIT_GIMPL_TYPES(Gimpl); + INHERIT_GIMPL_TYPES(Gimpl); - template using iImplSpinor = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplDoubledGaugeField = iVector >, Nds >; - template using iImplGaugeField = iVector >, Nd >; - template using iImplGaugeLink = iScalar > >; + template using iImplSpinor = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef Lattice FermionField; - - // Make the doubled gauge field a *scalar* - typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar - typedef iImplGaugeField SiteScalarGaugeField; // scalar - typedef iImplGaugeLink SiteScalarGaugeLink; // scalar - - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - - ImplParams Params; - - DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - - bool overlapCommsCompute(void) { return false; }; + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor 
SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; - template - inline void loadLinkElement(Simd & reg,ref &memory){ - vsplat(reg,memory); + typedef Lattice FermionField; + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; + typedef WilsonImplParams ImplParams; + typedef WilsonStencil StencilImpl; + + ImplParams Params; + + WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; + + inline void multLink(SiteHalfSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, + int mu, + StencilEntry *SE, + StencilImpl &St) { + mult(&phi(), &U(mu), &chi()); + } + + template + inline void loadLinkElement(Simd ®, ref &memory) { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &Uds, + const GaugeField &Umu) { + conformable(Uds._grid, GaugeGrid); + conformable(Umu._grid, GaugeGrid); + GaugeLinkField U(GaugeGrid); + for (int mu = 0; mu < Nd; mu++) { + U = PeekIndex(Umu, mu); + PokeIndex(Uds, U, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uds, U, mu + 4); } - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) - { - SiteGaugeLink UU; - for(int i=0;i(outerProduct(Btilde,A)); + PokeIndex(mat,link,mu); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ + + int Ls=Btilde._grid->_fdimensions[0]; + GaugeLinkField tmp(mat._grid); + tmp = zero; + + PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + int sU=sss; + for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here } - mult(&phi(),&UU(),&chi()); } + PokeIndex(mat,tmp,mu); + + } + }; - inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - { - SiteScalarGaugeField ScalarUmu; - SiteDoubledGaugeField ScalarUds; + 
//////////////////////////////////////////////////////////////////////////////////// + // Single flavour four spinors with colour index, 5d redblack + //////////////////////////////////////////////////////////////////////////////////// - GaugeLinkField U (Umu._grid); - GaugeField Uadj(Umu._grid); - for(int mu=0;mu(Umu,mu); - U = adj(Cshift(U,mu,-1)); - PokeIndex(Uadj,U,mu); - } - - for(int lidx=0;lidxlSites();lidx++){ - std::vector lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor); - - peekLocalSite(ScalarUmu,Umu,lcoor); - for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu); - - peekLocalSite(ScalarUmu,Uadj,lcoor); - for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu); - - pokeLocalSite(ScalarUds,Uds,lcoor); - } +template +class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { + public: + + static const int Dimension = Nrepresentation; + const bool LsVectorised=true; + typedef _Coeff_t Coeff_t; + typedef PeriodicGaugeImpl > Gimpl; + + INHERIT_GIMPL_TYPES(Gimpl); + + template using iImplSpinor = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef Lattice FermionField; + + // Make the doubled gauge field a *scalar* + typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar + typedef iImplGaugeField SiteScalarGaugeField; // scalar + typedef iImplGaugeLink SiteScalarGaugeLink; // scalar + + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; + typedef WilsonImplParams ImplParams; + typedef WilsonStencil StencilImpl; + + ImplParams Params; + + DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; + + bool overlapCommsCompute(void) { return false; }; + + template + inline void loadLinkElement(Simd ®, ref 
&memory) { + vsplat(reg, memory); + } + inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, int mu, StencilEntry *SE, + StencilImpl &St) { + SiteGaugeLink UU; + for (int i = 0; i < Nrepresentation; i++) { + for (int j = 0; j < Nrepresentation; j++) { + vsplat(UU()()(i, j), U(mu)()(i, j)); } - - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ + } + mult(&phi(), &UU(), &chi()); + } + + inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu) + { + SiteScalarGaugeField ScalarUmu; + SiteDoubledGaugeField ScalarUds; + + GaugeLinkField U(Umu._grid); + GaugeField Uadj(Umu._grid); + for (int mu = 0; mu < Nd; mu++) { + U = PeekIndex(Umu, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uadj, U, mu); + } + + for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + std::vector lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + + peekLocalSite(ScalarUmu, Umu, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); + + peekLocalSite(ScalarUmu, Uadj, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); + + pokeLocalSite(ScalarUds, Uds, lcoor); + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu) + { + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField Ã, int mu) + { assert(0); - } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - assert(0); - } - - }; - - + } +}; + //////////////////////////////////////////////////////////////////////////////////////// // Flavour doubled spinors; is Gparity the only? what about C*? 
//////////////////////////////////////////////////////////////////////////////////////// - - template - class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes >{ - public: - - typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; - - INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iVector, Ns>, Ngp >; - template using iImplHalfSpinor = iVector, Nhs>, Ngp >; - template using iImplDoubledGaugeField = iVector >, Nds >, Ngp >; - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; +template +class GparityWilsonImpl : public ConjugateGaugeImpl > { + public: - typedef Lattice FermionField; - typedef Lattice DoubledGaugeField; + static const int Dimension = Nrepresentation; - typedef WilsonCompressor Compressor; - typedef WilsonStencil StencilImpl; + const bool LsVectorised=false; - typedef GparityWilsonImplParams ImplParams; - - ImplParams Params; - - GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; + typedef _Coeff_t Coeff_t; + typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; + + INHERIT_GIMPL_TYPES(Gimpl); - bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - - // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ - - typedef SiteHalfSpinor vobj; - typedef typename SiteHalfSpinor::scalar_object sobj; - - vobj vtmp; - sobj stmp; - - GridBase *grid = St._grid; + template using iImplSpinor = iVector, Ns>, Ngp>; + template using iImplHalfSpinor = iVector, Nhs>, Ngp>; + template using iImplDoubledGaugeField = iVector >, Nds>, Ngp>; - const int Nsimd = grid->Nsimd(); - - int direction = St._directions[mu]; - int distance = St._distances[mu]; - int ptype = St._permute_type[mu]; - int sl = St._grid->_simd_layout[direction]; - - // Fixme 
X.Y.Z.T hardcode in stencil - int mmu = mu % Nd; - - // assert our assumptions - assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code - assert((sl==1)||(sl==2)); - - std::vector icoor; + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; + typedef WilsonStencil StencilImpl; + + typedef GparityWilsonImplParams ImplParams; - if ( SE->_around_the_world && Params.twists[mmu] ) { + ImplParams Params; - if ( sl == 2 ) { + GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; - std::vector vals(Nsimd); + bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - extract(chi,vals); - for(int s=0;siCoorFromIindex(icoor,s); + typedef SiteHalfSpinor vobj; + typedef typename SiteHalfSpinor::scalar_object sobj; + + vobj vtmp; + sobj stmp; + + GridBase *grid = St._grid; + + const int Nsimd = grid->Nsimd(); + + int direction = St._directions[mu]; + int distance = St._distances[mu]; + int ptype = St._permute_type[mu]; + int sl = St._grid->_simd_layout[direction]; + + // Fixme X.Y.Z.T hardcode in stencil + int mmu = mu % Nd; + + // assert our assumptions + assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code + assert((sl == 1) || (sl == 2)); + + std::vector icoor; + + if ( SE->_around_the_world && Params.twists[mmu] ) { + + if ( sl == 2 ) { + + std::vector vals(Nsimd); + + extract(chi,vals); + for(int s=0;siCoorFromIindex(icoor,s); - assert((icoor[direction]==0)||(icoor[direction]==1)); + assert((icoor[direction]==0)||(icoor[direction]==1)); - int permute_lane; - if ( distance == 1) { - permute_lane = icoor[direction]?1:0; - } else { - permute_lane = icoor[direction]?0:1; + int permute_lane; + if ( distance == 1) { + permute_lane = icoor[direction]?1:0; + } else { + permute_lane = icoor[direction]?0:1; + } + + 
if ( permute_lane ) { + stmp(0) = vals[s](1); + stmp(1) = vals[s](0); + vals[s] = stmp; } - - if ( permute_lane ) { - stmp(0) = vals[s](1); - stmp(1) = vals[s](0); - vals[s] = stmp; - } - } - merge(vtmp,vals); + } + merge(vtmp,vals); + + } else { + vtmp(0) = chi(1); + vtmp(1) = chi(0); + } + mult(&phi(0),&U(0)(mu),&vtmp(0)); + mult(&phi(1),&U(1)(mu),&vtmp(1)); + + } else { + mult(&phi(0),&U(0)(mu),&chi(0)); + mult(&phi(1),&U(1)(mu),&chi(1)); + } + + } - } else { - vtmp(0) = chi(1); - vtmp(1) = chi(0); - } - mult(&phi(0),&U(0)(mu),&vtmp(0)); - mult(&phi(1),&U(1)(mu),&vtmp(1)); + inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) + { + conformable(Uds._grid,GaugeGrid); + conformable(Umu._grid,GaugeGrid); + + GaugeLinkField Utmp (GaugeGrid); + GaugeLinkField U (GaugeGrid); + GaugeLinkField Uconj(GaugeGrid); + + Lattice > coor(GaugeGrid); + + for(int mu=0;mu > coor(GaugeGrid); - - - for(int mu=0;mu(Umu,mu); - Uconj = conjugate(U); - - // This phase could come from a simple bc 1,1,-1,1 .. - int neglink = GaugeGrid->GlobalDimensions()[mu]-1; - if ( Params.twists[mu] ) { - Uconj = where(coor==neglink,-Uconj,Uconj); - } - + U = PeekIndex(Umu,mu); + Uconj = conjugate(U); + + // This phase could come from a simple bc 1,1,-1,1 .. + int neglink = GaugeGrid->GlobalDimensions()[mu]-1; + if ( Params.twists[mu] ) { + Uconj = where(coor==neglink,-Uconj,Uconj); + } PARALLEL_FOR_LOOP - for(auto ss=U.begin();ss(outerProduct(Btilde, A)); +PARALLEL_FOR_LOOP + for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { + link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); + } + PokeIndex(mat, link, mu); + return; + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) { + + int Ls = Btilde._grid->_fdimensions[0]; - // DhopDir provides U or Uconj depending on coor/flavour. - GaugeLinkField link(mat._grid); - // use lorentz for flavour as hack. 
- auto tmp = TraceIndex(outerProduct(Btilde,A)); + GaugeLinkField tmp(mat._grid); + tmp = zero; PARALLEL_FOR_LOOP - for(auto ss=tmp.begin();ss(mat,link,mu); - return; - } - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ + for (int ss = 0; ss < tmp._grid->oSites(); ss++) { + for (int s = 0; s < Ls; s++) { + int sF = s + Ls * ss; + auto ttmp = traceIndex(outerProduct(Btilde[sF], Atilde[sF])); + tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + } + } + PokeIndex(mat, tmp, mu); + return; + } - int Ls=Btilde._grid->_fdimensions[0]; +}; - GaugeLinkField tmp(mat._grid); - tmp = zero; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); - tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1)); - } - } - PokeIndex(mat,tmp,mu); - return; - } - }; + typedef WilsonImpl WilsonImplR; // Real.. whichever prec + typedef WilsonImpl WilsonImplF; // Float + typedef WilsonImpl WilsonImplD; // Double - typedef WilsonImpl WilsonImplR; // Real.. whichever prec - typedef WilsonImpl WilsonImplF; // Float - typedef WilsonImpl WilsonImplD; // Double + typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec + typedef WilsonImpl ZWilsonImplF; // Float + typedef WilsonImpl ZWilsonImplD; // Double + + typedef WilsonImpl WilsonAdjImplR; // Real.. whichever prec + typedef WilsonImpl WilsonAdjImplF; // Float + typedef WilsonImpl WilsonAdjImplD; // Double + + typedef WilsonImpl WilsonTwoIndexSymmetricImplR; // Real.. whichever prec + typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float + typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double + + typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec + typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float + typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double + + typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. 
whichever prec + typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float + typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double + + typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec + typedef GparityWilsonImpl GparityWilsonImplF; // Float + typedef GparityWilsonImpl GparityWilsonImplD; // Double - typedef DomainWallRedBlack5dImpl DomainWallRedBlack5dImplR; // Real.. whichever prec - typedef DomainWallRedBlack5dImpl DomainWallRedBlack5dImplF; // Float - typedef DomainWallRedBlack5dImpl DomainWallRedBlack5dImplD; // Double +}} - typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec - typedef GparityWilsonImpl GparityWilsonImplF; // Float - typedef GparityWilsonImpl GparityWilsonImplD; // Double - - } -} #endif diff --git a/lib/qcd/action/fermion/MobiusFermion.h b/lib/qcd/action/fermion/MobiusFermion.h index f08b5a74..ade9ca4d 100644 --- a/lib/qcd/action/fermion/MobiusFermion.h +++ b/lib/qcd/action/fermion/MobiusFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_MOBIUS_FERMION_H #define GRID_QCD_MOBIUS_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/MobiusZolotarevFermion.h b/lib/qcd/action/fermion/MobiusZolotarevFermion.h index cdf575f8..609d5cea 100644 --- a/lib/qcd/action/fermion/MobiusZolotarevFermion.h +++ b/lib/qcd/action/fermion/MobiusZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h index 8826df64..9cab0e22 100644 --- a/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H -#include +#include namespace Grid { @@ -42,7 
+42,11 @@ namespace Grid { INHERIT_IMPL_TYPES(Impl); public: - // Constructors + void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m) { + this->MomentumSpacePropagatorHw(out,in,_m); + }; + + // Constructors OverlapWilsonCayleyTanhFermion(GaugeField &_Umu, GridCartesian &FiveDimGrid, GridRedBlackCartesian &FiveDimRedBlackGrid, diff --git a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h index 08508e8f..048244cc 100644 --- a/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h index a17cbe7a..bbac735a 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h index 7102adb0..9da30f65 100644 --- a/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h index b63676c5..3b867174 100644 --- 
a/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h index 341c193f..e1d0763b 100644 --- a/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h +++ b/lib/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/ScaledShamirFermion.h b/lib/qcd/action/fermion/ScaledShamirFermion.h index c0deef3e..f850ee4d 100644 --- a/lib/qcd/action/fermion/ScaledShamirFermion.h +++ b/lib/qcd/action/fermion/ScaledShamirFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H #define GRID_QCD_SCALED_SHAMIR_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/ShamirZolotarevFermion.h b/lib/qcd/action/fermion/ShamirZolotarevFermion.h index f15d83a8..732afa0a 100644 --- a/lib/qcd/action/fermion/ShamirZolotarevFermion.h +++ b/lib/qcd/action/fermion/ShamirZolotarevFermion.h @@ -29,7 +29,7 @@ Author: Peter Boyle #ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 59632409..99baa8a0 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -1,130 +1,129 @@ - /************************************************************************************* 
+/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonFermion.cc +Source file: ./lib/qcd/action/fermion/WilsonFermion.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include namespace Grid { namespace QCD { - const std::vector WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3}); - const std::vector WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1}); - int WilsonFermionStatic::HandOptDslash; +const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, + 3}); +const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, + -1, -1}); +int WilsonFermionStatic::HandOptDslash; - ///////////////////////////////// - // Constructor and gauge import - ///////////////////////////////// +///////////////////////////////// +// Constructor and gauge import +///////////////////////////////// - template - WilsonFermion::WilsonFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass,const ImplParams &p) : - Kernels(p), - _grid(&Fgrid), - _cbgrid(&Hgrid), - Stencil (&Fgrid,npoint,Even,directions,displacements), - StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even - StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd - mass(_mass), - Lebesgue(_grid), - LebesgueEvenOdd(_cbgrid), - Umu(&Fgrid), - UmuEven(&Hgrid), - UmuOdd (&Hgrid) - { - // Allocate the required comms buffer - ImportGauge(_Umu); +template +WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements), + StencilEven(&Hgrid, npoint, Even, directions, + displacements), // source is Even + StencilOdd(&Hgrid, 
npoint, Odd, directions, + displacements), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid) { + // Allocate the required comms buffer + ImportGauge(_Umu); +} + +template +void WilsonFermion::ImportGauge(const GaugeField &_Umu) { + GaugeField HUmu(_Umu._grid); + HUmu = _Umu * (-0.5); + Impl::DoubleStore(GaugeGrid(), Umu, HUmu); + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd, Umu); +} + +///////////////////////////// +// Implement the interface +///////////////////////////// + +template +RealD WilsonFermion::M(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerNo); + return axpy_norm(out, 4 + mass, in, out); +} + +template +RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerYes); + return axpy_norm(out, 4 + mass, in, out); +} + +template +void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); } +} - template - void WilsonFermion::ImportGauge(const GaugeField &_Umu) - { - GaugeField HUmu(_Umu._grid); - HUmu = _Umu*(-0.5); - Impl::DoubleStore(GaugeGrid(),Umu,HUmu); - pickCheckerboard(Even,UmuEven,Umu); - pickCheckerboard(Odd ,UmuOdd,Umu); +template +void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); } +} - ///////////////////////////// - // Implement the interface - ///////////////////////////// - - template - RealD WilsonFermion::M(const FermionField &in, FermionField &out) - { - out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerNo); - return axpy_norm(out,4+mass,in,out); - } - - template - RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) - { - 
out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerYes); - return axpy_norm(out,4+mass,in,out); - } - - template - void WilsonFermion::Meooe(const FermionField &in, FermionField &out) - { - if ( in.checkerboard == Odd ) { - DhopEO(in,out,DaggerNo); - } else { - DhopOE(in,out,DaggerNo); - } - } - template - void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) - { - if ( in.checkerboard == Odd ) { - DhopEO(in,out,DaggerYes); - } else { - DhopOE(in,out,DaggerYes); - } - } - - template + template void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; - typename FermionField::scalar_type scal(4.0+mass); - out = scal*in; + typename FermionField::scalar_type scal(4.0 + mass); + out = scal * in; } - - template + + template void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; - Mooee(in,out); + Mooee(in, out); } - + template void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { out.checkerboard = in.checkerboard; @@ -136,184 +135,237 @@ namespace QCD { out.checkerboard = in.checkerboard; MooeeInv(in,out); } - - /////////////////////////////////// - // Internal - /////////////////////////////////// template - void WilsonFermion::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B,int dag) { - - assert((dag==DaggerNo) ||(dag==DaggerYes)); - - Compressor compressor(dag); - - FermionField Btilde(B._grid); - FermionField Atilde(B._grid); - Atilde = A; + void WilsonFermion::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) { - st.HaloExchange(B,compressor); - - for(int mu=0;mu(1-g) if dag - //////////////////////////////////////////////////////////////////////// - int gamma = mu; - if ( !dag ) gamma+= Nd; - - //////////////////////// - // Call the single hop - //////////////////////// -PARALLEL_FOR_LOOP - for(int 
sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma); - } - - ////////////////////////////////////////////////// - // spin trace outer product - ////////////////////////////////////////////////// - Impl::InsertForce4D(mat,Btilde,Atilde,mu); + // what type LatticeComplex + conformable(_grid,out._grid); + typedef typename FermionField::vector_type vector_type; + typedef typename FermionField::scalar_type ScalComplex; + + typedef Lattice > LatComplex; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + std::vector latt_size = _grid->_fdimensions; + + FermionField num (_grid); num = zero; + LatComplex wilson(_grid); wilson= zero; + LatComplex one (_grid); one = ScalComplex(1.0,0.0); + + LatComplex denom(_grid); denom= zero; + LatComplex kmu(_grid); + ScalComplex ci(0.0,1.0); + // momphase = n * 2pi / L + for(int mu=0;mu - void WilsonFermion::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_grid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - mat.checkerboard = U.checkerboard; - - DerivInternal(Stencil,Umu,mat,U,V,dag); - } - - template - void WilsonFermion::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_cbgrid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - assert(V.checkerboard==Even); - assert(U.checkerboard==Odd); - mat.checkerboard = Odd; - - DerivInternal(StencilEven,UmuOdd,mat,U,V,dag); - } - - template - void WilsonFermion::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_cbgrid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - assert(V.checkerboard==Odd); - assert(U.checkerboard==Even); - mat.checkerboard = Even; - - DerivInternal(StencilOdd,UmuEven,mat,U,V,dag); - } - - template - void WilsonFermion::Dhop(const 
FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_grid); // verifies full grid - conformable(in._grid,out._grid); - - out.checkerboard = in.checkerboard; - - DhopInternal(Stencil,Lebesgue,Umu,in,out,dag); - } - - template - void WilsonFermion::DhopOE(const FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_cbgrid); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check - - assert(in.checkerboard==Even); - out.checkerboard = Odd; - - DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag); - } - - template - void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_cbgrid); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check - - assert(in.checkerboard==Odd); - out.checkerboard = Even; - - DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag); - } - - template - void WilsonFermion::Mdir (const FermionField &in, FermionField &out,int dir,int disp) { - DhopDir(in,out,dir,disp); - } - + wilson = wilson + _m; // 2 sin^2 k/2 + m - template - void WilsonFermion::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){ - - int skip = (disp==1) ? 
0 : 1; - int dirdisp = dir+skip*4; - int gamma = dir+(1-skip)*4; - - DhopDirDisp(in,out,dirdisp,gamma,DaggerNo); - - }; - - template - void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) { - - Compressor compressor(dag); - - Stencil.HaloExchange(in,compressor); - -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sss,sss,in,out,dirdisp,gamma); - } - - }; + num = num + wilson*in; // -i gmu sin k + 2 sin^2 k/2 + m - template - void WilsonFermion::DhopInternal(StencilImpl & st,LebesgueOrder& lo,DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) - { - assert((dag==DaggerNo) ||(dag==DaggerYes)); + denom= denom+wilson*wilson; // sin^2 k + (2 sin^2 k/2 + m)^2 - Compressor compressor(dag); - st.HaloExchange(in,compressor); - - if ( dag == DaggerYes ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sss,sss,1,1,in,out); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sss,sss,1,1,in,out); - } - } - }; + denom= one/denom; + out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ] + + } - FermOpTemplateInstantiate(WilsonFermion); - GparityFermOpTemplateInstantiate(WilsonFermion); +/////////////////////////////////// +// Internal +/////////////////////////////////// -}} +template +void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, + GaugeField &mat, const FermionField &A, + const FermionField &B, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + Compressor compressor(dag); + FermionField Btilde(B._grid); + FermionField Atilde(B._grid); + Atilde = A; + st.HaloExchange(B, compressor); + + for (int mu = 0; mu < Nd; mu++) { + //////////////////////////////////////////////////////////////////////// + // Flip gamma (1+g)<->(1-g) if dag + 
//////////////////////////////////////////////////////////////////////// + int gamma = mu; + if (!dag) gamma += Nd; + + //////////////////////// + // Call the single hop + //////////////////////// + PARALLEL_FOR_LOOP + for (int sss = 0; sss < B._grid->oSites(); sss++) { + Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu, + gamma); + } + + ////////////////////////////////////////////////// + // spin trace outer product + ////////////////////////////////////////////////// + Impl::InsertForce4D(mat, Btilde, Atilde, mu); + } +} + +template +void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _grid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + mat.checkerboard = U.checkerboard; + + DerivInternal(Stencil, Umu, mat, U, V, dag); +} + +template +void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Even); + assert(U.checkerboard == Odd); + mat.checkerboard = Odd; + + DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); +} + +template +void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Odd); + assert(U.checkerboard == Even); + mat.checkerboard = Even; + + DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); +} + +template +void WilsonFermion::Dhop(const FermionField &in, FermionField &out, + int dag) { + conformable(in._grid, _grid); // verifies full grid + conformable(in._grid, out._grid); + + out.checkerboard = in.checkerboard; + + DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); +} + +template +void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, + int dag) { + 
conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // drops the cb check + + assert(in.checkerboard == Even); + out.checkerboard = Odd; + + DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); +} + +template +void WilsonFermion::DhopEO(const FermionField &in, FermionField &out, + int dag) { + conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // drops the cb check + + assert(in.checkerboard == Odd); + out.checkerboard = Even; + + DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); +} + +template +void WilsonFermion::Mdir(const FermionField &in, FermionField &out, + int dir, int disp) { + DhopDir(in, out, dir, disp); +} + +template +void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, + int dir, int disp) { + int skip = (disp == 1) ? 0 : 1; + int dirdisp = dir + skip * 4; + int gamma = dir + (1 - skip) * 4; + + DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); +}; + +template +void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out, + int dirdisp, int gamma, int dag) { + Compressor compressor(dag); + + Stencil.HaloExchange(in, compressor); + + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out, + dirdisp, gamma); + } +}; + +template +void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor(dag); + st.HaloExchange(in, compressor); + + if (dag == DaggerYes) { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in, + out); + } + } else { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 
1, 1, in, + out); + } + } +}; + +FermOpTemplateInstantiate(WilsonFermion); +AdjointFermOpTemplateInstantiate(WilsonFermion); +TwoIndexFermOpTemplateInstantiate(WilsonFermion); +GparityFermOpTemplateInstantiate(WilsonFermion); +} +} diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 3de2cac4..40fbd1bf 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -1,161 +1,154 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonFermion.h +Source file: ./lib/qcd/action/fermion/WilsonFermion.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
- You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_WILSON_FERMION_H -#define GRID_QCD_WILSON_FERMION_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_WILSON_FERMION_H +#define GRID_QCD_WILSON_FERMION_H namespace Grid { - namespace QCD { +namespace QCD { - class WilsonFermionStatic { - public: - static int HandOptDslash; // these are a temporary hack - static int MortonOrder; - static const std::vector directions ; - static const std::vector displacements; - static const int npoint=8; - }; +class WilsonFermionStatic { + public: + static int HandOptDslash; // these are a temporary hack + static int MortonOrder; + static const std::vector directions; + static const std::vector displacements; + static const int npoint = 8; +}; - template - class WilsonFermion : public WilsonKernels, public WilsonFermionStatic - { - public: - INHERIT_IMPL_TYPES(Impl); - typedef WilsonKernels Kernels; +template +class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { + public: + INHERIT_IMPL_TYPES(Impl); + typedef WilsonKernels Kernels; - /////////////////////////////////////////////////////////////// - // Implement the abstract base - /////////////////////////////////////////////////////////////// - GridBase *GaugeGrid(void) { return _grid ;} - 
GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;} - GridBase *FermionGrid(void) { return _grid;} - GridBase *FermionRedBlackGrid(void) { return _cbgrid;} + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } - ////////////////////////////////////////////////////////////////// - // override multiply; cut number routines if pass dagger argument - // and also make interface more uniformly consistent - ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + RealD M(const FermionField &in, FermionField &out); + RealD Mdag(const FermionField &in, FermionField &out); - ///////////////////////////////////////////////////////// - // half checkerboard operations - // could remain virtual so we can derive Clover from Wilson base - ///////////////////////////////////////////////////////// - void Meooe(const FermionField &in, FermionField &out) ; - void MeooeDag(const FermionField &in, FermionField &out) ; + ///////////////////////////////////////////////////////// + // half checkerboard operations + // could remain virtual so we can derive Clover from Wilson base + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out); + void MeooeDag(const FermionField &in, FermionField &out); - // allow override for twisted mass and 
clover - virtual void Mooee(const FermionField &in, FermionField &out) ; - virtual void MooeeDag(const FermionField &in, FermionField &out) ; - virtual void MooeeInv(const FermionField &in, FermionField &out) ; - virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, FermionField &out); + virtual void MooeeDag(const FermionField &in, FermionField &out); + virtual void MooeeInv(const FermionField &in, FermionField &out); + virtual void MooeeInvDag(const FermionField &in, FermionField &out); - //////////////////////// - // Derivative interface - //////////////////////// - // Interface calls an internal routine - void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass) ; + + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + /////////////////////////////////////////////////////////////// + void Dhop(const FermionField &in, FermionField &out, int dag); + void DhopOE(const FermionField &in, FermionField &out, int dag); + void DhopEO(const FermionField &in, FermionField &out, int dag); + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; force term uses too + 
/////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); + void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, + int gamma, int dag); + + /////////////////////////////////////////////////////////////// + // Extra methods added by derived + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat, + const FermionField &A, const FermionField &B, int dag); + + void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + + // Constructor + WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + // DoubleStore impl dependent + void ImportGauge(const GaugeField &_Umu); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + + // protected: + public: + RealD mass; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; +}; + +typedef WilsonFermion WilsonFermionF; +typedef WilsonFermion WilsonFermionD; - /////////////////////////////////////////////////////////////// - // non-hermitian hopping term; half cb or both - /////////////////////////////////////////////////////////////// - void Dhop(const FermionField &in, FermionField &out,int dag) ; - void DhopOE(const FermionField &in, FermionField &out,int dag) ; - void 
DhopEO(const FermionField &in, FermionField &out,int dag) ; - - /////////////////////////////////////////////////////////////// - // Multigrid assistance; force term uses too - /////////////////////////////////////////////////////////////// - void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ; - void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); - void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ; - - /////////////////////////////////////////////////////////////// - // Extra methods added by derived - /////////////////////////////////////////////////////////////// - void DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag); - - void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) ; - - // Constructor - WilsonFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass, - const ImplParams &p= ImplParams() - ) ; - - // DoubleStore impl dependent - void ImportGauge(const GaugeField &_Umu); - - /////////////////////////////////////////////////////////////// - // Data members require to support the functionality - /////////////////////////////////////////////////////////////// - - // protected: - public: - - RealD mass; - - GridBase * _grid; - GridBase * _cbgrid; - - //Defines the stencils for even and odd - StencilImpl Stencil; - StencilImpl StencilEven; - StencilImpl StencilOdd; - - // Copy of the gauge field , with even and odd subsets - DoubledGaugeField Umu; - DoubledGaugeField UmuEven; - DoubledGaugeField UmuOdd; - - LebesgueOrder Lebesgue; - LebesgueOrder LebesgueEvenOdd; - - - }; - - typedef WilsonFermion WilsonFermionF; - typedef WilsonFermion WilsonFermionD; - - } +} } #endif diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 
08069bed..4c2d24bf 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -42,15 +42,15 @@ const std::vector WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1 // 5d lattice for DWF. template WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _M5,const ImplParams &p) : + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _M5,const ImplParams &p) : Kernels(p), - _FiveDimGrid(&FiveDimGrid), + _FiveDimGrid (&FiveDimGrid), _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), - _FourDimGrid(&FourDimGrid), + _FourDimGrid (&FourDimGrid), _FourDimRedBlackGrid(&FourDimRedBlackGrid), Stencil (_FiveDimGrid,npoint,Even,directions,displacements), StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even @@ -62,60 +62,83 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, Lebesgue(_FourDimGrid), LebesgueEvenOdd(_FourDimRedBlackGrid) { - // some assertions - assert(FiveDimGrid._ndimension==5); - assert(FourDimGrid._ndimension==4); - assert(FiveDimRedBlackGrid._ndimension==5); - assert(FourDimRedBlackGrid._ndimension==4); - assert(FiveDimRedBlackGrid._checker_dim==1); + if (Impl::LsVectorised) { - // Dimension zero of the five-d is the Ls direction - Ls=FiveDimGrid._fdimensions[0]; - assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); - assert(FiveDimRedBlackGrid._processors[0] ==1); - assert(FiveDimRedBlackGrid._simd_layout[0]==1); - assert(FiveDimGrid._processors[0] ==1); - assert(FiveDimGrid._simd_layout[0] ==1); + int nsimd = Simd::Nsimd(); + + // some assertions + assert(FiveDimGrid._ndimension==5); + assert(FiveDimRedBlackGrid._ndimension==5); + assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction + 
assert(FourDimGrid._ndimension==4); - // Other dimensions must match the decomposition of the four-D fields - for(int d=0;d<4;d++){ - assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]); - assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); + // Dimension zero of the five-d is the Ls direction + Ls=FiveDimGrid._fdimensions[0]; + assert(FiveDimGrid._processors[0] ==1); + assert(FiveDimGrid._simd_layout[0] ==nsimd); - assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]); - assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); + assert(FiveDimRedBlackGrid._processors[0] ==1); + assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd); - assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); - assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); + // Other dimensions must match the decomposition of the four-D fields + for(int d=0;d<4;d++){ + assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); + assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); + + assert(FourDimGrid._simd_layout[d]=1); + assert(FourDimRedBlackGrid._simd_layout[d]=1); + assert(FiveDimRedBlackGrid._simd_layout[d+1]==1); - assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); - assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); - assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); + assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); + assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); + } + + } else { + + // some assertions + assert(FiveDimGrid._ndimension==5); + assert(FourDimGrid._ndimension==4); + assert(FiveDimRedBlackGrid._ndimension==5); + assert(FourDimRedBlackGrid._ndimension==4); + assert(FiveDimRedBlackGrid._checker_dim==1); + 
+ // Dimension zero of the five-d is the Ls direction + Ls=FiveDimGrid._fdimensions[0]; + assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); + assert(FiveDimRedBlackGrid._processors[0] ==1); + assert(FiveDimRedBlackGrid._simd_layout[0]==1); + assert(FiveDimGrid._processors[0] ==1); + assert(FiveDimGrid._simd_layout[0] ==1); + + // Other dimensions must match the decomposition of the four-D fields + for(int d=0;d<4;d++){ + assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]); + assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); + + assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]); + assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]); + + assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]); + assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]); + + assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]); + assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]); + assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]); + } } - + // Allocate the required comms buffer ImportGauge(_Umu); -} - +} + /* template WilsonFermion5D::WilsonFermion5D(int simd,GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - RealD _M5,const ImplParams &p) : - Kernels(p), - _FiveDimGrid (&FiveDimGrid), - _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), - _FourDimGrid (&FourDimGrid), - Stencil (_FiveDimGrid,npoint,Even,directions,displacements), - StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even - StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd - M5(_M5), - Umu(_FourDimGrid), - UmuEven(_FourDimGrid), - UmuOdd (_FourDimGrid), - Lebesgue(_FourDimGrid), - LebesgueEvenOdd(_FourDimGrid) + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian 
&FourDimGrid, + RealD _M5,const ImplParams &p) : { int nsimd = Simd::Nsimd(); @@ -148,13 +171,68 @@ WilsonFermion5D::WilsonFermion5D(int simd,GaugeField &_Umu, } { - GaugeField HUmu(_Umu._grid); - HUmu = _Umu*(-0.5); - Impl::DoubleStore(GaugeGrid(),Umu,HUmu); - UmuEven=Umu;// Really want a reference. - UmuOdd =Umu; } } + */ + +template +void WilsonFermion5D::Report(void) +{ + std::vector latt = GridDefaultLatt(); + RealD volume = Ls; for(int mu=0;mu_Nprocessors; + + if ( DhopCalls > 0 ) { + std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " << DhopComputeTime << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl; + + RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl; + + } + + if ( DerivCalls > 0 ) { + std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " < 0 || DhopCalls > 0){ + std::cout << GridLogMessage << "WilsonFermion5D Stencil"< +void WilsonFermion5D::ZeroCounters(void) { + DhopCalls = 0; + DhopCommTime = 0; + DhopComputeTime = 0; + + DerivCalls = 0; + DerivCommTime = 0; + DerivComputeTime = 0; + DerivDhopComputeTime = 0; + + Stencil.ZeroCounters(); + StencilEven.ZeroCounters(); + StencilOdd.ZeroCounters(); +} template @@ -190,19 
+268,20 @@ PARALLEL_FOR_LOOP for(int s=0;s void WilsonFermion5D::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { + DerivCalls++; assert((dag==DaggerNo) ||(dag==DaggerYes)); conformable(st._grid,A._grid); @@ -213,51 +292,52 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, FermionField Btilde(B._grid); FermionField Atilde(B._grid); + DerivCommTime-=usecond(); st.HaloExchange(B,compressor); + DerivCommTime+=usecond(); Atilde=A; - for(int mu=0;muoSites();sss++){ - for(int s=0;soSites(); sss++) { + for (int s = 0; s < Ls; s++) { + int sU = sss; + int sF = s + Ls * sU; - assert ( sF< B._grid->oSites()); - assert ( sU< U._grid->oSites()); + assert(sF < B._grid->oSites()); + assert(sU < U._grid->oSites()); - Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma); - - //////////////////////////// - // spin trace outer product - //////////////////////////// + Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma); + //////////////////////////// + // spin trace outer product + //////////////////////////// } - } - - Impl::InsertForce5D(mat,Btilde,Atilde,mu); - + DerivDhopComputeTime += usecond(); + Impl::InsertForce5D(mat, Btilde, Atilde, mu); } + DerivComputeTime += usecond(); } template -void WilsonFermion5D::DhopDeriv( GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) +void WilsonFermion5D::DhopDeriv(GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { conformable(A._grid,FermionGrid()); conformable(A._grid,B._grid); @@ -288,9 +368,9 @@ void WilsonFermion5D::DhopDerivEO(GaugeField &mat, template void WilsonFermion5D::DhopDerivOE(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { 
conformable(A._grid,FermionRedBlackGrid()); conformable(GaugeRedBlackGrid(),mat._grid); @@ -313,30 +393,56 @@ void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, int LLs = in._grid->_rdimensions[0]; + DhopCommTime-=usecond(); st.HaloExchange(in,compressor); + DhopCommTime+=usecond(); + DhopComputeTime-=usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - if ( dag == DaggerYes ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - int sF=LLs*sU; - Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); + if (dag == DaggerYes) { + PARALLEL_FOR_LOOP + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out); } - } else { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ +#ifdef AVX512 + } else if (stat.is_init() ) { + + int nthreads; + stat.start(); +#pragma omp parallel + { +#pragma omp master + nthreads = omp_get_num_threads(); + int mythread = omp_get_thread_num(); + stat.enter(mythread); +#pragma omp for nowait + for(int ss=0;ssoSites();ss++) { int sU=ss; int sF=LLs*sU; - Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); + Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); + } + stat.exit(mythread); + } + stat.accum(nthreads); +#endif + } else { + PARALLEL_FOR_LOOP + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out); } } + DhopComputeTime+=usecond(); } template void WilsonFermion5D::DhopOE(const FermionField &in, FermionField &out,int dag) { + DhopCalls++; conformable(in._grid,FermionRedBlackGrid()); // verifies half grid conformable(in._grid,out._grid); // drops the cb check @@ -348,6 +454,7 @@ void WilsonFermion5D::DhopOE(const FermionField &in, FermionField &out,int template void WilsonFermion5D::DhopEO(const FermionField &in, FermionField 
&out,int dag) { + DhopCalls++; conformable(in._grid,FermionRedBlackGrid()); // verifies half grid conformable(in._grid,out._grid); // drops the cb check @@ -359,6 +466,7 @@ void WilsonFermion5D::DhopEO(const FermionField &in, FermionField &out,int template void WilsonFermion5D::Dhop(const FermionField &in, FermionField &out,int dag) { + DhopCalls+=2; conformable(in._grid,FermionGrid()); // verifies full grid conformable(in._grid,out._grid); @@ -374,10 +482,150 @@ void WilsonFermion5D::DW(const FermionField &in, FermionField &out,int dag axpy(out,4.0-M5,in,out); } +template +void WilsonFermion5D::MomentumSpacePropagatorHt(FermionField &out,const FermionField &in, RealD mass) +{ + // what type LatticeComplex + GridBase *_grid = _FourDimGrid; + conformable(_grid,out._grid); + + typedef typename FermionField::vector_type vector_type; + typedef typename FermionField::scalar_type ScalComplex; + typedef iSinglet Tcomplex; + typedef Lattice > LatComplex; + + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + std::vector latt_size = _grid->_fdimensions; + + + FermionField num (_grid); num = zero; + + LatComplex sk(_grid); sk = zero; + LatComplex sk2(_grid); sk2= zero; + LatComplex W(_grid); W= zero; + LatComplex a(_grid); a= zero; + LatComplex one (_grid); one = ScalComplex(1.0,0.0); + LatComplex denom(_grid); denom= zero; + LatComplex cosha(_grid); + LatComplex kmu(_grid); + LatComplex Wea(_grid); + LatComplex Wema(_grid); + + ScalComplex ci(0.0,1.0); + + for(int mu=0;mu alpha + //////////////////////////////////////////// + cosha = (one + W*W + sk) / (W*2.0); + + // FIXME Need a Lattice acosh + for(int idx=0;idx<_grid->lSites();idx++){ + std::vector lcoor(Nd); + Tcomplex cc; + RealD sgn; + _grid->LocalIndexToLocalCoor(idx,lcoor); + peekLocalSite(cc,cosha,lcoor); + assert((double)real(cc)>=1.0); + assert(fabs((double)imag(cc))<=1.0e-15); + cc = ScalComplex(::acosh(real(cc)),0.0); + pokeLocalSite(cc,a,lcoor); + } + + 
Wea = ( exp( a) * W ); + Wema= ( exp(-a) * W ); + + num = num + ( one - Wema ) * mass * in; + denom= ( Wea - one ) + mass*mass * (one - Wema); + out = num/denom; +} + +template +void WilsonFermion5D::MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass) +{ + Gamma::GammaMatrix Gmu [] = { + Gamma::GammaX, + Gamma::GammaY, + Gamma::GammaZ, + Gamma::GammaT + }; + + GridBase *_grid = _FourDimGrid; + conformable(_grid,out._grid); + + typedef typename FermionField::vector_type vector_type; + typedef typename FermionField::scalar_type ScalComplex; + + typedef Lattice > LatComplex; + + + std::vector latt_size = _grid->_fdimensions; + + LatComplex sk(_grid); sk = zero; + LatComplex sk2(_grid); sk2= zero; + + LatComplex w_k(_grid); w_k= zero; + LatComplex b_k(_grid); b_k= zero; + + LatComplex one (_grid); one = ScalComplex(1.0,0.0); + + FermionField num (_grid); num = zero; + LatComplex denom(_grid); denom= zero; + LatComplex kmu(_grid); + ScalComplex ci(0.0,1.0); + + for(int mu=0;mu; -template class WilsonFermion5D; }} diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index be7d7322..ffb5c58e 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -31,9 +31,21 @@ Author: paboyle #ifndef GRID_QCD_WILSON_FERMION_5D_H #define GRID_QCD_WILSON_FERMION_5D_H -namespace Grid { +#include - namespace QCD { +namespace Grid { +namespace QCD { + + //////////////////////////////////////////////////////////////////////////////// + // This is the 4d red black case appropriate to support + // + // parity = (x+y+z+t)|2; + // generalised five dim fermions like mobius, zolotarev etc.. + // + // i.e. even even contains fifth dim hopping term. 
+ // + // [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ] + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // This is the 4d red black case appropriate to support @@ -60,6 +72,18 @@ namespace Grid { public: INHERIT_IMPL_TYPES(Impl); typedef WilsonKernels Kernels; + PmuStat stat; + + void Report(void); + void ZeroCounters(void); + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + + double DerivCalls; + double DerivCommTime; + double DerivComputeTime; + double DerivDhopComputeTime; /////////////////////////////////////////////////////////////// // Implement the abstract base @@ -88,6 +112,9 @@ namespace Grid { virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass) ; + void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass) ; + // Implement hopping term non-hermitian hopping term; half cb or both // Implement s-diagonal DW void DW (const FermionField &in, FermionField &out,int dag); @@ -97,76 +124,78 @@ namespace Grid { // add a DhopComm // -- suboptimal interface will presently trigger multiple comms. 
- void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); - - /////////////////////////////////////////////////////////////// - // New methods added - /////////////////////////////////////////////////////////////// - void DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag); - - void DhopInternal(StencilImpl & st, - LebesgueOrder &lo, - DoubledGaugeField &U, - const FermionField &in, - FermionField &out, - int dag); - - // Constructors - WilsonFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - double _M5,const ImplParams &p= ImplParams()); - - // Constructors + void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); + + /////////////////////////////////////////////////////////////// + // New methods added + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl & st, + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag); + + void DhopInternal(StencilImpl & st, + LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, + int dag); + + // Constructors + WilsonFermion5D(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + double _M5,const ImplParams &p= ImplParams()); + + // Constructors + /* WilsonFermion5D(int simd, - GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - double _M5,const ImplParams &p= ImplParams()); + GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + double _M5,const ImplParams &p= ImplParams()); + */ + + // 
DoubleStore + void ImportGauge(const GaugeField &_Umu); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + public: + + // Add these to the support from Wilson + GridBase *_FourDimGrid; + GridBase *_FourDimRedBlackGrid; + GridBase *_FiveDimGrid; + GridBase *_FiveDimRedBlackGrid; + + double M5; + int Ls; + + //Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; + + // Comms buffer + std::vector > comm_buf; + + }; - // DoubleStore - void ImportGauge(const GaugeField &_Umu); - - /////////////////////////////////////////////////////////////// - // Data members require to support the functionality - /////////////////////////////////////////////////////////////// - public: - - // Add these to the support from Wilson - GridBase *_FourDimGrid; - GridBase *_FourDimRedBlackGrid; - GridBase *_FiveDimGrid; - GridBase *_FiveDimRedBlackGrid; - - double M5; - int Ls; - - //Defines the stencils for even and odd - StencilImpl Stencil; - StencilImpl StencilEven; - StencilImpl StencilOdd; - - // Copy of the gauge field , with even and odd subsets - DoubledGaugeField Umu; - DoubledGaugeField UmuEven; - DoubledGaugeField UmuOdd; - - LebesgueOrder Lebesgue; - LebesgueOrder LebesgueEvenOdd; - - // Comms buffer - std::vector > comm_buf; - - }; - } -} +}} #endif diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 4edd25f9..43776c86 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -1,98 +1,52 @@ - /************************************************************************************* 
+/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonKernels.cc +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include namespace Grid { namespace QCD { - int WilsonKernelsStatic::HandOpt; - int WilsonKernelsStatic::AsmOpt; +int WilsonKernelsStatic::Opt; -template -WilsonKernels::WilsonKernels(const ImplParams &p): Base(p) {}; +template +WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; -template -void WilsonKernels::DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) -{ -#ifdef AVX512 - if ( AsmOpt ) { +//////////////////////////////////////////// +// Generic implementation; move to different file? +//////////////////////////////////////////// - WilsonKernels::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - - } else { -#else - { -#endif - for(int site=0;site::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out); - else WilsonKernels::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out); - sF++; - } - sU++; - } - - } -} - -template -void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) -{ - // No asm implementation yet. - // if ( AsmOpt ) WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - // else - for(int site=0;site::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - sF++; - } - sU++; - } -} - - - //////////////////////////////////////////// - // Generic implementation; move to different file? 
- //////////////////////////////////////////// - -template -void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; +template +void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionField &in, FermionField &out) { + SiteHalfSpinor tmp; + SiteHalfSpinor chi; SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; @@ -102,176 +56,174 @@ void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrd /////////////////////////// // Xp /////////////////////////// - SE=st.GetEntry(ptype,Xp,sF); + SE = st.GetEntry(ptype, Xp, sF); - if (SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXp(chi,in._odata[SE->_offset]); + spProjXp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); - spReconXp(result,Uchi); - + + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); + spReconXp(result, Uchi); + /////////////////////////// // Yp /////////////////////////// - SE=st.GetEntry(ptype,Yp,sF); + SE = st.GetEntry(ptype, Yp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYp(chi,in._odata[SE->_offset]); + spProjYp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); 
- accumReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); + accumReconYp(result, Uchi); /////////////////////////// // Zp /////////////////////////// - SE=st.GetEntry(ptype,Zp,sF); + SE = st.GetEntry(ptype, Zp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZp(chi,in._odata[SE->_offset]); + spProjZp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); - accumReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); + accumReconZp(result, Uchi); /////////////////////////// // Tp /////////////////////////// - SE=st.GetEntry(ptype,Tp,sF); + SE = st.GetEntry(ptype, Tp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjTp(chi,in._odata[SE->_offset]); + spProjTp(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); - accumReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); + accumReconTp(result, Uchi); /////////////////////////// // Xm /////////////////////////// - SE=st.GetEntry(ptype,Xm,sF); + SE = st.GetEntry(ptype, Xm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXm(chi,in._odata[SE->_offset]); + spProjXm(chi, in._odata[SE->_offset]); } } else { - 
chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); - accumReconXm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); + accumReconXm(result, Uchi); + /////////////////////////// // Ym /////////////////////////// - SE=st.GetEntry(ptype,Ym,sF); + SE = st.GetEntry(ptype, Ym, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYm(chi,in._odata[SE->_offset]); + spProjYm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); - accumReconYm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); + accumReconYm(result, Uchi); + /////////////////////////// // Zm /////////////////////////// - SE=st.GetEntry(ptype,Zm,sF); + SE = st.GetEntry(ptype, Zm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZm(chi,in._odata[SE->_offset]); + spProjZm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); - accumReconZm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); + accumReconZm(result, Uchi); /////////////////////////// // Tm /////////////////////////// - SE=st.GetEntry(ptype,Tm,sF); + SE = st.GetEntry(ptype, Tm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else { - spProjTm(chi,in._odata[SE->_offset]); + if (SE->_permute) { + spProjTm(tmp, 
in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else { + spProjTm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); - accumReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st); + accumReconTm(result, Uchi); - vstream(out._odata[sF],result); + vstream(out._odata[sF], result); }; - - // Need controls to do interior, exterior, or both -template -void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; +// Need controls to do interior, exterior, or both +template +void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + SiteHalfSpinor *buf, int sF, + int sU, const FermionField &in, FermionField &out) { + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; StencilEntry *SE; @@ -280,299 +232,297 @@ void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder /////////////////////////// // Xp /////////////////////////// - SE=st.GetEntry(ptype,Xm,sF); + SE = st.GetEntry(ptype, Xm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXp(chi,in._odata[SE->_offset]); + spProjXp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); - spReconXp(result,Uchi); - + + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); + spReconXp(result, Uchi); + /////////////////////////// // Yp /////////////////////////// - 
SE=st.GetEntry(ptype,Ym,sF); + SE = st.GetEntry(ptype, Ym, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYp(chi,in._odata[SE->_offset]); + spProjYp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); - accumReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); + accumReconYp(result, Uchi); /////////////////////////// // Zp /////////////////////////// - SE=st.GetEntry(ptype,Zm,sF); + SE = st.GetEntry(ptype, Zm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZp(chi,in._odata[SE->_offset]); + spProjZp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); - accumReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); + accumReconZp(result, Uchi); /////////////////////////// // Tp /////////////////////////// - SE=st.GetEntry(ptype,Tm,sF); + SE = st.GetEntry(ptype, Tm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjTp(chi,in._odata[SE->_offset]); + spProjTp(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); - accumReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], 
*chi_p, Tm, SE, st); + accumReconTp(result, Uchi); /////////////////////////// // Xm /////////////////////////// - SE=st.GetEntry(ptype,Xp,sF); + SE = st.GetEntry(ptype, Xp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXm(chi,in._odata[SE->_offset]); + spProjXm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); - accumReconXm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); + accumReconXm(result, Uchi); /////////////////////////// // Ym /////////////////////////// - SE=st.GetEntry(ptype,Yp,sF); + SE = st.GetEntry(ptype, Yp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYm(chi,in._odata[SE->_offset]); + spProjYm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); - accumReconYm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); + accumReconYm(result, Uchi); + /////////////////////////// // Zm /////////////////////////// - SE=st.GetEntry(ptype,Zp,sF); + SE = st.GetEntry(ptype, Zp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZm(chi,in._odata[SE->_offset]); + spProjZm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - 
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); - accumReconZm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); + accumReconZm(result, Uchi); /////////////////////////// // Tm /////////////////////////// - SE=st.GetEntry(ptype,Tp,sF); + SE = st.GetEntry(ptype, Tp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else { - spProjTm(chi,in._odata[SE->_offset]); + if (SE->_permute) { + spProjTm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else { + spProjTm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); - accumReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); + accumReconTm(result, Uchi); - vstream(out._odata[sF],result); + vstream(out._odata[sF], result); }; -template -void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteSpinor result; +template +void WilsonKernels::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF, + int sU, const FermionField &in, FermionField &out, int dir, int gamma) { + + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteSpinor result; SiteHalfSpinor Uchi; StencilEntry *SE; int ptype; - SE=st.GetEntry(ptype,dir,sF); + SE = st.GetEntry(ptype, dir, sF); // Xp - if(gamma==Xp){ - if ( SE->_is_local && SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjXp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Xp) { + if (SE->_is_local && SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjXp(chi, 
in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconXp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconXp(result, Uchi); } // Yp - if ( gamma==Yp ){ - if ( SE->_is_local && SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjYp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Yp) { + if (SE->_is_local && SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjYp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconYp(result, Uchi); } - + // Zp - if ( gamma ==Zp ){ - if ( SE->_is_local && SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjZp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Zp) { + if (SE->_is_local && SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjZp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconZp(result, Uchi); } - + // Tp - if ( gamma ==Tp ){ - if ( SE->_is_local && SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjTp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Tp) { + if (SE->_is_local && SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjTp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - 
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconTp(result, Uchi); } // Xm - if ( gamma==Xm ){ - if ( SE->_is_local && SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjXm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Xm) { + if (SE->_is_local && SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjXm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconXm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconXm(result, Uchi); } // Ym - if ( gamma == Ym ){ - if ( SE->_is_local && SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjYm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Ym) { + if (SE->_is_local && SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjYm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconYm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconYm(result, Uchi); } // Zm - if ( gamma == Zm ){ - if ( SE->_is_local && SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjZm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Zm) { + if (SE->_is_local && SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjZm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - 
spReconZm(result,Uchi); - } - - // Tm - if ( gamma==Tm ) { - if ( SE->_is_local && SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjTm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; - } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconZm(result, Uchi); } - vstream(out._odata[sF],result); + // Tm + if (gamma == Tm) { + if (SE->_is_local && SE->_permute) { + spProjTm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjTm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; + } + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconTm(result, Uchi); + } + + vstream(out._odata[sF], result); } - - FermOpTemplateInstantiate(WilsonKernels); - -template class WilsonKernels; -template class WilsonKernels; +FermOpTemplateInstantiate(WilsonKernels); +AdjointFermOpTemplateInstantiate(WilsonKernels); +TwoIndexFermOpTemplateInstantiate(WilsonKernels); }} + diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 231fa293..47da2b14 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -1,98 +1,183 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonKernels.h +Source file: ./lib/qcd/action/fermion/WilsonKernels.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software 
Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_DHOP_H -#define GRID_QCD_DHOP_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_DHOP_H +#define GRID_QCD_DHOP_H namespace Grid { +namespace QCD { + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Helper routines that implement Wilson stencil for a single site. 
+ // Common to both the WilsonFermion and WilsonFermion5D + //////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class WilsonKernelsStatic { + public: + enum { OptGeneric, OptHandUnroll, OptInlineAsm }; + // S-direction is INNERMOST and takes no part in the parity. + static int Opt; // these are a temporary hack +}; + +template class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { + public: + + INHERIT_IMPL_TYPES(Impl); + typedef FermionOperator Base; + +public: + + template + typename std::enable_if::type + DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) + { + switch(Opt) { +#ifdef AVX512 + case OptInlineAsm: + WilsonKernels::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + break; +#endif + case OptHandUnroll: + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; + } + break; + case OptGeneric: + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; + } + break; + default: + assert(0); + } + } + + template + typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type + DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { + // no kernel choice + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out); + sF++; + } + sU++; + } + } + + template + typename std::enable_if::type + DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, 
int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { + + switch(Opt) { +#ifdef AVX512 + case OptInlineAsm: + WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out); + break; +#endif + case OptHandUnroll: + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; + } + break; + case OptGeneric: + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; + } + break; + default: + assert(0); + } + } + + template + typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type + DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { + + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); + sF++; + } + sU++; + } + } + + void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); + +private: + // Specialised variants + void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out); + + void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, 
int Ls, int Ns, const FermionField &in, FermionField &out); + + void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf, + int sF, int sU, const FermionField &in, FermionField &out); + +public: + + WilsonKernels(const ImplParams &p = ImplParams()); + +}; + +}} - namespace QCD { - - //////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Helper routines that implement Wilson stencil for a single site. - // Common to both the WilsonFermion and WilsonFermion5D - //////////////////////////////////////////////////////////////////////////////////////////////////////////////// - class WilsonKernelsStatic { - public: - // S-direction is INNERMOST and takes no part in the parity. - static int AsmOpt; // these are a temporary hack - static int HandOpt; // these are a temporary hack - }; - - template class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { - public: - - INHERIT_IMPL_TYPES(Impl); - typedef FermionOperator Base; - - public: - - void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out); - - void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out); - - void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma); - - private: - // Specialised variants - void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU, const FermionField &in, FermionField &out); - - void 
DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in,FermionField &out); - - void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); - - - void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out); - - void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out); - public: - - WilsonKernels(const ImplParams &p= ImplParams()); - - }; - - } -} #endif diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 33c464ac..d7a9edd3 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -10,6 +10,7 @@ Author: Peter Boyle Author: paboyle +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,59 +27,75 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include + namespace Grid { namespace QCD { + +/////////////////////////////////////////////////////////// +// Default to no assembler implementation +/////////////////////////////////////////////////////////// +template void +WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +{ + assert(0); +} - - /////////////////////////////////////////////////////////// - // Default to no assembler implementation - /////////////////////////////////////////////////////////// -template -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +template void +WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) { assert(0); } #if defined(AVX512) - - - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the single precision routine - /////////////////////////////////////////////////////////// - #include + + /////////////////////////////////////////////////////////// + // If we are AVX512 specialise the single precision routine + /////////////////////////////////////////////////////////// + #include + +static Vector signsF; -static Vector signs; - -int setupSigns(void ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; -} -static int signInit = setupSigns(); + template + int setupSigns(Vector& signs ){ + Vector 
bother(2); + signs = bother; + vrsign(signs[0]); + visign(signs[1]); + return 1; + } + static int signInitF = setupSigns(signsF); + #define label(A) ilabel(A) #define ilabel(A) ".globl\n" #A ":\n" - + #define MAYBEPERM(A,perm) if (perm) { A ; } #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#define COMPLEX_TYPE vComplexF +#define signs signsF + +#undef KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + +#define KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + #undef VMOVIDUP #undef VMOVRDUP #undef MAYBEPERM @@ -86,35 +103,104 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd #undef FX #define FX(A) DWFASM_ ## A #define MAYBEPERM(A,B) -#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) -#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) +//#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) +//#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + +#undef KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include + +#define KERNEL_DAG +template<> void 
+WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include +#undef COMPLEX_TYPE +#undef signs +#undef VMOVRDUP +#undef MAYBEPERM +#undef MULT_2SPIN +#undef FX + +/////////////////////////////////////////////////////////// +// If we are AVX512 specialise the double precision routine +/////////////////////////////////////////////////////////// -#endif +#include + +static Vector signsD; +#define signs signsD +static int signInitD = setupSigns(signsD); + +#define MAYBEPERM(A,perm) if (perm) { A ; } +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) +#define FX(A) WILSONASM_ ##A +#define COMPLEX_TYPE vComplexD + +#undef KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#define KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#undef VMOVIDUP +#undef VMOVRDUP +#undef MAYBEPERM +#undef MULT_2SPIN +#undef FX +#define FX(A) DWFASM_ ## A +#define MAYBEPERM(A,B) +//#define VMOVIDUP(A,B,C) VBCASTIDUPd(A,B,C) +//#define VMOVRDUP(A,B,C) VBCASTRDUPd(A,B,C) +#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) + +#undef KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + +#define KERNEL_DAG +template<> void +WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + 
+#undef COMPLEX_TYPE +#undef signs +#undef VMOVRDUP +#undef MAYBEPERM +#undef MULT_2SPIN +#undef FX -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); +#endif //AVX512 + +#define INSTANTIATE_ASM(A)\ +template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + \ +template void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + +INSTANTIATE_ASM(WilsonImplF); +INSTANTIATE_ASM(WilsonImplD); +INSTANTIATE_ASM(ZWilsonImplF); +INSTANTIATE_ASM(ZWilsonImplD); +INSTANTIATE_ASM(GparityWilsonImplF); +INSTANTIATE_ASM(GparityWilsonImplD); +INSTANTIATE_ASM(DomainWallVec5dImplF); +INSTANTIATE_ASM(DomainWallVec5dImplD); +INSTANTIATE_ASM(ZDomainWallVec5dImplF); +INSTANTIATE_ASM(ZDomainWallVec5dImplD); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void 
WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); }} diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h index 4f3ef861..72e13754 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h +++ b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h @@ -5,7 +5,9 @@ const uint64_t plocal =(uint64_t) & in._odata[0]; // vComplexF isigns[2] = { signs[0], signs[1] }; - vComplexF *isigns = &signs[0]; + //COMPLEX_TYPE is vComplexF or vComplexD depending + //on the chosen precision + COMPLEX_TYPE *isigns = &signs[0]; MASK_REGS; int nmax=U._grid->oSites(); @@ -30,7 +32,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_PROJMEM(base); +#else XM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -41,15 +47,22 @@ MULT_2SPIN_DIR_PFXP(Xp,basep); } LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_RECON; +#else XM_RECON; - +#endif //////////////////////////////// // Yp //////////////////////////////// basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_PROJMEM(base); +#else YM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -60,7 +73,11 @@ MULT_2SPIN_DIR_PFYP(Yp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_RECON_ACCUM; +#else YM_RECON_ACCUM; +#endif //////////////////////////////// // Zp @@ -68,7 +85,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_PROJMEM(base); +#else ZM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -79,7 +100,11 @@ MULT_2SPIN_DIR_PFZP(Zp,basep); } 
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_RECON_ACCUM; +#else ZM_RECON_ACCUM; +#endif //////////////////////////////// // Tp @@ -87,7 +112,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_PROJMEM(base); +#else TM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); @@ -98,16 +127,26 @@ MULT_2SPIN_DIR_PFTP(Tp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_RECON_ACCUM; +#else TM_RECON_ACCUM; +#endif //////////////////////////////// // Xm //////////////////////////////// +#ifndef STREAM_STORE basep= (uint64_t) &out._odata[ss]; +#endif // basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_PROJMEM(base); +#else XP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -118,7 +157,11 @@ MULT_2SPIN_DIR_PFXM(Xm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_RECON_ACCUM; +#else XP_RECON_ACCUM; +#endif //////////////////////////////// // Ym @@ -126,7 +169,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_PROJMEM(base); +#else YP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -137,7 +184,11 @@ MULT_2SPIN_DIR_PFYM(Ym,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_RECON_ACCUM; +#else YP_RECON_ACCUM; +#endif //////////////////////////////// // Zm @@ -145,7 +196,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_PROJMEM(base); 
+#else ZP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -156,7 +211,11 @@ MULT_2SPIN_DIR_PFZM(Zm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_RECON_ACCUM; +#else ZP_RECON_ACCUM; +#endif //////////////////////////////// // Tm @@ -164,18 +223,28 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_PROJMEM(base); +#else TP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); } base= (uint64_t) &out._odata[ss]; +#ifndef STREAM_STORE PREFETCH_CHIMU(base); +#endif { MULT_2SPIN_DIR_PFTM(Tm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_RECON_ACCUM; +#else TP_RECON_ACCUM; +#endif basep= st.GetPFInfo(nent,plocal); nent++; SAVE_RESULT(base,basep); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index cb6c01a1..f5900832 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -311,10 +311,9 @@ namespace Grid { namespace QCD { -template -void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out) +template void +WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) { typedef typename Simd::scalar_type S; typedef typename Simd::vector_type V; @@ -555,9 +554,8 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &l } template -void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out) +void 
WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int ss,int sU,const FermionField &in, FermionField &out) { // std::cout << "Hand op Dhop "<::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder } } - //////////////////////////////////////////////// // Specialise Gparity to simple implementation //////////////////////////////////////////////// -template<> -void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) +template<> void +WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, + SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out) { assert(0); } -template<> -void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) +template<> void +WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, + SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out) { assert(0); } -template<> -void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) +template<> void +WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out) { assert(0); } -template<> -void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) +template<> void +WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf, + int sF,int sU,const FermionField &in, FermionField &out) { 
assert(0); } @@ -839,46 +834,21 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, ////////////// Wilson ; uses this implementation ///////////////////// // Need Nc=3 though // -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); - - -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); - - -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder 
&lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); +#define INSTANTIATE_THEM(A) \ +template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionField &in, FermionField &out); \ +template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\ + int ss,int sU,const FermionField &in, FermionField &out); +INSTANTIATE_THEM(WilsonImplF); +INSTANTIATE_THEM(WilsonImplD); +INSTANTIATE_THEM(ZWilsonImplF); +INSTANTIATE_THEM(ZWilsonImplD); +INSTANTIATE_THEM(GparityWilsonImplF); +INSTANTIATE_THEM(GparityWilsonImplD); +INSTANTIATE_THEM(DomainWallVec5dImplF); +INSTANTIATE_THEM(DomainWallVec5dImplD); +INSTANTIATE_THEM(ZDomainWallVec5dImplF); +INSTANTIATE_THEM(ZDomainWallVec5dImplD); }} diff --git a/lib/qcd/action/fermion/WilsonTMFermion.h b/lib/qcd/action/fermion/WilsonTMFermion.h index df00a269..5901cb2f 100644 --- a/lib/qcd/action/fermion/WilsonTMFermion.h +++ b/lib/qcd/action/fermion/WilsonTMFermion.h @@ -28,7 +28,7 @@ Author: paboyle #ifndef GRID_QCD_WILSON_TM_FERMION_H #define GRID_QCD_WILSON_TM_FERMION_H -#include +#include namespace Grid { diff --git a/lib/qcd/action/fermion/ZMobiusFermion.h b/lib/qcd/action/fermion/ZMobiusFermion.h new file mode 100644 index 00000000..d0e00657 --- /dev/null +++ b/lib/qcd/action/fermion/ZMobiusFermion.h @@ -0,0 +1,79 @@ + /************************************************************************************* + + Grid physics library, 
www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/ZMobiusFermion.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_ZMOBIUS_FERMION_H +#define GRID_QCD_ZMOBIUS_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + template + class ZMobiusFermion : public CayleyFermion5D + { + public: + INHERIT_IMPL_TYPES(Impl); + public: + + virtual void Instantiatable(void) {}; + // Constructors + ZMobiusFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + + { + RealD eps = 1.0; + + std::cout< zgamma(this->Ls); + for(int s=0;sLs;s++){ + zgamma[s] = gamma[s]; + } + + // Call base setter + this->SetCoefficientsInternal(1.0,zgamma,b,c); + } + + }; + + } +} + +#endif diff --git
a/lib/qcd/action/gauge/GaugeImpl.h b/lib/qcd/action/gauge/GaugeImpl.h index 012d21e4..400381bb 100644 --- a/lib/qcd/action/gauge/GaugeImpl.h +++ b/lib/qcd/action/gauge/GaugeImpl.h @@ -1,181 +1,194 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/gauge/GaugeImpl.h +Source file: ./lib/qcd/action/gauge/GaugeImpl.h - Copyright (C) 2015 +Copyright (C) 2015 Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_GAUGE_IMPL_H -#define GRID_QCD_GAUGE_IMPL_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_GAUGE_IMPL_H +#define GRID_QCD_GAUGE_IMPL_H namespace Grid { +namespace QCD { - namespace QCD { +//////////////////////////////////////////////////////////////////////// +// Implementation dependent gauge types +//////////////////////////////////////////////////////////////////////// - - //////////////////////////////////////////////////////////////////////// - // Implementation dependent gauge types - //////////////////////////////////////////////////////////////////////// +template class WilsonLoops; -template class WilsonLoops; +#define INHERIT_GIMPL_TYPES(GImpl) \ + typedef typename GImpl::Simd Simd; \ + typedef typename GImpl::GaugeLinkField GaugeLinkField; \ + typedef typename GImpl::GaugeField GaugeField; \ + typedef typename GImpl::SiteGaugeField SiteGaugeField; \ + typedef typename GImpl::SiteGaugeLink SiteGaugeLink; -#define INHERIT_GIMPL_TYPES(GImpl) \ - typedef typename GImpl::Simd Simd;\ - typedef typename GImpl::GaugeLinkField GaugeLinkField;\ - typedef typename GImpl::GaugeField GaugeField;\ - typedef typename GImpl::SiteGaugeField SiteGaugeField;\ - typedef typename GImpl::SiteGaugeLink SiteGaugeLink; +// +template class GaugeImplTypes { +public: + typedef S Simd; + template + using iImplGaugeLink = iScalar>>; + template + using iImplGaugeField = iVector>, Nd>; - // - template - class 
GaugeImplTypes { - public: - - typedef S Simd; - - template using iImplGaugeLink = iScalar > >; - template using iImplGaugeField = iVector >, Nd >; - - typedef iImplGaugeLink SiteGaugeLink; - typedef iImplGaugeField SiteGaugeField; - - typedef Lattice GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly - typedef Lattice GaugeField; + typedef iImplGaugeLink SiteGaugeLink; + typedef iImplGaugeField SiteGaugeField; - }; + typedef Lattice GaugeLinkField; // bit ugly naming; polarised + // gauge field, lorentz... all + // ugly + typedef Lattice GaugeField; - // Composition with smeared link, bc's etc.. probably need multiple inheritance - // Variable precision "S" and variable Nc - template - class PeriodicGaugeImpl : public GimplTypes { - public: - - INHERIT_GIMPL_TYPES(GimplTypes); - - //////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Support needed for the assembly of loops including all boundary condition effects such as conjugate bcs - //////////////////////////////////////////////////////////////////////////////////////////////////////////// - - template static inline - Lattice CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice &field) { - return PeriodicBC::CovShiftForward(Link,mu,field); - } - - template static inline - Lattice CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice &field) { - return PeriodicBC::CovShiftBackward(Link,mu,field); - } - static inline - GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { - return Cshift(adj(Link),mu,-1); - } - static inline - GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { - return Link; - } - static inline - GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { - return Cshift(Link,mu,1); - } - - static inline bool isPeriodicGaugeField(void) { - return true; - } - - }; - - - // Composition with smeared link, bc's etc.. 
probably need multiple inheritance - // Variable precision "S" and variable Nc - template - class ConjugateGaugeImpl : public GimplTypes { - public: - - INHERIT_GIMPL_TYPES(GimplTypes); - - //////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Support needed for the assembly of loops including all boundary condition effects such as Gparity. - //////////////////////////////////////////////////////////////////////////////////////////////////////////// - template static - Lattice CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice &field) { - return ConjugateBC::CovShiftForward(Link,mu,field); + // Move this elsewhere? FIXME + static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W, + int mu) { // U[mu] += W + PARALLEL_FOR_LOOP + for (auto ss = 0; ss < U._grid->oSites(); ss++) { + U._odata[ss]._internal[mu] = + U._odata[ss]._internal[mu] + W._odata[ss]._internal; } - - template static - Lattice CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice &field) { - return ConjugateBC::CovShiftBackward(Link,mu,field); - } - - static inline - GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { - GridBase *grid = Link._grid; - int Lmu = grid->GlobalDimensions()[mu]-1; - - Lattice > coor(grid); LatticeCoordinate(coor,mu); - - GaugeLinkField tmp (grid); - tmp=adj(Link); - tmp = where(coor==Lmu,conjugate(tmp),tmp); - return Cshift(tmp,mu,-1);// moves towards positive mu - } - static inline - GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { - return Link; - } - - static inline - GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { - GridBase *grid = Link._grid; - int Lmu = grid->GlobalDimensions()[mu]-1; - - Lattice > coor(grid); LatticeCoordinate(coor,mu); - - GaugeLinkField tmp (grid); - tmp=Cshift(Link,mu,1); - tmp=where(coor==Lmu,conjugate(tmp),tmp); - return tmp; - } - - static inline bool isPeriodicGaugeField(void) 
{ - return false; - } - - }; - - typedef GaugeImplTypes GimplTypesR; - typedef GaugeImplTypes GimplTypesF; - typedef GaugeImplTypes GimplTypesD; - - typedef PeriodicGaugeImpl PeriodicGimplR; // Real.. whichever prec - typedef PeriodicGaugeImpl PeriodicGimplF; // Float - typedef PeriodicGaugeImpl PeriodicGimplD; // Double - - typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. whichever prec - typedef ConjugateGaugeImpl ConjugateGimplF; // Float - typedef ConjugateGaugeImpl ConjugateGimplD; // Double - } +}; + +// Composition with smeared link, bc's etc.. probably need multiple inheritance +// Variable precision "S" and variable Nc +template class PeriodicGaugeImpl : public GimplTypes { +public: + INHERIT_GIMPL_TYPES(GimplTypes); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Support needed for the assembly of loops including all boundary condition + // effects such as conjugate bcs + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + + template + static inline Lattice + CovShiftForward(const GaugeLinkField &Link, int mu, + const Lattice &field) { + return PeriodicBC::CovShiftForward(Link, mu, field); + } + + template + static inline Lattice + CovShiftBackward(const GaugeLinkField &Link, int mu, + const Lattice &field) { + return PeriodicBC::CovShiftBackward(Link, mu, field); + } + static inline GaugeLinkField + CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { + return Cshift(adj(Link), mu, -1); + } + static inline GaugeLinkField + CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { + return Link; + } + static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { + return Cshift(Link, mu, 1); + } + + static inline bool isPeriodicGaugeField(void) { return true; } +}; + +// Composition with smeared link, bc's etc.. 
probably need multiple inheritance +// Variable precision "S" and variable Nc +template class ConjugateGaugeImpl : public GimplTypes { +public: + INHERIT_GIMPL_TYPES(GimplTypes); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Support needed for the assembly of loops including all boundary condition + // effects such as Gparity. + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + template + static Lattice CovShiftForward(const GaugeLinkField &Link, int mu, + const Lattice &field) { + return ConjugateBC::CovShiftForward(Link, mu, field); + } + + template + static Lattice CovShiftBackward(const GaugeLinkField &Link, int mu, + const Lattice &field) { + return ConjugateBC::CovShiftBackward(Link, mu, field); + } + + static inline GaugeLinkField + CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) { + GridBase *grid = Link._grid; + int Lmu = grid->GlobalDimensions()[mu] - 1; + + Lattice> coor(grid); + LatticeCoordinate(coor, mu); + + GaugeLinkField tmp(grid); + tmp = adj(Link); + tmp = where(coor == Lmu, conjugate(tmp), tmp); + return Cshift(tmp, mu, -1); // moves towards positive mu + } + static inline GaugeLinkField + CovShiftIdentityForward(const GaugeLinkField &Link, int mu) { + return Link; + } + + static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) { + GridBase *grid = Link._grid; + int Lmu = grid->GlobalDimensions()[mu] - 1; + + Lattice> coor(grid); + LatticeCoordinate(coor, mu); + + GaugeLinkField tmp(grid); + tmp = Cshift(Link, mu, 1); + tmp = where(coor == Lmu, conjugate(tmp), tmp); + return tmp; + } + + static inline bool isPeriodicGaugeField(void) { return false; } +}; + +typedef GaugeImplTypes GimplTypesR; +typedef GaugeImplTypes GimplTypesF; +typedef GaugeImplTypes GimplTypesD; + +typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesR; +typedef GaugeImplTypes::AdjointDimension> 
GimplAdjointTypesF; +typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesD; + +typedef PeriodicGaugeImpl PeriodicGimplR; // Real.. whichever prec +typedef PeriodicGaugeImpl PeriodicGimplF; // Float +typedef PeriodicGaugeImpl PeriodicGimplD; // Double + +typedef PeriodicGaugeImpl PeriodicGimplAdjR; // Real.. whichever prec +typedef PeriodicGaugeImpl PeriodicGimplAdjF; // Float +typedef PeriodicGaugeImpl PeriodicGimplAdjD; // Double + +typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. whichever prec +typedef ConjugateGaugeImpl ConjugateGimplF; // Float +typedef ConjugateGaugeImpl ConjugateGimplD; // Double +} } #endif diff --git a/lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h b/lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h index d7d08516..080b1be2 100644 --- a/lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h +++ b/lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h @@ -1,212 +1,214 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h +Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
- This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H #define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - /////////////////////////////////////// - // One flavour rational - /////////////////////////////////////// +/////////////////////////////////////// +// One flavour rational +/////////////////////////////////////// - // S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi +// S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi +// +// Here, M is some operator +// N and D makeup the rat. 
poly +// + +template +class OneFlavourEvenOddRationalPseudoFermionAction + : public Action { + public: + INHERIT_IMPL_TYPES(Impl); + + typedef OneFlavourRationalParams Params; + Params param; + + MultiShiftFunction PowerHalf; + MultiShiftFunction PowerNegHalf; + MultiShiftFunction PowerQuarter; + MultiShiftFunction PowerNegQuarter; + + private: + FermionOperator &FermOp; // the basic operator + + // NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us + // historically + // and hasenbusch works better + + FermionField PhiEven; // the pseudo fermion field for this trajectory + FermionField PhiOdd; // the pseudo fermion field for this trajectory + + public: + OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator &Op, + Params &p) + : FermOp(Op), + PhiEven(Op.FermionRedBlackGrid()), + PhiOdd(Op.FermionRedBlackGrid()), + param(p) { + AlgRemez remez(param.lo, param.hi, param.precision); + + // MdagM^(+- 1/2) + std::cout << GridLogMessage << "Generating degree " << param.degree + << " for x^(1/2)" << std::endl; + remez.generateApprox(param.degree, 1, 2); + PowerHalf.Init(remez, param.tolerance, false); + PowerNegHalf.Init(remez, param.tolerance, true); + + // MdagM^(+- 1/4) + std::cout << GridLogMessage << "Generating degree " << param.degree + << " for x^(1/4)" << std::endl; + remez.generateApprox(param.degree, 1, 4); + PowerQuarter.Init(remez, param.tolerance, false); + PowerNegQuarter.Init(remez, param.tolerance, true); + }; + + virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { + // P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi} + // = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi} + // Phi = MpcdagMpc^{1/4} eta // - // Here, M is some operator - // N and D makeup the rat. poly + // P(eta) = e^{- eta^dag eta} // - - template - class OneFlavourEvenOddRationalPseudoFermionAction : public Action { - public: - INHERIT_IMPL_TYPES(Impl); + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2). 
- typedef OneFlavourRationalParams Params; - Params param; + RealD scale = std::sqrt(0.5); - MultiShiftFunction PowerHalf ; - MultiShiftFunction PowerNegHalf; - MultiShiftFunction PowerQuarter; - MultiShiftFunction PowerNegQuarter; + FermionField eta(FermOp.FermionGrid()); + FermionField etaOdd(FermOp.FermionRedBlackGrid()); + FermionField etaEven(FermOp.FermionRedBlackGrid()); - private: - - FermionOperator & FermOp;// the basic operator + gaussian(pRNG, eta); + eta = eta * scale; - // NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us historically - // and hasenbusch works better + pickCheckerboard(Even, etaEven, eta); + pickCheckerboard(Odd, etaOdd, eta); - FermionField PhiEven; // the pseudo fermion field for this trajectory - FermionField PhiOdd; // the pseudo fermion field for this trajectory - + FermOp.ImportGauge(U); - public: + // mutishift CG + SchurDifferentiableOperator Mpc(FermOp); + ConjugateGradientMultiShift msCG(param.MaxIter, PowerQuarter); + msCG(Mpc, etaOdd, PhiOdd); - OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator &Op, - Params & p ) : FermOp(Op), - PhiEven(Op.FermionRedBlackGrid()), - PhiOdd (Op.FermionRedBlackGrid()), - param(p) - { - AlgRemez remez(param.lo,param.hi,param.precision); + ////////////////////////////////////////////////////// + // FIXME : Clover term not yet.. + ////////////////////////////////////////////////////// - // MdagM^(+- 1/2) - std::cout< sig^2 = 0.5. - // - // So eta should be of width sig = 1/sqrt(2). 
+ FermionField Y(FermOp.FermionRedBlackGrid()); - RealD scale = std::sqrt(0.5); + SchurDifferentiableOperator Mpc(FermOp); - FermionField eta (FermOp.FermionGrid()); - FermionField etaOdd (FermOp.FermionRedBlackGrid()); - FermionField etaEven(FermOp.FermionRedBlackGrid()); + ConjugateGradientMultiShift msCG(param.MaxIter, + PowerNegQuarter); - gaussian(pRNG,eta); eta=eta*scale; + msCG(Mpc, PhiOdd, Y); - pickCheckerboard(Even,etaEven,eta); - pickCheckerboard(Odd,etaOdd,eta); + RealD action = norm2(Y); + std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 " + "solve or -1/2 solve faster??? " + << action << std::endl; - FermOp.ImportGauge(U); + return action; + }; - // mutishift CG - SchurDifferentiableOperator Mpc(FermOp); - ConjugateGradientMultiShift msCG(param.MaxIter,PowerQuarter); - msCG(Mpc,etaOdd,PhiOdd); + ////////////////////////////////////////////////////// + // Need + // dS_f/dU = chi^dag d[N/D] chi + // + // N/D is expressed as partial fraction expansion: + // + // a0 + \sum_k ak/(M^dagM + bk) + // + // d[N/D] is then + // + // \sum_k -ak [M^dagM+bk]^{-1} [ dM^dag M + M^dag dM ] [M^dag M + + // bk]^{-1} + // + // Need + // Mf Phi_k = [MdagM+bk]^{-1} Phi + // Mf Phi = \sum_k ak [MdagM+bk]^{-1} Phi + // + // With these building blocks + // + // dS/dU = \sum_k -ak Mf Phi_k^dag [ dM^dag M + M^dag dM ] Mf + // Phi_k + // S = innerprodReal(Phi,Mf Phi); + ////////////////////////////////////////////////////// + virtual void deriv(const GaugeField &U, GaugeField &dSdU) { + const int Npole = PowerNegHalf.poles.size(); - ////////////////////////////////////////////////////// - // FIXME : Clover term not yet.. 
- ////////////////////////////////////////////////////// + std::vector MPhi_k(Npole, FermOp.FermionRedBlackGrid()); - assert(FermOp.ConstEE() == 1); - PhiEven = zero; - - }; + FermionField X(FermOp.FermionRedBlackGrid()); + FermionField Y(FermOp.FermionRedBlackGrid()); - ////////////////////////////////////////////////////// - // S = phi^dag (Mdag M)^-1/2 phi - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { + GaugeField tmp(FermOp.GaugeGrid()); - FermOp.ImportGauge(U); + FermOp.ImportGauge(U); - FermionField Y(FermOp.FermionRedBlackGrid()); - - SchurDifferentiableOperator Mpc(FermOp); + SchurDifferentiableOperator Mpc(FermOp); - ConjugateGradientMultiShift msCG(param.MaxIter,PowerNegQuarter); + ConjugateGradientMultiShift msCG(param.MaxIter, PowerNegHalf); - msCG(Mpc,PhiOdd,Y); + msCG(Mpc, PhiOdd, MPhi_k); - RealD action = norm2(Y); - std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 solve or -1/2 solve faster??? "< MPhi_k (Npole,FermOp.FermionRedBlackGrid()); - - FermionField X(FermOp.FermionRedBlackGrid()); - FermionField Y(FermOp.FermionRedBlackGrid()); - - GaugeField tmp(FermOp.GaugeGrid()); - - FermOp.ImportGauge(U); - - SchurDifferentiableOperator Mpc(FermOp); - - ConjugateGradientMultiShift msCG(param.MaxIter,PowerNegHalf); - - msCG(Mpc,PhiOdd,MPhi_k); - - dSdU = zero; - for(int k=0;k Author: Peter Boyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
- This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_H -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - //////////////////////////////////////////////////////////////////////// - // Two flavour pseudofermion action for any dop - //////////////////////////////////////////////////////////////////////// - template - class TwoFlavourPseudoFermionAction : public Action { - public: - INHERIT_IMPL_TYPES(Impl); +//////////////////////////////////////////////////////////////////////// +// Two flavour pseudofermion action for any dop +//////////////////////////////////////////////////////////////////////// +template +class TwoFlavourPseudoFermionAction : public Action { + 
public: + INHERIT_IMPL_TYPES(Impl); - private: - - FermionOperator & FermOp;// the basic operator + private: + FermionOperator &FermOp; // the basic operator - OperatorFunction &DerivativeSolver; + OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; + OperatorFunction &ActionSolver; - FermionField Phi; // the pseudo fermion field for this trajectory + FermionField Phi; // the pseudo fermion field for this trajectory - public: - ///////////////////////////////////////////////// - // Pass in required objects. - ///////////////////////////////////////////////// - TwoFlavourPseudoFermionAction(FermionOperator &Op, - OperatorFunction & DS, - OperatorFunction & AS - ) : FermOp(Op), DerivativeSolver(DS), ActionSolver(AS), Phi(Op.FermionGrid()) { - }; - - ////////////////////////////////////////////////////////////////////////////////////// - // Push the gauge field in to the dops. Assume any BC's and smearing already applied - ////////////////////////////////////////////////////////////////////////////////////// - virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { + public: + ///////////////////////////////////////////////// + // Pass in required objects. + ///////////////////////////////////////////////// + TwoFlavourPseudoFermionAction(FermionOperator &Op, + OperatorFunction &DS, + OperatorFunction &AS) + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), + Phi(Op.FermionGrid()){}; - // P(phi) = e^{- phi^dag (MdagM)^-1 phi} - // Phi = Mdag eta - // P(eta) = e^{- eta^dag eta} - // - // e^{x^2/2 sig^2} => sig^2 = 0.5. - // - // So eta should be of width sig = 1/sqrt(2). - // and must multiply by 0.707.... - // - // Chroma has this scale factor: two_flavor_monomial_w.h - // IroIro: does not use this scale. It is absorbed by a change of vars - // in the Phi integral, and thus is only an irrelevant prefactor for the partition function. 
- // - RealD scale = std::sqrt(0.5); - FermionField eta(FermOp.FermionGrid()); + ////////////////////////////////////////////////////////////////////////////////////// + // Push the gauge field in to the dops. Assume any BC's and smearing already + // applied + ////////////////////////////////////////////////////////////////////////////////////// + virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { + // P(phi) = e^{- phi^dag (MdagM)^-1 phi} + // Phi = Mdag eta + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2). + // and must multiply by 0.707.... + // + // Chroma has this scale factor: two_flavor_monomial_w.h + // IroIro: does not use this scale. It is absorbed by a change of vars + // in the Phi integral, and thus is only an irrelevant prefactor for + // the partition function. + // + RealD scale = std::sqrt(0.5); + FermionField eta(FermOp.FermionGrid()); - gaussian(pRNG,eta); + gaussian(pRNG, eta); - FermOp.ImportGauge(U); - FermOp.Mdag(eta,Phi); + FermOp.ImportGauge(U); + FermOp.Mdag(eta, Phi); - Phi=Phi*scale; - - }; + Phi = Phi * scale; + }; - ////////////////////////////////////////////////////// - // S = phi^dag (Mdag M)^-1 phi - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { + ////////////////////////////////////////////////////// + // S = phi^dag (Mdag M)^-1 phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + FermOp.ImportGauge(U); - FermOp.ImportGauge(U); + FermionField X(FermOp.FermionGrid()); + FermionField Y(FermOp.FermionGrid()); - FermionField X(FermOp.FermionGrid()); - FermionField Y(FermOp.FermionGrid()); - - MdagMLinearOperator ,FermionField> MdagMOp(FermOp); - X=zero; - ActionSolver(MdagMOp,Phi,X); - MdagMOp.Op(X,Y); + MdagMLinearOperator, FermionField> MdagMOp(FermOp); + X = zero; + ActionSolver(MdagMOp, Phi, X); + MdagMOp.Op(X, Y); - RealD action = 
norm2(Y); - std::cout << GridLogMessage << "Pseudofermion action "<, FermionField> MdagMOp(FermOp); - MdagMLinearOperator ,FermionField> MdagMOp(FermOp); + X = zero; + DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi + MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi - X=zero; - DerivativeSolver(MdagMOp,Phi,X); - MdagMOp.Op(X,Y); + // Our conventions really make this UdSdU; We do not differentiate wrt Udag + // here. + // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. - // Our conventions really make this UdSdU; We do not differentiate wrt Udag here. - // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. + FermOp.MDeriv(tmp, Y, X, DaggerNo); + dSdU = tmp; + FermOp.MDeriv(tmp, X, Y, DaggerYes); + dSdU = dSdU + tmp; - FermOp.MDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; - FermOp.MDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; - - dSdU = Ta(dSdU); - - }; - - }; - - } + // not taking here the traceless antihermitian component + }; +}; +} } #endif diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h index e3ac2b2e..5af1761e 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h @@ -1,70 +1,66 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h +Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later 
version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { +//////////////////////////////////////////////////////////////////////// +// Two flavour pseudofermion action for any EO prec dop +//////////////////////////////////////////////////////////////////////// +template +class TwoFlavourEvenOddPseudoFermionAction + : public Action { + public: + INHERIT_IMPL_TYPES(Impl); + private: + FermionOperator &FermOp; // the basic operator - //////////////////////////////////////////////////////////////////////// - // Two flavour pseudofermion action for any EO prec dop - //////////////////////////////////////////////////////////////////////// - template - class TwoFlavourEvenOddPseudoFermionAction : public Action { + OperatorFunction &DerivativeSolver; + OperatorFunction &ActionSolver; - public: + FermionField PhiOdd; // the pseudo fermion field for this trajectory + FermionField PhiEven; // the pseudo fermion field for this trajectory - INHERIT_IMPL_TYPES(Impl); - - private: - - FermionOperator & FermOp;// the basic operator - - OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; - - FermionField PhiOdd; // the pseudo fermion field for this trajectory - FermionField PhiEven; // the pseudo fermion field for this trajectory - - public: - ///////////////////////////////////////////////// - // Pass in required objects. 
- ///////////////////////////////////////////////// - TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, - OperatorFunction & DS, - OperatorFunction & AS - ) : - FermOp(Op), - DerivativeSolver(DS), - ActionSolver(AS), + public: + ///////////////////////////////////////////////// + // Pass in required objects. + ///////////////////////////////////////////////// + TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, + OperatorFunction &DS, + OperatorFunction &AS) + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), PhiEven(Op.FermionRedBlackGrid()), PhiOdd(Op.FermionRedBlackGrid()) {}; @@ -100,7 +96,7 @@ namespace Grid{ PhiOdd =PhiOdd*scale; PhiEven=PhiEven*scale; - + }; ////////////////////////////////////////////////////// @@ -173,7 +169,7 @@ namespace Grid{ FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; */ - dSdU = Ta(dSdU); + //dSdU = Ta(dSdU); }; diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index b1d73a7c..5e3b80d9 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -131,9 +131,11 @@ namespace Grid{ Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi X=zero; ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi - Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi + //Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi + // Multiply by Ydag + RealD action = real(innerProduct(Y,X)); - RealD action = norm2(Y); + //RealD action = norm2(Y); // The EE factorised block; normally can replace with zero if det is constant (gauge field indept) // Only really clover term that creates this. 
Leave the EE portion as a future to do to make most @@ -188,8 +190,9 @@ namespace Grid{ assert(NumOp.ConstEE() == 1); assert(DenOp.ConstEE() == 1); - dSdU = -Ta(dSdU); - + //dSdU = -Ta(dSdU); + dSdU = -dSdU; + }; }; } diff --git a/lib/qcd/action/pseudofermion/TwoFlavourRatio.h b/lib/qcd/action/pseudofermion/TwoFlavourRatio.h index 8a28f3b0..26d21094 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourRatio.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourRatio.h @@ -155,7 +155,8 @@ namespace Grid{ DenOp.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU-force; DenOp.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU-force; - dSdU = - Ta(dSdU); + dSdU *= -1.0; + //dSdU = - Ta(dSdU); }; }; diff --git a/lib/qcd/hmc/HMC.h b/lib/qcd/hmc/HMC.h index 9e762832..05838349 100644 --- a/lib/qcd/hmc/HMC.h +++ b/lib/qcd/hmc/HMC.h @@ -1,33 +1,34 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/HMC.h +Source file: ./lib/qcd/hmc/HMC.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ //-------------------------------------------------------------------- /*! 
@file HMC.h * @brief Classes for Hybrid Monte Carlo update @@ -41,172 +42,195 @@ Author: paboyle #include +namespace Grid { +namespace QCD { -namespace Grid{ - namespace QCD{ - +struct HMCparameters { + Integer StartTrajectory; + Integer Trajectories; /* @brief Number of sweeps in this run */ + bool MetropolisTest; + Integer NoMetropolisUntil; - struct HMCparameters{ + HMCparameters() { + ////////////////////////////// Default values + MetropolisTest = true; + NoMetropolisUntil = 10; + StartTrajectory = 0; + Trajectories = 200; + ///////////////////////////////// + } - Integer StartTrajectory; - Integer Trajectories; /* @brief Number of sweeps in this run */ - bool MetropolisTest; - Integer NoMetropolisUntil; + void print() const { + std::cout << GridLogMessage << "[HMC parameter] Trajectories : " << Trajectories << "\n"; + std::cout << GridLogMessage << "[HMC parameter] Start trajectory : " << StartTrajectory << "\n"; + std::cout << GridLogMessage << "[HMC parameter] Metropolis test (on/off): " << MetropolisTest << "\n"; + std::cout << GridLogMessage << "[HMC parameter] Thermalization trajs : " << NoMetropolisUntil << "\n"; + } + +}; - HMCparameters(){ - ////////////////////////////// Default values - MetropolisTest = true; - NoMetropolisUntil = 10; - StartTrajectory = 0; - Trajectories = 200; - ///////////////////////////////// - } - }; +template +class HmcObservable { + public: + virtual void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, + GridParallelRNG &pRNG) = 0; +}; - template - class HmcObservable { - public: - virtual void TrajectoryComplete (int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG & pRNG )=0; - }; +template +class PlaquetteLogger : public HmcObservable { + private: + std::string Stem; - template - class PlaquetteLogger : public HmcObservable { - private: - std::string Stem; - public: - INHERIT_GIMPL_TYPES(Gimpl); - PlaquetteLogger(std::string cf) { - Stem = cf; - }; + public: + INHERIT_GIMPL_TYPES(Gimpl); + 
PlaquetteLogger(std::string cf) { Stem = cf; }; - void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG & pRNG ) - { - std::string file; { std::ostringstream os; os << Stem <<"."<< traj; file = os.str(); } - std::ofstream of(file); + void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, + GridParallelRNG &pRNG) { + std::string file; + { + std::ostringstream os; + os << Stem << "." << traj; + file = os.str(); + } + std::ofstream of(file); - RealD peri_plaq = WilsonLoops::avgPlaquette(U); - RealD peri_rect = WilsonLoops::avgRectangle(U); + RealD peri_plaq = WilsonLoops::avgPlaquette(U); + RealD peri_rect = WilsonLoops::avgRectangle(U); - RealD impl_plaq = WilsonLoops::avgPlaquette(U); - RealD impl_rect = WilsonLoops::avgRectangle(U); + RealD impl_plaq = WilsonLoops::avgPlaquette(U); + RealD impl_rect = WilsonLoops::avgRectangle(U); - of << traj<<" "<< impl_plaq << " " << impl_rect << " "<< peri_plaq<<" "< - template - class HybridMonteCarlo { - private: +// template +template +class HybridMonteCarlo { + private: + const HMCparameters Params; - const HMCparameters Params; - - GridSerialRNG &sRNG; // Fixme: need a RNG management strategy. - GridParallelRNG &pRNG; // Fixme: need a RNG management strategy. - GaugeField & Ucur; + GridSerialRNG &sRNG; // Fixme: need a RNG management strategy. + GridParallelRNG &pRNG; // Fixme: need a RNG management strategy. 
+ GaugeField &Ucur; - IntegratorType &TheIntegrator; - std::vector *> Observables; + IntegratorType &TheIntegrator; + std::vector *> Observables; - ///////////////////////////////////////////////////////// - // Metropolis step - ///////////////////////////////////////////////////////// - bool metropolis_test(const RealD DeltaH){ + ///////////////////////////////////////////////////////// + // Metropolis step + ///////////////////////////////////////////////////////// + bool metropolis_test(const RealD DeltaH) { + RealD rn_test; - RealD rn_test; + RealD prob = std::exp(-DeltaH); - RealD prob = std::exp(-DeltaH); + random(sRNG, rn_test); - random(sRNG,rn_test); - - std::cout<1.0) || (rn_test <= prob)){ // accepted - std::cout< 1.0) || (rn_test <= prob)) { // accepted + std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n"; + std::cout << GridLogMessage + << "--------------------------------------------------\n"; + return true; + } else { // rejected + std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n"; + std::cout << GridLogMessage + << "--------------------------------------------------\n"; + return false; + } + } + + ///////////////////////////////////////////////////////// + // Evolution + ///////////////////////////////////////////////////////// + RealD evolve_step(GaugeField &U) { + TheIntegrator.refresh(U, pRNG); // set U and initialize P and phi's + + RealD H0 = TheIntegrator.S(U); // initial state action + + std::streamsize current_precision = std::cout.precision(); + std::cout.precision(17); + std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n"; + std::cout.precision(current_precision); + + TheIntegrator.integrate(U); + + RealD H1 = TheIntegrator.S(U); // updated state action + + std::cout.precision(17); + std::cout << GridLogMessage << "Total H after trajectory = " << H1 + << " dH = " << H1 - H0 << "\n"; + std::cout.precision(current_precision); + + return (H1 - H0); + } + + public: + 
///////////////////////////////////////// + // Constructor + ///////////////////////////////////////// + HybridMonteCarlo(HMCparameters Pams, IntegratorType &_Int, + GridSerialRNG &_sRNG, GridParallelRNG &_pRNG, GaugeField &_U) + : Params(Pams), TheIntegrator(_Int), sRNG(_sRNG), pRNG(_pRNG), Ucur(_U) {} + ~HybridMonteCarlo(){}; + + void AddObservable(HmcObservable *obs) { + Observables.push_back(obs); + } + + void evolve(void) { + Real DeltaH; + + GaugeField Ucopy(Ucur._grid); + + Params.print(); + + // Actual updates (evolve a copy Ucopy then copy back eventually) + for (int traj = Params.StartTrajectory; + traj < Params.Trajectories + Params.StartTrajectory; ++traj) { + std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n"; + Ucopy = Ucur; + + DeltaH = evolve_step(Ucopy); + + bool accept = true; + if (traj >= Params.NoMetropolisUntil) { + accept = metropolis_test(DeltaH); } - ///////////////////////////////////////////////////////// - // Evolution - ///////////////////////////////////////////////////////// - RealD evolve_step(GaugeField& U){ - - TheIntegrator.refresh(U,pRNG); // set U and initialize P and phi's - - RealD H0 = TheIntegrator.S(U); // initial state action - - std::cout< *obs) { - Observables.push_back(obs); + if (accept) { + Ucur = Ucopy; } - void evolve(void){ - - Real DeltaH; - - GaugeField Ucopy(Ucur._grid); - - // Actual updates (evolve a copy Ucopy then copy back eventually) - for(int traj=Params.StartTrajectory; traj < Params.Trajectories+Params.StartTrajectory; ++traj){ - - std::cout< Params.NoMetropolisUntil) { - accept = metropolis_test(DeltaH); - } - - if ( accept ) { - Ucur = Ucopy; - } - - for(int obs = 0;obsTrajectoryComplete (traj+1,Ucur,sRNG,pRNG); - } - - } + for (int obs = 0; obs < Observables.size(); obs++) { + Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG); } - }; - - }// QCD -}// Grid + } + } +}; +} // QCD +} // Grid -#endif +#endif diff --git a/lib/qcd/hmc/HmcRunner.h b/lib/qcd/hmc/HmcRunner.h 
index a9178c74..53b127cf 100644 --- a/lib/qcd/hmc/HmcRunner.h +++ b/lib/qcd/hmc/HmcRunner.h @@ -1,156 +1,191 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/HmcRunner.h +Source file: ./lib/qcd/hmc/HmcRunner.h - Copyright (C) 2015 +Copyright (C) 2015 Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef HMC_RUNNER #define HMC_RUNNER -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - -template +template class NerscHmcRunnerTemplate { -public: - + public: INHERIT_GIMPL_TYPES(Gimpl); enum StartType_t { ColdStart, HotStart, TepidStart, CheckpointStart }; - ActionSet TheAction; + ActionSet TheAction; - GridCartesian * UGrid ; - GridCartesian * FGrid ; - GridRedBlackCartesian * UrbGrid ; - GridRedBlackCartesian * FrbGrid ; + GridCartesian *UGrid; + GridCartesian *FGrid; + GridRedBlackCartesian *UrbGrid; + GridRedBlackCartesian *FrbGrid; - virtual void BuildTheAction (int argc, char **argv) = 0; - - - void Run (int argc, char **argv){ + virtual void BuildTheAction(int argc, char **argv) = 0; // necessary? 
+ void Run(int argc, char **argv) { StartType_t StartType = HotStart; std::string arg; - if( GridCmdOptionExists(argv,argv+argc,"--StartType") ){ - arg = GridCmdOptionPayload(argv,argv+argc,"--StartType"); - if ( arg == "HotStart" ) { StartType = HotStart; } - else if ( arg == "ColdStart" ) { StartType = ColdStart; } - else if ( arg == "TepidStart" ) { StartType = TepidStart; } - else if ( arg == "CheckpointStart" ) { StartType = CheckpointStart; } - else assert(0); + if (GridCmdOptionExists(argv, argv + argc, "--StartType")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--StartType"); + if (arg == "HotStart") { + StartType = HotStart; + } else if (arg == "ColdStart") { + StartType = ColdStart; + } else if (arg == "TepidStart") { + StartType = TepidStart; + } else if (arg == "CheckpointStart") { + StartType = CheckpointStart; + } else { + std::cout << GridLogError << "Unrecognized option in --StartType\n"; + std::cout << GridLogError << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + assert(0); + } } int StartTraj = 0; - if( GridCmdOptionExists(argv,argv+argc,"--StartTrajectory") ){ - arg= GridCmdOptionPayload(argv,argv+argc,"--StartTrajectory"); + if (GridCmdOptionExists(argv, argv + argc, "--StartTrajectory")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--StartTrajectory"); std::vector ivec(0); - GridCmdOptionIntVector(arg,ivec); + GridCmdOptionIntVector(arg, ivec); StartTraj = ivec[0]; - } + } int NumTraj = 1; - if( GridCmdOptionExists(argv,argv+argc,"--Trajectories") ){ - arg= GridCmdOptionPayload(argv,argv+argc,"--Trajectories"); + if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--Trajectories"); std::vector ivec(0); - GridCmdOptionIntVector(arg,ivec); + GridCmdOptionIntVector(arg, ivec); NumTraj = ivec[0]; } - // Create integrator - typedef MinimumNorm2 IntegratorType;// change here to change the algorithm - IntegratorParameters MDpar(20); - IntegratorType 
MDynamics(UGrid,MDpar, TheAction); + int NumThermalizations = 10; + if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--Thermalizations"); + std::vector ivec(0); + GridCmdOptionIntVector(arg, ivec); + NumThermalizations = ivec[0]; + } + + GridSerialRNG sRNG; + GridParallelRNG pRNG(UGrid); + LatticeGaugeField U(UGrid); // change this to an extended field (smearing class)? + + std::vector SerSeed({1, 2, 3, 4, 5}); + std::vector ParSeed({6, 7, 8, 9, 10}); + + // Create integrator, including the smearing policy + // Smearing policy, only defined for Nc=3 + /* + std::cout << GridLogDebug << " Creating the Stout class\n"; + double rho = 0.1; // smearing parameter, now hardcoded + int Nsmear = 1; // number of smearing levels + Smear_Stout Stout(rho); + std::cout << GridLogDebug << " Creating the SmearedConfiguration class\n"; + //SmearedConfiguration SmearingPolicy(UGrid, Nsmear, Stout); + std::cout << GridLogDebug << " done\n"; + */ + ////////////// + NoSmearing SmearingPolicy; + typedef MinimumNorm2, RepresentationsPolicy > + IntegratorType; // change here to change the algorithm + IntegratorParameters MDpar(40, 1.0); + IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy); // Checkpoint strategy - NerscHmcCheckpointer Checkpoint(std::string("ckpoint_lat"),std::string("ckpoint_rng"),1); - PlaquetteLogger PlaqLog(std::string("plaq")); + NerscHmcCheckpointer Checkpoint(std::string("ckpoint_lat"), + std::string("ckpoint_rng"), 1); + PlaquetteLogger PlaqLog(std::string("plaq")); HMCparameters HMCpar; HMCpar.StartTrajectory = StartTraj; - HMCpar.Trajectories = NumTraj; - - GridSerialRNG sRNG; - GridParallelRNG pRNG(UGrid); - LatticeGaugeField U(UGrid); + HMCpar.Trajectories = NumTraj; + HMCpar.NoMetropolisUntil = NumThermalizations; - std::vector SerSeed({1,2,3,4,5}); - std::vector ParSeed({6,7,8,9,10}); - - if ( StartType == HotStart ) { + if (StartType == HotStart) { // Hot start - 
HMCpar.NoMetropolisUntil =10; HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::HotConfiguration(pRNG, U); - } else if ( StartType == ColdStart ) { + SU::HotConfiguration(pRNG, U); + } else if (StartType == ColdStart) { // Cold start - HMCpar.NoMetropolisUntil =10; HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::ColdConfiguration(pRNG, U); - } else if ( StartType == TepidStart ) { + SU::ColdConfiguration(pRNG, U); + } else if (StartType == TepidStart) { // Tepid start - HMCpar.NoMetropolisUntil =10; HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::TepidConfiguration(pRNG, U); - } else if ( StartType == CheckpointStart ) { - HMCpar.NoMetropolisUntil =10; + SU::TepidConfiguration(pRNG, U); + } else if (StartType == CheckpointStart) { HMCpar.MetropolisTest = true; // CheckpointRestart Checkpoint.CheckpointRestore(StartTraj, U, sRNG, pRNG); } - HybridMonteCarlo HMC(HMCpar, MDynamics,sRNG,pRNG,U); + // Attach the gauge field to the smearing Policy and create the fill the + // smeared set + // notice that the unit configuration is singular in this procedure + std::cout << GridLogMessage << "Filling the smeared set\n"; + SmearingPolicy.set_GaugeField(U); + + HybridMonteCarlo HMC(HMCpar, MDynamics, sRNG, + pRNG, U); HMC.AddObservable(&Checkpoint); HMC.AddObservable(&PlaqLog); - + // Run it HMC.evolve(); - } - }; - typedef NerscHmcRunnerTemplate NerscHmcRunner; - typedef NerscHmcRunnerTemplate NerscHmcRunnerF; - typedef NerscHmcRunnerTemplate NerscHmcRunnerD; +typedef NerscHmcRunnerTemplate NerscHmcRunner; +typedef NerscHmcRunnerTemplate NerscHmcRunnerF; +typedef NerscHmcRunnerTemplate NerscHmcRunnerD; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunner; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerF; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerD; +typedef NerscHmcRunnerTemplate 
PeriodicNerscHmcRunner; +typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerF; +typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerD; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunner; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerF; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerD; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunner; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerF; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerD; -}} +template +using NerscHmcRunnerHirep = NerscHmcRunnerTemplate; + + + +} +} #endif diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h index e6f6c1cd..f89b7959 100644 --- a/lib/qcd/hmc/integrators/Integrator.h +++ b/lib/qcd/hmc/integrators/Integrator.h @@ -1,33 +1,34 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/integrators/Integrator.h +Source file: ./lib/qcd/hmc/integrators/Integrator.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ //-------------------------------------------------------------------- /*! 
@file Integrator.h * @brief Classes for the Molecular Dynamics integrator @@ -40,188 +41,278 @@ Author: paboyle #ifndef INTEGRATOR_INCLUDED #define INTEGRATOR_INCLUDED -//class Observer; +// class Observer; #include -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - struct IntegratorParameters{ +struct IntegratorParameters { + int Nexp; + int MDsteps; // number of outer steps + RealD trajL; // trajectory length + RealD stepsize; - int Nexp; - int MDsteps; // number of outer steps - RealD trajL; // trajectory length - RealD stepsize; + IntegratorParameters(int MDsteps_, RealD trajL_ = 1.0, int Nexp_ = 12) + : Nexp(Nexp_), + MDsteps(MDsteps_), + trajL(trajL_), + stepsize(trajL / MDsteps){ + // empty body constructor + }; +}; - IntegratorParameters(int MDsteps_, - RealD trajL_=1.0, - int Nexp_=12): - Nexp(Nexp_), - MDsteps(MDsteps_), - trajL(trajL_), - stepsize(trajL/MDsteps) - { - // empty body constructor - }; +/*! @brief Class for Molecular Dynamics management */ +template +class Integrator { + protected: + typedef IntegratorParameters ParameterType; - }; + IntegratorParameters Params; - /*! 
@brief Class for Molecular Dynamics management */ - template - class Integrator { + const ActionSet as; - protected: + int levels; // + double t_U; // Track time passing on each level and for U and for P + std::vector t_P; // - typedef IntegratorParameters ParameterType; + GaugeField P; - IntegratorParameters Params; + SmearingPolicy& Smearer; - const ActionSet as; + RepresentationPolicy Representations; - int levels; // - double t_U; // Track time passing on each level and for U and for P - std::vector t_P; // - - GaugeField P; - - // Should match any legal (SU(n)) gauge field - // Need to use this template to match Ncol to pass to SU class - template void generate_momenta(Lattice< iVector< iScalar< iMatrix >, Nd> > & P,GridParallelRNG& pRNG){ - typedef Lattice< iScalar< iScalar< iMatrix > > > GaugeLinkField; - GaugeLinkField Pmu(P._grid); - Pmu = zero; - for(int mu=0;mu::GaussianLieAlgebraMatrix(pRNG, Pmu); - PokeIndex(P, Pmu, mu); - } - } - - - //ObserverList observers; // not yet - // typedef std::vector ObserverList; - // void register_observers(); - // void notify_observers(); - - void update_P(GaugeField&U, int level,double ep){ - t_P[level]+=ep; - update_P(P,U,level,ep); - - std::cout<deriv(U,force); - Mom = Mom - force*ep; - } - } - - void update_U(GaugeField&U, double ep){ - update_U(P,U,ep); - - t_U+=ep; - int fl = levels-1; - std::cout<(U, mu); - auto Pmu=PeekIndex(Mom, mu); - Umu = expMat(Pmu, ep, Params.Nexp)*Umu; - ProjectOnGroup(Umu); - PokeIndex(U, Umu, mu); - } - } - - virtual void step (GaugeField& U,int level, int first,int last)=0; - - public: - - Integrator(GridBase* grid, - IntegratorParameters Par, - ActionSet & Aset): - Params(Par), - as(Aset), - P(grid), - levels(Aset.size()) - { - t_P.resize(levels,0.0); - t_U=0.0; - }; - - virtual ~Integrator(){} - - //Initialization of momenta and actions - void refresh(GaugeField& U,GridParallelRNG &pRNG){ - std::cout<refresh(U, pRNG); - } - } - } - - // Calculate action - RealD S(GaugeField& U){ - - 
LatticeComplex Hloc(U._grid); Hloc = zero; - // Momenta - for (int mu=0; mu (P, mu); - Hloc -= trace(Pmu*Pmu); - } - Complex Hsum = sum(Hloc); - - RealD H = Hsum.real(); - RealD Hterm; - std::cout<S(U); - std::cout< class + template + void generate_momenta(Lattice >, Nd> >& P, + GridParallelRNG& pRNG) { + typedef Lattice > > > GaugeLinkField; + GaugeLinkField Pmu(P._grid); + Pmu = zero; + for (int mu = 0; mu < Nd; mu++) { + SU::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); + PokeIndex(P, Pmu, mu); + } } + + // ObserverList observers; // not yet + // typedef std::vector ObserverList; + // void register_observers(); + // void notify_observers(); + + void update_P(GaugeField& U, int level, double ep) { + t_P[level] += ep; + update_P(P, U, level, ep); + + std::cout << GridLogIntegrator << "[" << level << "] P " + << " dt " << ep << " : t_P " << t_P[level] << std::endl; + } + + // to be used by the actionlevel class to iterate + // over the representations + struct _updateP { + template + void operator()(std::vector*> repr_set, Repr& Rep, + GF& Mom, GF& U, double ep) { + for (int a = 0; a < repr_set.size(); ++a) { + FieldType forceR(U._grid); + // Implement smearing only for the fundamental representation now + repr_set.at(a)->deriv(Rep.U, forceR); + GF force = + Rep.RtoFundamentalProject(forceR); // Ta for the fundamental rep + std::cout << GridLogIntegrator << "Hirep Force average: " + << norm2(force) / (U._grid->gSites()) << std::endl; + Mom -= force * ep ; + } + } + } update_P_hireps{}; + + void update_P(GaugeField& Mom, GaugeField& U, int level, double ep) { + // input U actually not used in the fundamental case + // Fundamental updates, include smearing + for (int a = 0; a < as[level].actions.size(); ++a) { + GaugeField force(U._grid); + GaugeField& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); + as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta + + std::cout << GridLogIntegrator + << "Smearing (on/off): " << 
as[level].actions.at(a)->is_smeared + << std::endl; + if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); + force = Ta(force); + std::cout << GridLogIntegrator + << "Force average: " << norm2(force) / (U._grid->gSites()) + << std::endl; + Mom -= force * ep; + } + + // Force from the other representations + as[level].apply(update_P_hireps, Representations, Mom, U, ep); + } + + void update_U(GaugeField& U, double ep) { + update_U(P, U, ep); + + t_U += ep; + int fl = levels - 1; + std::cout << GridLogIntegrator << " " + << "[" << fl << "] U " + << " dt " << ep << " : t_U " << t_U << std::endl; + } + void update_U(GaugeField& Mom, GaugeField& U, double ep) { + // rewrite exponential to deal internally with the lorentz index? + for (int mu = 0; mu < Nd; mu++) { + auto Umu = PeekIndex(U, mu); + auto Pmu = PeekIndex(Mom, mu); + Umu = expMat(Pmu, ep, Params.Nexp) * Umu; + PokeIndex(U, ProjectOnGroup(Umu), mu); + } + + // Update the smeared fields, can be implemented as observer + Smearer.set_GaugeField(U); + // Update the higher representations fields + Representations.update(U); // void functions if fundamental representation + } + + virtual void step(GaugeField& U, int level, int first, int last) = 0; + + public: + Integrator(GridBase* grid, IntegratorParameters Par, + ActionSet& Aset, + SmearingPolicy& Sm) + : Params(Par), + as(Aset), + P(grid), + levels(Aset.size()), + Smearer(Sm), + Representations(grid) { + t_P.resize(levels, 0.0); + t_U = 0.0; + // initialization of smearer delegated outside of Integrator + }; + + virtual ~Integrator() {} + + // to be used by the actionlevel class to iterate + // over the representations + struct _refresh { + template + void operator()(std::vector*> repr_set, Repr& Rep, + GridParallelRNG& pRNG) { + for (int a = 0; a < repr_set.size(); ++a){ + repr_set.at(a)->refresh(Rep.U, pRNG); + + std::cout << GridLogDebug << "Hirep refreshing pseudofermions" << std::endl; + } + } + } refresh_hireps{}; + + // Initialization of 
momenta and actions + void refresh(GaugeField& U, GridParallelRNG& pRNG) { + std::cout << GridLogIntegrator << "Integrator refresh\n"; + generate_momenta(P, pRNG); + + // Update the smeared fields, can be implemented as observer + // necessary to keep the fields updated even after a reject + // of the Metropolis + Smearer.set_GaugeField(U); + // Set the (eventual) representations gauge fields + Representations.update(U); + + // The Smearer is attached to a pointer of the gauge field + // automatically gets the correct field + // whether or not has been accepted in the previous sweep + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and + // based on the boolean is_smeared in actionID + GaugeField& Us = + Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + as[level].actions.at(actionID)->refresh(Us, pRNG); + } + + // Refresh the higher representation actions + as[level].apply(refresh_hireps, Representations, pRNG); + } + } + + // to be used by the actionlevel class to iterate + // over the representations + struct _S { + template + void operator()(std::vector*> repr_set, Repr& Rep, + int level, RealD& H) { + + for (int a = 0; a < repr_set.size(); ++a) { + RealD Hterm = repr_set.at(a)->S(Rep.U); + std::cout << GridLogMessage << "S Level " << level << " term " << a + << " H Hirep = " << Hterm << std::endl; + H += Hterm; + + } + } + } S_hireps{}; + + // Calculate action + RealD S(GaugeField& U) { // here also U not used + + LatticeComplex Hloc(U._grid); + Hloc = zero; + // Momenta + for (int mu = 0; mu < Nd; mu++) { + auto Pmu = PeekIndex(P, mu); + Hloc -= trace(Pmu * Pmu); + } + Complex Hsum = sum(Hloc); + + RealD H = Hsum.real(); + RealD Hterm; + std::cout << GridLogMessage << "Momentum action H_p = " << H << "\n"; + + // Actions + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < 
as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and + // based on the boolean is_smeared in actionID + GaugeField& Us = + Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + Hterm = as[level].actions.at(actionID)->S(Us); + std::cout << GridLogMessage << "S Level " << level << " term " + << actionID << " H = " << Hterm << std::endl; + H += Hterm; + } + as[level].apply(S_hireps, Representations, level, H); + } + + return H; + } + + void integrate(GaugeField& U) { + // reset the clocks + t_U = 0; + for (int level = 0; level < as.size(); ++level) { + t_P[level] = 0; + } + + for (int step = 0; step < Params.MDsteps; ++step) { // MD step + int first_step = (step == 0); + int last_step = (step == Params.MDsteps - 1); + this->step(U, 0, first_step, last_step); + } + + // Check the clocks all match on all levels + for (int level = 0; level < as.size(); ++level) { + assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same + std::cout << GridLogIntegrator << " times[" << level + << "]= " << t_P[level] << " " << t_U << std::endl; + } + + // and that we indeed got to the end of the trajectory + assert(fabs(t_U - Params.trajL) < 1.0e-6); + } +}; } -#endif//INTEGRATOR_INCLUDED +} +#endif // INTEGRATOR_INCLUDED diff --git a/lib/qcd/hmc/integrators/Integrator_algorithm.h b/lib/qcd/hmc/integrators/Integrator_algorithm.h index eb1b30ad..cd289b08 100644 --- a/lib/qcd/hmc/integrators/Integrator_algorithm.h +++ b/lib/qcd/hmc/integrators/Integrator_algorithm.h @@ -91,14 +91,19 @@ namespace Grid{ * P 1/2 P 1/2 */ - template class LeapFrog : public Integrator { + template > class LeapFrog : + public Integrator { public: - typedef LeapFrog Algorithm; + typedef LeapFrog Algorithm; LeapFrog(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset): Integrator(grid,Par,Aset) {}; + ActionSet & Aset, + SmearingPolicy & Sm): + Integrator(grid,Par,Aset,Sm) {}; void step (GaugeField& U, int level,int _first, int _last){ @@ -135,7 +140,10 @@ 
namespace Grid{ } }; - template class MinimumNorm2 : public Integrator { + template > class MinimumNorm2 : + public Integrator { private: const RealD lambda = 0.1931833275037836; @@ -143,7 +151,9 @@ namespace Grid{ MinimumNorm2(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset): Integrator(grid,Par,Aset) {}; + ActionSet & Aset, + SmearingPolicy& Sm): + Integrator(grid,Par,Aset,Sm) {}; void step (GaugeField& U, int level, int _first,int _last){ @@ -191,7 +201,10 @@ namespace Grid{ }; - template class ForceGradient : public Integrator { + template > class ForceGradient : + public Integrator { private: const RealD lambda = 1.0/6.0;; const RealD chi = 1.0/72.0; @@ -202,7 +215,9 @@ namespace Grid{ // Looks like dH scales as dt^4. tested wilson/wilson 2 level. ForceGradient(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset): Integrator(grid,Par,Aset) {}; + ActionSet & Aset, + SmearingPolicy &Sm): + Integrator(grid,Par,Aset, Sm) {}; void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){ diff --git a/lib/qcd/representations/adjoint.h b/lib/qcd/representations/adjoint.h new file mode 100644 index 00000000..facc72f1 --- /dev/null +++ b/lib/qcd/representations/adjoint.h @@ -0,0 +1,115 @@ +/* + * Policy classes for the HMC + * Author: Guido Cossu +*/ + +#ifndef ADJOINT_H +#define ADJOINT_H + +namespace Grid { +namespace QCD { + +/* +* This is an helper class for the HMC +* Should contain only the data for the adjoint representation +* and the facility to convert from the fundamental -> adjoint +*/ + +template +class AdjointRep { + public: + // typdef to be used by the Representations class in HMC to get the + // types for the higher representation fields + typedef typename SU_Adjoint::LatticeAdjMatrix LatticeMatrix; + typedef typename SU_Adjoint::LatticeAdjField LatticeField; + static const int Dimension = ncolour * ncolour - 1; + + LatticeField U; + + explicit AdjointRep(GridBase *grid) : U(grid) {} + + void update_representation(const 
LatticeGaugeField &Uin) { + std::cout << GridLogDebug << "Updating adjoint representation\n"; + // Uin is in the fundamental representation + // get the U in AdjointRep + // (U_adj)_B = tr[e^a U e^b U^dag] + // e^a = t^a/sqrt(T_F) + // where t^a is the generator in the fundamental + // T_F is 1/2 for the fundamental representation + conformable(U, Uin); + U = zero; + LatticeColourMatrix tmp(Uin._grid); + + Vector::Matrix> ta(Dimension); + + // Debug lines + // LatticeMatrix uno(Uin._grid); + // uno = 1.0; + //////////////// + + // FIXME probably not very efficient to get all the generators + // everytime + for (int a = 0; a < Dimension; a++) SU::generator(a, ta[a]); + + for (int mu = 0; mu < Nd; mu++) { + auto Uin_mu = peekLorentz(Uin, mu); + auto U_mu = peekLorentz(U, mu); + for (int a = 0; a < Dimension; a++) { + tmp = 2.0 * adj(Uin_mu) * ta[a] * Uin_mu; + for (int b = 0; b < Dimension; b++) + pokeColour(U_mu, trace(tmp * ta[b]), a, b); + } + pokeLorentz(U, U_mu, mu); + // Check matrix U_mu, must be real orthogonal + // reality + /* + LatticeMatrix Ucheck = U_mu - conjugate(U_mu); + std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) << + std::endl; + + Ucheck = U_mu * adj(U_mu) - uno; + std::cout << GridLogMessage << "orthogonality check: " << norm2(Ucheck) << + std::endl; + */ + } + } + + LatticeGaugeField RtoFundamentalProject(const LatticeField &in, + Real scale = 1.0) const { + LatticeGaugeField out(in._grid); + out = zero; + + for (int mu = 0; mu < Nd; mu++) { + LatticeColourMatrix out_mu(in._grid); // fundamental representation + LatticeMatrix in_mu = peekLorentz(in, mu); + + out_mu = zero; + + typename SU::LatticeAlgebraVector h(in._grid); + projectOnAlgebra(h, in_mu, double(Nc) * 2.0); // factor C(r)/C(fund) + FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once + pokeLorentz(out, out_mu, mu); + // Returns traceless antihermitian matrix Nc * Nc. 
+ // Confirmed + } + return out; + } + + private: + void projectOnAlgebra(typename SU::LatticeAlgebraVector &h_out, + const LatticeMatrix &in, Real scale = 1.0) const { + SU_Adjoint::projectOnAlgebra(h_out, in, scale); + } + + void FundamentalLieAlgebraMatrix( + typename SU::LatticeAlgebraVector &h, + typename SU::LatticeMatrix &out, Real scale = 1.0) const { + SU::FundamentalLieAlgebraMatrix(h, out, scale); + } +}; + +typedef AdjointRep AdjointRepresentation; +} +} + +#endif \ No newline at end of file diff --git a/lib/qcd/representations/fundamental.h b/lib/qcd/representations/fundamental.h new file mode 100644 index 00000000..7d85d357 --- /dev/null +++ b/lib/qcd/representations/fundamental.h @@ -0,0 +1,45 @@ +/* + * Policy classes for the HMC + * Author: Guido Cossu +*/ + +#ifndef FUNDAMENTAL_H +#define FUNDAMENTAL_H + + +namespace Grid { +namespace QCD { + +/* +* This is an helper class for the HMC +* Empty since HMC updates already the fundamental representation +*/ + +template +class FundamentalRep { + public: + static const int Dimension = ncolour; + + // typdef to be used by the Representations class in HMC to get the + // types for the higher representation fields + typedef typename SU::LatticeMatrix LatticeMatrix; + typedef LatticeGaugeField LatticeField; + + explicit FundamentalRep(GridBase* grid) {} //do nothing + void update_representation(const LatticeGaugeField& Uin) {} // do nothing + + LatticeField RtoFundamentalProject(const LatticeField& in, Real scale = 1.0) const{ + return (scale * in); + } + +}; + +typedef FundamentalRep FundamentalRepresentation; + +} +} + + + + +#endif diff --git a/lib/qcd/representations/hmc_types.h b/lib/qcd/representations/hmc_types.h new file mode 100644 index 00000000..7ab15e9b --- /dev/null +++ b/lib/qcd/representations/hmc_types.h @@ -0,0 +1,91 @@ +#ifndef HMC_TYPES_H +#define HMC_TYPES_H + +#include +#include +#include +#include +#include + +namespace Grid { +namespace QCD { + +// Supported types +// enum 
{Fundamental, Adjoint} repr_type; + +// Utility to add support to the HMC for representations other than the +// fundamental +template +class Representations { + public: + typedef std::tuple Representation_type; + + // Size of the tuple, known at compile time + static const int tuple_size = sizeof...(Reptypes); + // The collection of types for the gauge fields + typedef std::tuple Representation_Fields; + + // To access the Reptypes (FundamentalRepresentation, AdjointRepresentation) + template + using repr_type = typename std::tuple_element::type; + // in order to get the typename of the field use + // type repr_type::LatticeField + + Representation_type rep; + + // Multiple types constructor + explicit Representations(GridBase* grid) : rep(Reptypes(grid)...){}; + + int size() { return tuple_size; } + + // update the fields + template + inline typename std::enable_if<(I == tuple_size), void>::type update( + LatticeGaugeField& U) {} + + template + inline typename std::enable_if<(I < tuple_size), void>::type update( + LatticeGaugeField& U) { + std::get(rep).update_representation(U); + update(U); + } + + + +}; + +typedef Representations NoHirep; + +// Helper classes to access the elements +// Strips the first N parameters from the tuple +// sequence of classes to obtain the S sequence +// Creates a type that is a tuple of vectors of the template type A +template