1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-16 23:07:05 +01:00

Compare commits

..

1 Commits

Author SHA1 Message Date
446c768cd3 Merge branch 'hotfix/v0.5.1'
Double precision compile fix
2016-07-01 16:33:59 +01:00
245 changed files with 7466 additions and 15403 deletions

29
.gitignore vendored
View File

@ -5,6 +5,7 @@
*.o *.o
*.obj *.obj
# Editor files # # Editor files #
################ ################
*~ *~
@ -47,7 +48,6 @@ Config.h.in
config.log config.log
config.status config.status
.deps .deps
*.inc
# http://www.gnu.org/software/autoconf # # http://www.gnu.org/software/autoconf #
######################################## ########################################
@ -63,7 +63,19 @@ config.sub
config.guess config.guess
INSTALL INSTALL
.dirstamp .dirstamp
ltmain.sh
# Packages #
############
# it's better to unpack these files and commit the raw source
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip
# Logs and databases # # Logs and databases #
###################### ######################
@ -89,16 +101,3 @@ build*/*
##################### #####################
*.xcodeproj/* *.xcodeproj/*
build.sh build.sh
# Eigen source #
################
lib/Eigen/*
# FFTW source #
################
lib/fftw/*
# libtool macros #
##################
m4/lt*
m4/libtool.m4

View File

@ -9,6 +9,10 @@ matrix:
- os: osx - os: osx
osx_image: xcode7.2 osx_image: xcode7.2
compiler: clang compiler: clang
- os: osx
osx_image: xcode7.2
compiler: gcc
env: VERSION=-5
- compiler: gcc - compiler: gcc
addons: addons:
apt: apt:
@ -19,8 +23,6 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-4.9 env: VERSION=-4.9
- compiler: gcc - compiler: gcc
@ -33,8 +35,6 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-5 env: VERSION=-5
- compiler: clang - compiler: clang
@ -47,8 +47,6 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang - compiler: clang
@ -61,8 +59,6 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
@ -73,7 +69,6 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install: install:
@ -87,20 +82,13 @@ install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export LDFLAGS='-L/usr/local/lib'; fi
script: script:
- ./bootstrap.sh - ./scripts/reconfigure_script
- mkdir build - mkdir build
- cd build - cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1
- echo make clean - make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - ../configure CXXFLAGS="-msse4.2 -O3 -std=c++11" LIBS="-lmpfr -lgmp" --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -1,5 +1,5 @@
# additional include paths necessary to compile the C++ library # additional include paths necessary to compile the C++ library
SUBDIRS = lib benchmarks tests AM_CXXFLAGS = -I$(top_srcdir)/
SUBDIRS = lib tests benchmarks
AM_CXXFLAGS += -I$(top_builddir)/include filelist: $(SUBDIRS)
ACLOCAL_AMFLAGS = -I m4

104
README.md
View File

@ -1,28 +1,8 @@
# Grid # Grid [![Build Status](https://travis-ci.org/paboyle/Grid.svg?branch=master)](https://travis-ci.org/paboyle/Grid)
<table> Data parallel C++ mathematical object library
<tr>
<td>Last stable release</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=master"></a>
</td>
</tr>
<tr>
<td>Development branch</td>
<td><a href="https://travis-ci.org/paboyle/Grid">
<img src="https://travis-ci.org/paboyle/Grid.svg?branch=develop"></a>
</td>
</tr>
</table>
**Data parallel C++ mathematical object library.** Last update 2015/7/30
Please send all pull requests to the `develop` branch.
License: GPL v2.
Last update 2016/08/03.
### Description
This library provides data parallel C++ container classes with internal memory layout This library provides data parallel C++ container classes with internal memory layout
that is transformed to map efficiently to SIMD architectures. CSHIFT facilities that is transformed to map efficiently to SIMD architectures. CSHIFT facilities
are provided, similar to HPF and cmfortran, and user control is given over the mapping of are provided, similar to HPF and cmfortran, and user control is given over the mapping of
@ -42,75 +22,37 @@ optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a signifi
for most programmers. for most programmers.
The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture.
Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON and BG/Q QPX on the way). Presently SSE4 (128 bit) AVX, AVX2 (256 bit) and IMCI and AVX512 (512 bit) targets are supported (ARM NEON on the way).
These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. These are presented as
The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`.
vRealF, vRealD, vComplexF, vComplexD
internal vector data types. These may be useful in themselves for other programmers.
The corresponding scalar types are named
RealF, RealD, ComplexF, ComplexD
MPI, OpenMP, and SIMD parallelism are present in the library. MPI, OpenMP, and SIMD parallelism are present in the library.
Please see https://arxiv.org/abs/1512.03487 for more detail.
### Installation You can give `configure' initial values for configuration parameters
First, start by cloning the repository: by setting variables in the command line or in the environment. Here
are examples:
``` bash ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -msse4" --enable-simd=SSE4
git clone https://github.com/paboyle/Grid.git
```
Then enter the cloned directory and set up the build system: ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx" --enable-simd=AVX
``` bash ./configure CXX=clang++ CXXFLAGS="-std=c++11 -O3 -mavx2" --enable-simd=AVX2
cd Grid
./bootstrap.sh
```
Now you can execute the `configure` script to generate makefiles (here from a build directory): ./configure CXX=icpc CXXFLAGS="-std=c++11 -O3 -mmic" --enable-simd=AVX512 --host=none
``` bash Note: Before running configure it could be necessary to execute the script
mkdir build; cd build
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
```
where `--enable-precision=` sets the default precision (`single` or `double`), script/filelist
`--enable-simd=` sets the SIMD type (see possible values below),
`--enable-comms=` sets the protocol used for communications (`none`, `mpi`, `mpi-auto` or
`shmem`), and `<path>` should be replaced by the prefix path where you want to
install Grid. The `mpi-auto` communication option lets `configure` determine
automatically how to link to MPI. Other options are available; use `configure
--help` to display them. As with any other program using GNU autotools, the
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build.
Finally, you can build and install Grid:
``` bash
make; make install
```
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: For developers:
Use reconfigure_script in the scripts/ directory to create the autotools environment
``` bash
make -C tests/<subdir> tests
```
### Possible SIMD types
The following options can be used with the `--enable-simd=` option to target different SIMD instruction sets:
| String | Description |
| ----------- | -------------------------------------- |
| `GEN` | generic portable vector code |
| `SSE4` | SSE 4.2 (128 bit) |
| `AVX` | AVX (256 bit) |
| `AVXFMA4` | AVX (256 bit) + FMA |
| `AVX2` | AVX 2 (256 bit) |
| `AVX512` | AVX 512 bit |
| `AVX512MIC` | AVX 512 bit for Intel MIC architecture |
| `IMCI` | Intel IMCI instructions (512 bit) |
Alternatively, some CPU codenames can be directly used:
| String | Description |
| ----------- | -------------------------------------- |
| `KNC` | [Intel Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |

View File

@ -25,7 +25,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
@ -194,128 +194,7 @@ int main (int argc, char ** argv)
} }
} }
#if 0
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=32;lat+=2){
for(int Ls=1;Ls<=16;Ls*=2){
std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
std::vector<CartesianCommunicator::CommsRequest_t> empty;
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_fwd(Nd,empty);
std::vector<std::vector<CartesianCommunicator::CommsRequest_t> > requests_bwd(Nd,empty);
for(int mu=0;mu<4;mu++){
ncomm=0;
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc;
int xmit_to_rank;
int recv_from_rank;
comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromInit(requests_fwd[mu],
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.SendToRecvFromInit(requests_bwd[mu],
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
{
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
Grid.SendToRecvFromBegin(requests_fwd[mu]);
Grid.SendToRecvFromComplete(requests_fwd[mu]);
Grid.SendToRecvFromBegin(requests_bwd[mu]);
Grid.SendToRecvFromComplete(requests_bwd[mu]);
}
}
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
{
double start=usecond();
for(int i=0;i<Nloop;i++){
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
Grid.SendToRecvFromBegin(requests_fwd[mu]);
Grid.SendToRecvFromBegin(requests_bwd[mu]);
Grid.SendToRecvFromComplete(requests_fwd[mu]);
Grid.SendToRecvFromComplete(requests_bwd[mu]);
}
}
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
}
#endif
Grid_finalize(); Grid_finalize();
} }

View File

@ -26,7 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
#include <PerfCount.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
@ -45,9 +46,9 @@ struct scal {
}; };
bool overlapComms = false; bool overlapComms = false;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
typedef WilsonFermion5D<DomainWallVec5dImplF> WilsonFermion5DF; typedef WilsonFermion5D<DomainWallRedBlack5dImplF> WilsonFermion5DF;
typedef WilsonFermion5D<DomainWallVec5dImplD> WilsonFermion5DD; typedef WilsonFermion5D<DomainWallRedBlack5dImplD> WilsonFermion5DD;
int main (int argc, char ** argv) int main (int argc, char ** argv)
@ -70,8 +71,8 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl; std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi()); GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost rb grids"<<std::endl;
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4}); std::vector<int> seeds4({1,2,3,4});
@ -86,6 +87,8 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid); LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid); LatticeFermion err(FGrid);
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu(UGrid); LatticeGaugeField Umu(UGrid);
random(RNG4,Umu); random(RNG4,Umu);
@ -124,20 +127,21 @@ int main (int argc, char ** argv)
RealD mass=0.1; RealD mass=0.1;
RealD M5 =1.8; RealD M5 =1.8;
typename DomainWallFermionR::ImplParams params;
params.overlapCommsCompute = overlapComms;
RealD NP = UGrid->_Nprocessors; RealD NP = UGrid->_Nprocessors;
for(int doasm=1;doasm<2;doasm++){ for(int doasm=1;doasm<2;doasm++){
QCD::WilsonKernelsStatic::AsmOpt=doasm; QCD::WilsonKernelsStatic::AsmOpt=doasm;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,params);
std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl; std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
std::cout << GridLogMessage<< "Calling Dw"<<std::endl; int ncall =10;
int ncall =100;
if (1) { if (1) {
Dw.ZeroCounters();
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START; __SSC_START;
@ -156,17 +160,16 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
Dw.Report(); // Dw.Report();
} }
if (1) if (1)
{ {
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid); LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid); LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid); LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5,params);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5);
for(int x=0;x<latt4[0];x++){ for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){ for(int y=0;y<latt4[1];y++){
@ -178,9 +181,8 @@ int main (int argc, char ** argv)
peekSite(tmp,src,site); peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site); pokeSite(tmp,ssrc,site);
}}}}} }}}}}
std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond(); double t0=usecond();
sDw.ZeroCounters();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START; __SSC_START;
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
@ -190,23 +192,22 @@ int main (int argc, char ** argv)
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report(); // sDw.Report();
if(0){ if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i); PerformanceCounter Counter(i);
Counter.Start(); Counter.Start();
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
Counter.Stop(); Counter.Stop();
Counter.Report(); Counter.Report();
} }
} }
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealF sum=0; RealF sum=0;
@ -220,13 +221,11 @@ int main (int argc, char ** argv)
peekSite(normal,result,site); peekSite(normal,result,site);
peekSite(simd,sresult,site); peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd); sum=sum+norm2(normal-simd);
if (norm2(normal-simd) > 1.0e-6 ) { // std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl; // std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl; // std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<simd<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
}
}}}}} }}}}}
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl; std::cout<<" difference between normal and simd is "<<sum<<std::endl;
if (1) { if (1) {
@ -250,21 +249,17 @@ int main (int argc, char ** argv)
sr_e = zero; sr_e = zero;
sr_o = zero; sr_o = zero;
sDw.ZeroCounters();
sDw.stat.init("DhopEO");
double t0=usecond(); double t0=usecond();
for (int i = 0; i < ncall; i++) { for(int i=0;i<ncall;i++){
sDw.DhopEO(ssrc_o, sr_e, DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
} }
double t1=usecond(); double t1=usecond();
sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
sDw.DhopOE(ssrc_e,sr_o,DaggerNo); sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
@ -273,9 +268,9 @@ int main (int argc, char ** argv)
pickCheckerboard(Even,ssrc_e,sresult); pickCheckerboard(Even,ssrc_e,sresult);
pickCheckerboard(Odd ,ssrc_o,sresult); pickCheckerboard(Odd ,ssrc_o,sresult);
ssrc_e = ssrc_e - sr_e; ssrc_e = ssrc_e - sr_e;
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl; std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<<std::endl;
ssrc_o = ssrc_o - sr_o; ssrc_o = ssrc_o - sr_o;
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl; std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<<std::endl;
} }
@ -289,19 +284,18 @@ int main (int argc, char ** argv)
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x // ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1); tmp = U[mu]*Cshift(src,mu+1,1);
for(int i=0;i<ref._odata.size();i++){ for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ; ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
} }
tmp =adj(U[mu])*src; tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1); tmp =Cshift(tmp,mu+1,-1);
for(int i=0;i<ref._odata.size();i++){ for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ; ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
} }
} }
ref = -0.5*ref; ref = -0.5*ref;
} }
Dw.Dhop(src,result,1); Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl; std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
@ -323,7 +317,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl; std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
{ {
Dw.ZeroCounters();
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
@ -335,7 +328,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
Dw.Report();
} }
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo); Dw.DhopOE(src_e,r_o,DaggerNo);

View File

@ -26,7 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
#include <PerfCount.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;

View File

@ -26,7 +26,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
#include <PerfCount.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
@ -51,7 +52,7 @@ int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
const int Ls=8; const int Ls=16;
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
@ -61,8 +62,6 @@ int main (int argc, char ** argv)
QCD::WilsonKernelsStatic::AsmOpt=0; QCD::WilsonKernelsStatic::AsmOpt=0;
} }
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl; std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl; std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl; std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
@ -127,6 +126,7 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
ColourMatrix cm = Complex(1.0,0.0); ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu5d(FGrid); LatticeGaugeField Umu5d(FGrid);
// replicate across fifth dimension // replicate across fifth dimension
@ -145,10 +145,11 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
} }
#ifdef CHECK #ifdef CHECK
if (1) { if (1)
{
ref = zero; ref = zero;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
tmp = U[mu]*Cshift(src,mu+1,1); tmp = U[mu]*Cshift(src,mu+1,1);
ref=ref + tmp - Gamma(Gmu[mu])*tmp; ref=ref + tmp - Gamma(Gmu[mu])*tmp;
@ -192,19 +193,20 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
Counter.Report(); Counter.Report();
} }
if ( ! report ) { if ( ! report )
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; {
double flops=1344*volume*ncall; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t"; double flops=1344*volume*ncall;
} std::cout <<"\t"<<NP<< "\t"<<flops/(t1-t0)<< "\t";
}
#ifdef CHECK #ifdef CHECK
err = ref-result; err = ref-result;
RealD errd = norm2(err); RealD errd = norm2(err);
if ( errd> 1.0e-4 ) { if ( errd> 1.0e-4 ) {
std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "oops !!! norm diff "<< norm2(err)<<std::endl;
exit(-1); exit(-1);
} }
#endif #endif
LatticeFermion src_e (FrbGrid); LatticeFermion src_e (FrbGrid);
@ -230,9 +232,10 @@ void benchDw(std::vector<int> & latt4, int Ls, int threads,int report )
std::cout<< flops/(t1-t0); std::cout<< flops/(t1-t0);
} }
} }
} }
#define CHECK_SDW #undef CHECK_SDW
void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report ) void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
{ {
@ -240,9 +243,7 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi()); GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(latt4,GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid); GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid); GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
@ -276,89 +277,93 @@ void benchsDw(std::vector<int> & latt4, int Ls, int threads, int report )
} }
} }
RealD mass=0.1; RealD mass=0.1;
RealD M5 =1.8; RealD M5 =1.8;
typedef WilsonFermion5D<DomainWallVec5dImplR> WilsonFermion5DR; typedef WilsonFermion5D<DomainWallRedBlack5dImplR> WilsonFermion5DR;
LatticeFermion ssrc(sFGrid); LatticeFermion ssrc(sFGrid);
LatticeFermion sref(sFGrid); LatticeFermion sref(sFGrid);
LatticeFermion sresult(sFGrid); LatticeFermion sresult(sFGrid);
WilsonFermion5DR sDw(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,M5); WilsonFermion5DR sDw(1,Umu,*sFGrid,*sFrbGrid,*sUGrid,M5);
for(int x=0;x<latt4[0];x++){ for(int x=0;x<latt4[0];x++){
for(int y=0;y<latt4[1];y++){ for(int y=0;y<latt4[1];y++){
for(int z=0;z<latt4[2];z++){ for(int z=0;z<latt4[2];z++){
for(int t=0;t<latt4[3];t++){ for(int t=0;t<latt4[3];t++){
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
std::vector<int> site({s,x,y,z,t}); std::vector<int> site({s,x,y,z,t});
SpinColourVector tmp; SpinColourVector tmp;
peekSite(tmp,src,site); peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site); pokeSite(tmp,ssrc,site);
}}}}} }}}}}
double t0=usecond(); double t0=usecond();
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
double t1=usecond(); double t1=usecond();
#ifdef TIMERS_OFF #ifdef TIMERS_OFF
int ncall =10; int ncall =10;
#else #else
int ncall =1+(int) ((5.0*1000*1000)/(t1-t0)); int ncall =1+(int) ((5.0*1000*1000)/(t1-t0));
#endif #endif
PerformanceCounter Counter(8); PerformanceCounter Counter(8);
Counter.Start(); Counter.Start();
t0=usecond(); t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
} }
t1=usecond(); t1=usecond();
Counter.Stop(); Counter.Stop();
if ( report ) { if ( report ) {
Counter.Report(); Counter.Report();
} else { } else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
LatticeFermion sr_eo(sFGrid); double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
LatticeFermion serr(sFGrid); double flops=1344*volume*ncall;
std::cout<<"\t"<< flops/(t1-t0);
}
LatticeFermion ssrc_e (sFrbGrid);
LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
pickCheckerboard(Even,ssrc_e,ssrc); LatticeFermion sr_eo(sFGrid);
pickCheckerboard(Odd,ssrc_o,ssrc); LatticeFermion serr(sFGrid);
setCheckerboard(sr_eo,ssrc_o); LatticeFermion ssrc_e (sFrbGrid);
setCheckerboard(sr_eo,ssrc_e); LatticeFermion ssrc_o (sFrbGrid);
LatticeFermion sr_e (sFrbGrid);
LatticeFermion sr_o (sFrbGrid);
sr_e = zero; pickCheckerboard(Even,ssrc_e,ssrc);
sr_o = zero; pickCheckerboard(Odd,ssrc_o,ssrc);
setCheckerboard(sr_eo,ssrc_o);
setCheckerboard(sr_eo,ssrc_e);
sr_e = zero;
sr_o = zero;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
PerformanceCounter CounterSdw(8);
CounterSdw.Start();
t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
__SSC_STOP; PerformanceCounter CounterSdw(8);
} CounterSdw.Start();
t1=usecond(); t0=usecond();
CounterSdw.Stop(); for(int i=0;i<ncall;i++){
__SSC_START;
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
__SSC_STOP;
}
t1=usecond();
CounterSdw.Stop();
if ( report ) { if ( report ) {
CounterSdw.Report(); CounterSdw.Report();
} else { } else {
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
std::cout<<"\t"<< flops/(t1-t0); double flops=(1344.0*volume*ncall)/2;
} std::cout<<"\t"<< flops/(t1-t0);
}
} }

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;

View File

@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;

View File

@ -1,117 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Richard Rollins <rprollins@users.noreply.github.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
// Thin aggregate holding exactly one value of the templated type.
template<typename T>
struct scal
{
  T internal;   // the wrapped value
};
// Table of the four gamma-matrix identifiers in the order X, Y, Z, T.
// NOTE(review): nothing in this benchmark's visible code reads Gmu — confirm
// whether it is still needed.
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
// Set to true by main() when "--asynch" is present on the command line; main()
// copies it into the operator's ImplParams::overlapCommsCompute field.
bool overlapComms = false;
// Forward declaration of the timing helper defined after main().
// It applies Dw.Dhop(src,result,dag) repeatedly and prints the measured rate;
// 'volume' is the lattice site count used to scale the flop estimate.
void bench_wilson (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag );
// Benchmark driver for the Wilson hopping term.
//
// For L = 8, 16, ... up to LMAX (environment variable, default 32) a 4-d lattice
// of extent L is created; for d = 4 down to DMIN+1 (DMIN: environment variable,
// default 0) direction d is doubled whenever d <= 3, so each L yields a short
// sequence of progressively larger volumes.  For every volume one table row is
// printed: the lattice extents followed by the rates reported by bench_wilson()
// for DaggerNo and DaggerYes.
//
// Command line: "--asynch" sets overlapComms, which is forwarded to the
// operator through ImplParams::overlapCommsCompute.
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
  typename WilsonFermionR::ImplParams params;
  params.overlapCommsCompute = overlapComms;

  std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
  std::vector<int> mpi_layout  = GridDefaultMpi();
  std::vector<int> seeds({1,2,3,4});
  RealD mass = 0.1;

  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
  std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
  std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
  std::cout<<GridLogMessage << "============================================================================="<< std::endl;

  int Lmax = 32;
  int dmin = 0;
  if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
  if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
  // d indexes the four-entry latt_size vector below, so the loop must stop at
  // d == 0 at the latest; a smaller DMIN would index before the first element.
  if ( dmin < -1 ) dmin = -1;

  for (int L=8; L<=Lmax; L*=2)
    {
      std::vector<int> latt_size = std::vector<int>(4,L);
      for(int d=4; d>dmin; d--)
	{
	  // d == 4 benchmarks the plain L^4 lattice; smaller d doubles one more
	  // direction (the doubling accumulates across iterations).
	  if ( d<=3 ) { latt_size[d] *= 2; }

	  // Row label "AxBxCxD": all but the last extent followed by "x", then the last.
	  std::cout << GridLogMessage;
	  std::copy( latt_size.begin(), std::prev(latt_size.end()), std::ostream_iterator<int>( std::cout, "x" ) );
	  std::cout << latt_size.back() << "\t\t";

	  GridCartesian           Grid(latt_size,simd_layout,mpi_layout);
	  GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);

	  GridParallelRNG  pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
	  LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
	  LatticeFermion    src(&Grid); random(pRNG,src);
	  LatticeFermion result(&Grid); result=zero;

	  // Multiply in double: with an int accumulator the site count overflows
	  // once the product of the extents exceeds INT_MAX (e.g. large LMAX).
	  double volume = std::accumulate(latt_size.begin(),latt_size.end(),1.0,std::multiplies<double>());

	  WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);

	  bench_wilson(src,result,Dw,volume,DaggerNo);
	  bench_wilson(src,result,Dw,volume,DaggerYes);
	  std::cout << std::endl;
	}
    }

  std::cout<<GridLogMessage << "============================================================================="<< std::endl;
  Grid_finalize();
}
// Applies Dw.Dhop(src,result,dag) a fixed number of times, measures the
// elapsed time with usecond() and prints (estimated flops)/(elapsed time)
// followed by two tabs.
//
//   src, result : input and output fermion fields handed to Dhop
//   Dw          : operator whose Dhop is being timed
//   volume      : number of lattice sites, scales the flop estimate
//   dag         : forwarded unchanged as Dhop's third argument
void bench_wilson (LatticeFermion & src,
		   LatticeFermion & result,
		   WilsonFermionR & Dw,
		   double const     volume,
		   int const        dag )
{
  const int ncall = 1000;

  const double start = usecond();
  int done = 0;
  while ( done < ncall ) {
    Dw.Dhop(src,result,dag);
    ++done;
  }
  const double stop = usecond();

  // NOTE(review): 1344 is presumably the flop count per site for one Dhop,
  // and usecond() presumably returns microseconds so the ratio is MFLOP/s
  // (matching the table header) — confirm.
  const double flops = 1344 * volume * ncall;
  std::cout << flops/(stop-start) << "\t\t";
}

View File

@ -25,7 +25,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid.h>
#include <PerfCount.h>
using namespace Grid; using namespace Grid;
@ -40,20 +41,14 @@ int main(int argc,char **argv)
std::ofstream os("zmm.dat"); std::ofstream os("zmm.dat");
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl; os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
for(int L=4;L<=32;L+=4){ for(int L=4;L<=32;L+=4){
for(int m=1;m<=2;m++){ for(int m=1;m<=2;m++){
for(int Ls=8;Ls<=16;Ls+=8){ for(int Ls=8;Ls<=16;Ls+=8){
std::vector<int> grid({L,L,m*L,m*L}); std::vector<int> grid({L,L,m*L,m*L});
std::cout << GridLogMessage <<"\t";
for(int i=0;i<4;i++) { for(int i=0;i<4;i++) {
std::cout << grid[i]<<"x"; std::cout << grid[i]<<"x";
} }
std::cout << Ls<<"\t\t"; std::cout << Ls<<std::endl;
bench(os,grid,Ls); bench(os,grid,Ls);
} }
} }
@ -110,6 +105,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
RealD M5 =1.8; RealD M5 =1.8;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=50; int ncall=50;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
@ -121,7 +117,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
double flops=1344*volume/2; double flops=1344*volume/2;
mfc = flops*ncall/(t1-t0); mfc = flops*ncall/(t1-t0);
std::cout<<mfc<<"\t\t"; std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl;
QCD::WilsonKernelsStatic::AsmOpt=1; QCD::WilsonKernelsStatic::AsmOpt=1;
t0=usecond(); t0=usecond();
@ -130,7 +126,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
} }
t1=usecond(); t1=usecond();
mfa = flops*ncall/(t1-t0); mfa = flops*ncall/(t1-t0);
std::cout<<mfa<<"\t\t"; std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl;
/* /*
int dag=DaggerNo; int dag=DaggerNo;
t0=usecond(); t0=usecond();
@ -168,7 +164,8 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
//resulta = (-0.5) * resulta; //resulta = (-0.5) * resulta;
diff = resulto-resulta; diff = resulto-resulta;
std::cout<<norm2(diff)<<std::endl; std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl;
std::cout<<std::endl;
return 0; return 0;
} }

39
benchmarks/Make.inc Normal file
View File

@ -0,0 +1,39 @@
# Automake fragment listing the benchmark executables.
# bin_PROGRAMS names every program; each program then gets a <name>_SOURCES
# line (its single .cc file) and a <name>_LDADD line linking it against -lGrid.
# NOTE(review): this file looks machine-generated (one uniform stanza per
# program) — confirm before editing by hand.
bin_PROGRAMS = Benchmark_comms Benchmark_dwf Benchmark_dwf_ntpf Benchmark_dwf_sweep Benchmark_memory_asynch Benchmark_memory_bandwidth Benchmark_su3 Benchmark_wilson Benchmark_zmm
Benchmark_comms_SOURCES=Benchmark_comms.cc
Benchmark_comms_LDADD=-lGrid
Benchmark_dwf_SOURCES=Benchmark_dwf.cc
Benchmark_dwf_LDADD=-lGrid
Benchmark_dwf_ntpf_SOURCES=Benchmark_dwf_ntpf.cc
Benchmark_dwf_ntpf_LDADD=-lGrid
Benchmark_dwf_sweep_SOURCES=Benchmark_dwf_sweep.cc
Benchmark_dwf_sweep_LDADD=-lGrid
Benchmark_memory_asynch_SOURCES=Benchmark_memory_asynch.cc
Benchmark_memory_asynch_LDADD=-lGrid
Benchmark_memory_bandwidth_SOURCES=Benchmark_memory_bandwidth.cc
Benchmark_memory_bandwidth_LDADD=-lGrid
Benchmark_su3_SOURCES=Benchmark_su3.cc
Benchmark_su3_LDADD=-lGrid
Benchmark_wilson_SOURCES=Benchmark_wilson.cc
Benchmark_wilson_LDADD=-lGrid
Benchmark_zmm_SOURCES=Benchmark_zmm.cc
Benchmark_zmm_LDADD=-lGrid

View File

@ -1 +1,8 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/lib
AM_LDFLAGS = -L$(top_builddir)/lib
#
# Test code
#
include Make.inc include Make.inc

View File

@ -1,19 +0,0 @@
#!/usr/bin/env bash
#
# Bootstrap script: downloads the Eigen and FFTW source archives, hands each
# one to the matching scripts/update_*.sh helper, deletes the archive, then
# regenerates the Make.inc file lists and the configure script.
# It uses ./scripts/... paths, so run it from the directory that contains scripts/.

# Stop at the first failing step: without this a failed download would still
# run the update helper on a missing archive and carry on to autoreconf.
set -e

EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz

# wget saves each download under the last path component of its URL.
EIGEN_ARCHIVE="$(basename "${EIGEN_URL}")"
FFTW_ARCHIVE="$(basename "${FFTW_URL}")"

echo "-- deploying Eigen source..."
# NOTE(review): --no-check-certificate disables TLS verification for this
# download; kept as-is to preserve behaviour, but consider removing it.
wget "${EIGEN_URL}" --no-check-certificate
./scripts/update_eigen.sh "${EIGEN_ARCHIVE}"
rm "${EIGEN_ARCHIVE}"

echo "-- copying fftw prototypes..."
wget "${FFTW_URL}"
./scripts/update_fftw.sh "${FFTW_ARCHIVE}"
rm "${FFTW_ARCHIVE}"

echo '-- generating Make.inc files...'
./scripts/filelist
echo '-- generating configure script...'
autoreconf -fvi

View File

@ -1,362 +1,315 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
#
# Project Grid package
#
# Time-stamp: <2015-07-10 17:46:21 neo>
AC_PREREQ([2.63]) AC_PREREQ([2.63])
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid]) AC_INIT([Grid], [1.0], [paboyle@ph.ed.ac.uk])
AC_CANONICAL_BUILD AC_CANONICAL_SYSTEM
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects) AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h]) AC_CONFIG_HEADERS([lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_MSG_NOTICE([
############### Checks for programs :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Configuring $PACKAGE v$VERSION for $host
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
])
# Checks for programs.
AC_LANG(C++) AC_LANG(C++)
CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB
############ openmp ###############
AC_OPENMP AC_OPENMP
AC_PROG_RANLIB
#AX_CXX_COMPILE_STDCXX_11(noext, mandatory)
AX_EXT
ac_openmp=no # Checks for libraries.
#AX_GCC_VAR_ATTRIBUTE(aligned)
if test "${OPENMP_CXXFLAGS}X" != "X"; then # Checks for header files.
ac_openmp=yes
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
fi
############### Checks for header files
AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(stdint.h)
AC_CHECK_HEADERS(mm_malloc.h) AC_CHECK_HEADERS(mm_malloc.h)
AC_CHECK_HEADERS(malloc/malloc.h) AC_CHECK_HEADERS(malloc/malloc.h)
AC_CHECK_HEADERS(malloc.h) AC_CHECK_HEADERS(malloc.h)
AC_CHECK_HEADERS(endian.h) AC_CHECK_HEADERS(endian.h)
AC_CHECK_HEADERS(execinfo.h) AC_CHECK_HEADERS(execinfo.h)
AC_CHECK_HEADERS(gmp.h)
AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]]) AC_CHECK_DECLS([ntohll],[], [], [[#include <arpa/inet.h>]])
AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]]) AC_CHECK_DECLS([be64toh],[], [], [[#include <arpa/inet.h>]])
############### Checks for typedefs, structures, and compiler characteristics # Checks for typedefs, structures, and compiler characteristics.
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
AC_TYPE_UINT64_T AC_TYPE_UINT64_T
############### GMP and MPFR ################# # Checks for library functions.
AC_ARG_WITH([gmp], echo
[AS_HELP_STRING([--with-gmp=prefix], echo Checking libraries
[try this for a non-standard install prefix of the GMP library])], echo :::::::::::::::::::::::::::::::::::::::::::
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
AC_ARG_WITH([mpfr],
[AS_HELP_STRING([--with-mpfr=prefix],
[try this for a non-standard install prefix of the MPFR library])],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
################## lapack ####################
AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
case ${ac_LAPACK} in
no)
;;
yes)
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
*)
AM_CXXFLAGS="-I$ac_LAPACK/include $AM_CXXFLAGS"
AM_LDFLAGS="-L$ac_LAPACK/lib $AM_LDFLAGS"
AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
esac
################## first-touch ####################
# Option --enable-numa: defines GRID_NUMA in the config header unless the
# option is absent or explicitly set to no.
# The value supplied by the user arrives in the lower-case shell variable
# enable_numa; reading the upper-case spelling always yielded an empty string,
# so --disable-numa and --enable-numa=no still switched the feature on.
AC_ARG_ENABLE([numa],
    [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
    [ac_NUMA=${enable_numa}],[ac_NUMA=no])
case ${ac_NUMA} in
    no)
        ;;
    *)
        # yes and any other value (for example a prefix) enable the feature
        AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
esac
################## FFTW3 ####################
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
################ Get compiler information
AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
AX_COMPILER_VENDOR
AC_DEFINE_UNQUOTED([CXX_COMP_VENDOR],["$ax_cv_cxx_compiler_vendor"],
[vendor of C++ compiler that will compile the code])
AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code])
############### Checks for library functions
CXXFLAGS_CPY=$CXXFLAGS
LDFLAGS_CPY=$LDFLAGS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_CHECK_FUNCS([gettimeofday]) AC_CHECK_FUNCS([gettimeofday])
AC_CHECK_LIB([gmp],[__gmpf_init],
[AC_CHECK_LIB([mpfr],[mpfr_init],
[AC_DEFINE([HAVE_LIBMPFR], [1], [Define to 1 if you have the `MPFR' library (-lmpfr).])]
[have_mpfr=true]
[LIBS="$LIBS -lmpfr"],
[AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
[have_gmp=true]
[LIBS="$LIBS -lgmp"],
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
if test "${ac_LAPACK}x" != "nox"; then #AC_CHECK_LIB([gmp],[__gmpf_init],,
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], # [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
[AC_MSG_ERROR("LAPACK enabled but library not found")]) #Please install or provide the correct path to your installation
fi #Info at: http://www.gmplib.org)])
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
[have_fftw=true]
[LIBS="$LIBS -lfftw3 -lfftw3f"],
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY
############### SIMD instruction selection #AC_CHECK_LIB([mpfr],[mpfr_init],,
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ # [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
#Please install or provide the correct path to your installation
#Info at: http://www.mpfr.org/)])
#
# SIMD instructions selection
#
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|IMCI],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) [ac_SIMD=${enable_simd}],[ac_SIMD=DEBUG])
case ${ax_cv_cxx_compiler_vendor} in supported=no
clang|gnu)
case ${ac_SIMD} in ac_ZMM=no;
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2';;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';;
AVX512|AVX512MIC|KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
SIMD_FLAGS='';;
GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
*)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
esac;;
intel)
case ${ac_SIMD} in
SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2 -xsse4.2';;
AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx -xavx';;
AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma';;
AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-xcore-avx512';;
AVX512MIC|KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing])
SIMD_FLAGS='-xmic-avx512';;
IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner])
SIMD_FLAGS='';;
GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';;
*)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the Intel compiler"]);;
esac;;
*)
AC_MSG_WARN([Compiler unknown, using generic vector code])
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]);;
esac
AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
case ${ac_SIMD} in case ${ac_SIMD} in
AVX512|AVX512MIC|KNL) SSE4)
AC_DEFINE([TEST_ZMM],[1],[compile ZMM test]);; echo Configuring for SSE4
*) AC_DEFINE([SSE4],[1],[SSE4 Intrinsics] )
;; if test x"$ax_cv_support_ssse3_ext" = x"yes"; then dnl minimal support for SSE4
supported=yes
else
AC_MSG_WARN([Your processor does not support SSE4 instructions])
fi
;;
AVX)
echo Configuring for AVX
AC_DEFINE([AVX1],[1],[AVX Intrinsics] )
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVXFMA4)
echo Configuring for AVX
AC_DEFINE([AVXFMA4],[1],[AVX Intrinsics with FMA4] )
if test x"$ax_cv_support_avx_ext" = x"yes"; then dnl minimal support for AVX
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX instructions])
fi
;;
AVX2)
echo Configuring for AVX2
AC_DEFINE([AVX2],[1],[AVX2 Intrinsics] )
if test x"$ax_cv_support_avx2_ext" = x"yes"; then dnl minimal support for AVX2
supported=yes
else
AC_MSG_WARN([Your processor does not support AVX2 instructions])
fi
;;
AVX512)
echo Configuring for AVX512
AC_DEFINE([AVX512],[1],[AVX512 Intrinsics for Knights Landing] )
supported="cross compilation"
ac_ZMM=yes;
;;
IMCI)
echo Configuring for IMCI
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner] )
supported="cross compilation"
ac_ZMM=no;
;;
NEONv8)
echo Configuring for experimental ARMv8a support
AC_DEFINE([NEONv8],[1],[NEON ARMv8 Experimental support ] )
supported="cross compilation"
;;
DEBUG)
echo Configuring without SIMD support - only for compiler DEBUGGING!
AC_DEFINE([EMPTY_SIMD],[1],[EMPTY_SIMD only for DEBUGGING] )
;;
*)
AC_MSG_ERROR([${ac_SIMD} flag unsupported as --enable-simd option\nRun ./configure --help for the list of options]);
;;
esac esac
############### precision selection case ${ac_ZMM} in
yes)
echo Enabling ZMM source code
;;
no)
echo Disabling ZMM source code
;;
esac
AM_CONDITIONAL(BUILD_ZMM,[ test "X${ac_ZMM}X" == "XyesX" ])
AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double]) AC_ARG_ENABLE([precision],[AC_HELP_STRING([--enable-precision=single|double],[Select default word size of Real])],[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
case ${ac_PRECISION} in case ${ac_PRECISION} in
single) single)
echo default precision is single
AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] ) AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
;; ;;
double) double)
echo default precision is double
AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] ) AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
;; ;;
esac esac
############### communication type selection #
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi|mpi-auto|shmem],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) # Comms selection
#
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
case ${ac_COMMS} in case ${ac_COMMS} in
none) none)
echo Configuring for NO communications
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
;; ;;
mpi-auto)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
LX_FIND_MPI
if test "x$have_CXX_mpi" = 'xno'; then AC_MSG_ERROR(["MPI not found"]); fi
AM_CXXFLAGS="$MPI_CXXFLAGS $AM_CXXFLAGS"
AM_CFLAGS="$MPI_CFLAGS $AM_CFLAGS"
AM_LDFLAGS="`echo $MPI_CXXLDFLAGS | sed -E 's/-l@<:@^ @:>@+//g'` $AM_LDFLAGS"
LIBS="`echo $MPI_CXXLDFLAGS | sed -E 's/-L@<:@^ @:>@+//g'` $LIBS"
;;
mpi) mpi)
echo Configuring for MPI communications
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
;; ;;
shmem) shmem)
echo Configuring for SHMEM communications
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;; ;;
esac esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
############### RNG selection #
# RNG selection
#
AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\ AC_ARG_ENABLE([rng],[AC_HELP_STRING([--enable-rng=ranlux48|mt19937],\
[Select Random Number Generator to be used])],\ [Select Random Number Generator to be used])],\
[ac_RNG=${enable_rng}],[ac_RNG=ranlux48]) [ac_RNG=${enable_rng}],[ac_RNG=ranlux48])
case ${ac_RNG} in case ${ac_RNG} in
ranlux48) ranlux48)
AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] ) AC_DEFINE([RNG_RANLUX],[1],[RNG_RANLUX] )
;; ;;
mt19937) mt19937)
AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] ) AC_DEFINE([RNG_MT19937],[1],[RNG_MT19937] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;; ;;
esac esac
############### timer option #
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers],\ # SDE timing mode
#
AC_ARG_ENABLE([timers],[AC_HELP_STRING([--enable-timers=yes|no],\
[Enable system dependent high res timers])],\ [Enable system dependent high res timers])],\
[ac_TIMERS=${enable_timers}],[ac_TIMERS=yes]) [ac_TIMERS=${enable_timers}],[ac_TIMERS=yes])
case ${ac_TIMERS} in case ${ac_TIMERS} in
yes) yes)
AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] ) AC_DEFINE([TIMERS_ON],[1],[TIMERS_ON] )
;; ;;
no) no)
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;; ;;
esac esac
############### Chroma regression test #
# Chroma regression tests
#
AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no) AC_ARG_ENABLE([chroma],[AC_HELP_STRING([--enable-chroma],[Expect chroma compiled under c++11 ])],ac_CHROMA=yes,ac_CHROMA=no)
case ${ac_CHROMA} in case ${ac_CHROMA} in
yes|no) yes)
echo Enabling tests regressing to Chroma
;;
no)
echo Disabling tests regressing to Chroma
;; ;;
*) *)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;; ;;
esac esac
AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ]) AM_CONDITIONAL(BUILD_CHROMA_REGRESSION,[ test "X${ac_CHROMA}X" == "XyesX" ])
############### Doxygen #
AC_PROG_DOXYGEN # Lapack
#
AC_ARG_ENABLE([lapack],[AC_HELP_STRING([--enable-lapack],[Enable lapack yes/no ])],[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
if test -n "$DOXYGEN" case ${ac_LAPACK} in
then yes)
AC_CONFIG_FILES([docs/doxy.cfg]) echo Enabling lapack
fi ;;
no)
echo Disabling lapack
;;
*)
echo Enabling lapack at ${ac_LAPACK}
;;
esac
############### Ouput AM_CONDITIONAL(USE_LAPACK,[ test "X${ac_LAPACK}X" != "XnoX" ])
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} AM_CONDITIONAL(USE_LAPACK_LIB,[ test "X${ac_LAPACK}X" != "XyesX" ])
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" ###################################################################
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" # Checks for doxygen support
AC_SUBST([AM_CFLAGS]) # if present enables the "make doxyfile" command
AC_SUBST([AM_CXXFLAGS]) #echo
AC_SUBST([AM_LDFLAGS]) #echo Checking doxygen support
#echo :::::::::::::::::::::::::::::::::::::::::::
#AC_PROG_DOXYGEN
#if test -n "$DOXYGEN"
#then
#AC_CONFIG_FILES([docs/doxy.cfg])
#fi
echo
echo Creating configuration files
echo :::::::::::::::::::::::::::::::::::::::::::
AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile) AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile) AC_CONFIG_FILES(tests/Makefile)
AC_CONFIG_FILES(tests/IO/Makefile)
AC_CONFIG_FILES(tests/core/Makefile)
AC_CONFIG_FILES(tests/debug/Makefile)
AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile) AC_CONFIG_FILES(benchmarks/Makefile)
AC_OUTPUT AC_OUTPUT
echo " echo "
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- PLATFORM ---------------------------------------- The following features are enabled:
- architecture (build) : $build_cpu - architecture (build) : $build_cpu
- os (build) : $build_os - os (build) : $build_os
- architecture (target) : $target_cpu - architecture (target) : $target_cpu
- os (target) : $target_os - os (target) : $target_os
- compiler vendor : ${ax_cv_cxx_compiler_vendor}
- compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
- SIMD : ${ac_SIMD}
- Threading : ${ac_openmp}
- Communications type : ${ac_COMMS}
- Default precision : ${ac_PRECISION}
- RNG choice : ${ac_RNG}
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
- LAPACK : ${ac_LAPACK}
- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
----- BUILD FLAGS ------------------------------------- - Supported SIMD flags : $SIMD_FLAGS
- CXXFLAGS: ----------------------------------------------------------
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` - enabled simd support : ${ac_SIMD} (config macro says supported: $supported )
- LDFLAGS: - communications type : ${ac_COMMS}
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'` - default precision : ${ac_PRECISION}
- LIBS: - RNG choice : ${ac_RNG}
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'` - LAPACK : ${ac_LAPACK}
-------------------------------------------------------
" "

View File

@ -1 +0,0 @@
../lib

View File

@ -29,28 +29,27 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ALGORITHMS_H #ifndef GRID_ALGORITHMS_H
#define GRID_ALGORITHMS_H #define GRID_ALGORITHMS_H
#include <Grid/algorithms/SparseMatrix.h> #include <algorithms/SparseMatrix.h>
#include <Grid/algorithms/LinearOperator.h> #include <algorithms/LinearOperator.h>
#include <Grid/algorithms/Preconditioner.h> #include <algorithms/Preconditioner.h>
#include <Grid/algorithms/approx/Zolotarev.h> #include <algorithms/approx/Zolotarev.h>
#include <Grid/algorithms/approx/Chebyshev.h> #include <algorithms/approx/Chebyshev.h>
#include <Grid/algorithms/approx/Remez.h> #include <algorithms/approx/Remez.h>
#include <Grid/algorithms/approx/MultiShiftFunction.h> #include <algorithms/approx/MultiShiftFunction.h>
#include <Grid/algorithms/iterative/ConjugateGradient.h> #include <algorithms/iterative/ConjugateGradient.h>
#include <Grid/algorithms/iterative/ConjugateResidual.h> #include <algorithms/iterative/ConjugateResidual.h>
#include <Grid/algorithms/iterative/NormalEquations.h> #include <algorithms/iterative/NormalEquations.h>
#include <Grid/algorithms/iterative/SchurRedBlack.h> #include <algorithms/iterative/SchurRedBlack.h>
#include <Grid/algorithms/iterative/ConjugateGradientMultiShift.h> #include <algorithms/iterative/ConjugateGradientMultiShift.h>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support // Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h> #include <algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> #include <algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h> #include <algorithms/CoarsenedMatrix.h>
// Eigen/lanczos // Eigen/lanczos
// EigCg // EigCg

View File

@ -113,8 +113,9 @@ public:
#endif #endif
_Tp tmp; _Tp tmp;
#ifdef GRID_NUMA #undef FIRST_TOUCH_OPTIMISE
#pragma omp parallel for schedule(static) #ifdef FIRST_TOUCH_OPTIMISE
#pragma omp parallel for
for(int i=0;i<__n;i++){ for(int i=0;i<__n;i++){
ptr[i]=tmp; ptr[i]=tmp;
} }

View File

@ -28,8 +28,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_CARTESIAN_H #ifndef GRID_CARTESIAN_H
#define GRID_CARTESIAN_H #define GRID_CARTESIAN_H
#include <Grid/cartesian/Cartesian_base.h> #include <cartesian/Cartesian_base.h>
#include <Grid/cartesian/Cartesian_full.h> #include <cartesian/Cartesian_full.h>
#include <Grid/cartesian/Cartesian_red_black.h> #include <cartesian/Cartesian_red_black.h>
#endif #endif

View File

@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_COMMUNICATOR_H #ifndef GRID_COMMUNICATOR_H
#define GRID_COMMUNICATOR_H #define GRID_COMMUNICATOR_H
#include <Grid/communicator/Communicator_base.h> #include <communicator/Communicator_base.h>
#endif #endif

View File

@ -28,17 +28,17 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef _GRID_CSHIFT_H_ #ifndef _GRID_CSHIFT_H_
#define _GRID_CSHIFT_H_ #define _GRID_CSHIFT_H_
#include <Grid/cshift/Cshift_common.h> #include <cshift/Cshift_common.h>
#ifdef GRID_COMMS_NONE #ifdef GRID_COMMS_NONE
#include <Grid/cshift/Cshift_none.h> #include <cshift/Cshift_none.h>
#endif #endif
#ifdef GRID_COMMS_MPI #ifdef GRID_COMMS_MPI
#include <Grid/cshift/Cshift_mpi.h> #include <cshift/Cshift_mpi.h>
#endif #endif
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator #include <cshift/Cshift_mpi.h> // uses same implementation of communicator
#endif #endif
#endif #endif

276
lib/FFT.h
View File

@ -1,276 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/FFT.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _GRID_FFT_H_
#define _GRID_FFT_H_
#ifdef HAVE_FFTW
#include <fftw3.h>
#endif
namespace Grid {
// Primary template: deliberately empty. Only the explicit specialisations
// (declared when HAVE_FFTW is defined) provide the FFTW_scalar/FFTW_plan
// types and the static forwarding functions.
template <class scalar>
struct FFTW {};
#ifdef HAVE_FFTW
// Specialisation for ComplexD: every member forwards to the global
// double-precision ::fftw_* routine of the same name.
template<> struct FFTW<ComplexD> {

  using FFTW_scalar = fftw_complex;
  using FFTW_plan   = fftw_plan;

  // Forwards all arguments unchanged to ::fftw_plan_many_dft and returns its plan.
  static FFTW_plan fftw_plan_many_dft(int rank, const int *n, int howmany,
                                      FFTW_scalar *in,  const int *inembed,
                                      int istride, int idist,
                                      FFTW_scalar *out, const int *onembed,
                                      int ostride, int odist,
                                      int sign, unsigned flags)
  {
    return ::fftw_plan_many_dft(rank, n, howmany,
                                in,  inembed, istride, idist,
                                out, onembed, ostride, odist,
                                sign, flags);
  }

  // Forwards to ::fftw_flops, which fills *add, *mul and *fmas for plan p.
  static void fftw_flops(const FFTW_plan p, double *add, double *mul, double *fmas)
  {
    ::fftw_flops(p, add, mul, fmas);
  }

  // Runs plan p on the given in/out arrays via ::fftw_execute_dft.
  static inline void fftw_execute_dft(const FFTW_plan p, FFTW_scalar *in, FFTW_scalar *out)
  {
    ::fftw_execute_dft(p, in, out);
  }

  // Releases plan p via ::fftw_destroy_plan.
  static inline void fftw_destroy_plan(const FFTW_plan p)
  {
    ::fftw_destroy_plan(p);
  }
};
// Specialisation for ComplexF: same member names as the ComplexD version,
// but every call forwards to the single-precision ::fftwf_* routine.
template<> struct FFTW<ComplexF> {

  using FFTW_scalar = fftwf_complex;
  using FFTW_plan   = fftwf_plan;

  // Forwards all arguments unchanged to ::fftwf_plan_many_dft and returns its plan.
  static FFTW_plan fftw_plan_many_dft(int rank, const int *n, int howmany,
                                      FFTW_scalar *in,  const int *inembed,
                                      int istride, int idist,
                                      FFTW_scalar *out, const int *onembed,
                                      int ostride, int odist,
                                      int sign, unsigned flags)
  {
    return ::fftwf_plan_many_dft(rank, n, howmany,
                                 in,  inembed, istride, idist,
                                 out, onembed, ostride, odist,
                                 sign, flags);
  }

  // Forwards to ::fftwf_flops, which fills *add, *mul and *fmas for plan p.
  static void fftw_flops(const FFTW_plan p, double *add, double *mul, double *fmas)
  {
    ::fftwf_flops(p, add, mul, fmas);
  }

  // Runs plan p on the given in/out arrays via ::fftwf_execute_dft.
  static inline void fftw_execute_dft(const FFTW_plan p, FFTW_scalar *in, FFTW_scalar *out)
  {
    ::fftwf_execute_dft(p, in, out);
  }

  // Releases plan p via ::fftwf_destroy_plan.
  static inline void fftw_destroy_plan(const FFTW_plan p)
  {
    ::fftwf_destroy_plan(p);
  }
};
#endif
// Fallback values for the transform-direction constants. <fftw3.h> is only
// included above when HAVE_FFTW is defined, so without it FFTW_FORWARD would
// be undeclared; these definitions keep FFT::forward / FFT::backward below
// compilable in that configuration.
#ifndef FFTW_FORWARD
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#endif
// One-dimensional Fourier transforms of a Lattice along a chosen dimension,
// implemented with FFTW plans (when HAVE_FFTW is defined).
//
// The object keeps a pointer to the caller's grid (vgrid, not owned) and
// allocates a second grid (sgrid, owned) with a layout of all ones. It also
// accumulates a flop count and the time spent executing FFTW plans across
// calls to FFT_dim.
class FFT {
private:

  GridCartesian *vgrid;   // grid supplied by the caller; not owned
  GridCartesian *sgrid;   // allocated in the constructor, deleted in the destructor

  int Nd;
  double flops;           // running total reported by Flops()
  double flops_call;      // add + mul + 2*fma for the most recently created plan
  uint64_t usec;          // accumulated microseconds spent in the execute loop

  std::vector<int> dimensions;
  std::vector<int> processors;
  std::vector<int> processor_coor;

public:

  static const int forward=FFTW_FORWARD;
  static const int backward=FFTW_BACKWARD;

  // Total flop count accumulated so far.
  double Flops(void) {return flops;}

  // Accumulated flops divided by accumulated microseconds.
  // Returns 0 before any time has been recorded instead of dividing by zero.
  double MFlops(void) {return usec ? flops/usec : 0.0;}

  // Builds the helper grid from grid's full dimensions and processor layout.
  // grid must be non-null and must outlive this object.
  FFT ( GridCartesian * grid ) :
    vgrid(grid),
    sgrid(nullptr),
    Nd(grid->_ndimension),
    flops(0),
    flops_call(0),
    usec(0),
    dimensions(grid->_fdimensions),
    processors(grid->_processors),
    processor_coor(grid->_processor_coor)
  {
    // sgrid is declared before dimensions/processors, so it has to be created
    // here in the body, after those members have been initialised.
    std::vector<int> layout(Nd,1);
    sgrid = new GridCartesian(dimensions,layout,processors);
  }

  ~FFT ( void) {
    delete sgrid;
  }

  // sgrid is uniquely owned: a member-wise copy would delete it twice.
  FFT(const FFT &) = delete;
  FFT &operator=(const FFT &) = delete;

  // Transforms source along dimension dim and writes the outcome to result.
  //   result, source : must be conformable with the grid given at construction
  //   dim            : index of the dimension to transform
  //   inverse        : non-zero selects FFTW_BACKWARD, zero selects FFTW_FORWARD
  // No scaling of the output is applied in this function.
  // Asserts if the library was built without HAVE_FFTW.
  template<class vobj>
  void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){

    conformable(result._grid,vgrid);
    conformable(source._grid,vgrid);

    int L = vgrid->_ldimensions[dim];
    int G = vgrid->_fdimensions[dim];

    std::vector<int> layout(Nd,1);
    std::vector<int> pencil_gd(vgrid->_fdimensions);

    pencil_gd[dim] = G*processors[dim];

    // Pencil global vol LxLxGxLxL per node
    GridCartesian pencil_g(pencil_gd,layout,processors);

    // Construct pencils
    typedef typename vobj::scalar_object sobj;
    typedef typename sobj::scalar_type   scalar;

    Lattice<vobj> ssource(vgrid); ssource =source;
    Lattice<sobj> pgsource(&pencil_g);
    Lattice<sobj> pgresult(&pencil_g); pgresult=zero;

#ifndef HAVE_FFTW
    assert(0);
#else
    typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
    typedef typename FFTW<scalar>::FFTW_plan   FFTW_plan;

    {
      int Ncomp = sizeof(sobj)/sizeof(scalar);
      int Nlow  = 1;
      for(int d=0;d<dim;d++){
        Nlow*=vgrid->_ldimensions[d];
      }

      int rank = 1;      /* 1d transforms */
      int n[]  = {G};    /* 1d transforms of length G */
      int howmany = Ncomp;
      int odist,idist,istride,ostride;
      idist   = odist   = 1;           /* Distance between consecutive FT's */
      istride = ostride = Ncomp*Nlow;  /* distance between two elements in the same FT */
      int *inembed = n, *onembed = n;

      int sign = FFTW_FORWARD;
      if (inverse) sign = FFTW_BACKWARD;

      FFTW_plan p;
      {
        FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
        FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
        p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
                                             in,inembed,
                                             istride,idist,
                                             out,onembed,
                                             ostride, odist,
                                             sign,FFTW_ESTIMATE);
      }

      double add,mul,fma;
      FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
      flops_call = add+mul+2.0*fma;

      // Barrel shift and collect global pencil: at each step copy every local
      // site of ssource into pgsource at offset shift*L along dim, then
      // replace ssource by Cshift(ssource,dim,L).
      // (The index is named "shift" so that it does not hide the plan p.)
      for(int shift=0;shift<processors[dim];shift++) {

        for(int idx=0;idx<sgrid->lSites();idx++) {

          std::vector<int> lcoor(Nd);
          sgrid->LocalIndexToLocalCoor(idx,lcoor);

          sobj s;
          peekLocalSite(s,ssource,lcoor);

          lcoor[dim]+=shift*L;

          pokeLocalSite(s,pgsource,lcoor);
        }

        ssource = Cshift(ssource,dim,L);
      }

      // Loop over orthog coords
      int NN=pencil_g.lSites();
      GridStopWatch Timer;
      Timer.Start();

PARALLEL_FOR_LOOP
      for(int idx=0;idx<NN;idx++) {

        std::vector<int> lcoor(Nd);
        pencil_g.LocalIndexToLocalCoor(idx,lcoor);

        if ( lcoor[dim] == 0 ) {  // restricts loop to plane at lcoor[dim]==0
          FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
          FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
          FFTW<scalar>::fftw_execute_dft(p,in,out);
        }
      }

      Timer.Stop();
      usec += Timer.useconds();
      // NOTE(review): the plan is executed only for sites with lcoor[dim]==0,
      // yet the count below is scaled by all NN sites - confirm this is the
      // intended accounting.
      flops+= flops_call*NN;

      // Copy this node's slice of the transformed pencil back into result.
      int pc = processor_coor[dim];
      for(int idx=0;idx<sgrid->lSites();idx++) {
        std::vector<int> lcoor(Nd);
        sgrid->LocalIndexToLocalCoor(idx,lcoor);
        std::vector<int> gcoor = lcoor;
        // extract the result
        sobj s;
        gcoor[dim] = lcoor[dim]+L*pc;
        peekLocalSite(s,pgresult,gcoor);
        pokeLocalSite(s,result,lcoor);
      }

      FFTW<scalar>::fftw_destroy_plan(p);
    }
#endif
  }
};
}
#endif

View File

@ -59,31 +59,29 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/////////////////// ///////////////////
// Grid headers // Grid headers
/////////////////// ///////////////////
#include <Grid/serialisation/Serialisation.h> #include <serialisation/Serialisation.h>
#include "Config.h" #include <Config.h>
#include <Grid/Timer.h> #include <Timer.h>
#include <Grid/PerfCount.h> #include <PerfCount.h>
#include <Grid/Log.h> #include <Log.h>
#include <Grid/AlignedAllocator.h> #include <AlignedAllocator.h>
#include <Grid/Simd.h> #include <Simd.h>
#include <Grid/Threads.h> #include <Threads.h>
#include <Grid/Lexicographic.h> #include <Lexicographic.h>
#include <Grid/Init.h> #include <Communicator.h>
#include <Grid/Communicator.h> #include <Cartesian.h>
#include <Grid/Cartesian.h> #include <Tensors.h>
#include <Grid/Tensors.h> #include <Lattice.h>
#include <Grid/Lattice.h> #include <Cshift.h>
#include <Grid/Cshift.h> #include <Stencil.h>
#include <Grid/Stencil.h> #include <Algorithms.h>
#include <Grid/Algorithms.h> #include <parallelIO/BinaryIO.h>
#include <Grid/parallelIO/BinaryIO.h> #include <qcd/QCD.h>
#include <Grid/qcd/QCD.h> #include <parallelIO/NerscIO.h>
#include <Grid/parallelIO/NerscIO.h> #include <Init.h>
#include <Grid/FFT.h> #include <qcd/hmc/NerscCheckpointer.h>
#include <qcd/hmc/HmcRunner.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h>

View File

@ -153,7 +153,6 @@ void GridParseLayout(char **argv,int argc,
assert(ompthreads.size()==1); assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]); GridThread::SetThreads(ompthreads[0]);
} }
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
std::vector<int> cores(0); std::vector<int> cores(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--cores"); arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
@ -194,7 +193,7 @@ void Grid_init(int *argc,char ***argv)
std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl; std::cout<<GridLogMessage<<"--mpi n.n.n.n : default MPI decomposition"<<std::endl;
std::cout<<GridLogMessage<<"--threads n : default number of OMP threads"<<std::endl; std::cout<<GridLogMessage<<"--threads n : default number of OMP threads"<<std::endl;
std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl; std::cout<<GridLogMessage<<"--grid n.n.n.n : default Grid size"<<std::endl;
std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug,Colours"<<std::endl; std::cout<<GridLogMessage<<"--log list : comma separted list of streams from Error,Warning,Message,Performance,Iterative,Integrator,Debug"<<std::endl;
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
@ -204,6 +203,7 @@ void Grid_init(int *argc,char ***argv)
GridLogConfigure(logstreams); GridLogConfigure(logstreams);
} }
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init(); Grid_debug_handler_init();
} }
@ -234,34 +234,26 @@ void Grid_init(int *argc,char ***argv)
std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl; std::cout<<GridLogMessage<<"\tvComplexD : "<<sizeof(vComplexD)*8 <<"bits ; " <<GridCmdVectorIntToString(GridDefaultSimd(4,vComplexD::Nsimd()))<<std::endl;
} }
std::string COL_RED = GridLogColours.colour["RED"];
std::string COL_PURPLE = GridLogColours.colour["PURPLE"];
std::string COL_BLACK = GridLogColours.colour["BLACK"];
std::string COL_GREEN = GridLogColours.colour["GREEN"];
std::string COL_BLUE = GridLogColours.colour["BLUE"];
std::string COL_YELLOW = GridLogColours.colour["YELLOW"];
std::string COL_BACKGROUND = GridLogColours.colour["NORMAL"];
std::cout <<std::endl; std::cout <<std::endl;
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<Logger::RED << "__|__|__|__|__"<< "|__|__|_"<<Logger::PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_RED << "__|__| | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl; std::cout <<Logger::RED << "__|__| | | "<< "| | | "<<Logger::PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
std::cout <<COL_RED << "__|__ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl; std::cout <<Logger::RED << "__|__ "<< " "<<Logger::PURPLE<<" "<< " _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::PURPLE<<" _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::PURPLE<<" _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<Logger::RED << "__|_ "<<Logger::GREEN<<"G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::PURPLE<<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl; std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G GG "<<Logger::RED<<" RRRR "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D"<<Logger::GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl; std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<"G G "<<Logger::RED<<" R R "<<Logger::BLUE <<" I "<<Logger::PURPLE<<"D D "<<Logger::GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl; std::cout <<Logger::BLUE << "__|_ "<<Logger::GREEN<<" GGGG "<<Logger::RED<<" R R "<<Logger::BLUE <<" III "<<Logger::PURPLE<<"DDDD "<<Logger::GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|__ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl; std::cout <<Logger::BLUE << "__|__ "<< " "<<Logger::GREEN <<" "<< " _|__"<<std::endl;
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<Logger::BLUE << "__|__|__|__|__"<< "|__|__|_"<<Logger::GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl; std::cout <<Logger::BLUE << " | | | | "<< "| | | "<<Logger::GREEN <<" | | |"<< " | | | | "<<std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout <<COL_YELLOW<< std::endl; std::cout <<Logger::YELLOW<< std::endl;
std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl; std::cout << "Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors"<<std::endl;
std::cout << "Colours by Tadahito Boyle "<<std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl; std::cout << "This program is free software; you can redistribute it and/or modify"<<std::endl;
std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl; std::cout << "it under the terms of the GNU General Public License as published by"<<std::endl;
@ -272,8 +264,7 @@ void Grid_init(int *argc,char ***argv)
std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl; std::cout << "but WITHOUT ANY WARRANTY; without even the implied warranty of"<<std::endl;
std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl; std::cout << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"<<std::endl;
std::cout << "GNU General Public License for more details."<<std::endl; std::cout << "GNU General Public License for more details."<<std::endl;
std::cout << COL_BACKGROUND <<std::endl; std::cout << Logger::BLACK <<std::endl;
std::cout << std::endl;
} }

View File

@ -28,6 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_LATTICE_H #ifndef GRID_LATTICE_H
#define GRID_LATTICE_H #define GRID_LATTICE_H
#include <Grid/lattice/Lattice_base.h> #include <lattice/Lattice_base.h>
#endif #endif

View File

@ -1,92 +1,126 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Log.cc Source file: ./lib/Log.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
GridStopWatch Logger::StopWatch; GridStopWatch Logger::StopWatch;
std::ostream Logger::devnull(0); std::ostream Logger::devnull(0);
std::string Logger::BLACK("\033[30m");
std::string Logger::RED("\033[31m");
std::string Logger::GREEN("\033[32m");
std::string Logger::YELLOW("\033[33m");
std::string Logger::BLUE("\033[34m");
std::string Logger::PURPLE("\033[35m");
std::string Logger::CYAN("\033[36m");
std::string Logger::WHITE("\033[37m");
std::string Logger::NORMAL("\033[0;39m");
std::string EMPTY("");
Colours GridLogColours(0); #if 0
GridLogger GridLogError(1, "Error", GridLogColours, "RED"); GridLogger GridLogError (1,"Error",Logger::RED);
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW"); GridLogger GridLogWarning (1,"Warning",Logger::YELLOW);
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL"); GridLogger GridLogMessage (1,"Message",Logger::BLACK);
GridLogger GridLogDebug(1, "Debug", GridLogColours, "PURPLE"); GridLogger GridLogDebug (1,"Debug",Logger::PURPLE);
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN"); GridLogger GridLogPerformance(1,"Performance",Logger::GREEN);
GridLogger GridLogIterative(1, "Iterative", GridLogColours, "BLUE"); GridLogger GridLogIterative (1,"Iterative",Logger::BLUE);
GridLogger GridLogIntegrator(1, "Integrator", GridLogColours, "BLUE"); GridLogger GridLogIntegrator (1,"Integrator",Logger::BLUE);
#else
GridLogger GridLogError (1,"Error",EMPTY);
GridLogger GridLogWarning (1,"Warning",EMPTY);
GridLogger GridLogMessage (1,"Message",EMPTY);
GridLogger GridLogDebug (1,"Debug",EMPTY);
GridLogger GridLogPerformance(1,"Performance",EMPTY);
GridLogger GridLogIterative (1,"Iterative",EMPTY);
GridLogger GridLogIntegrator (1,"Integrator",EMPTY);
#endif
void GridLogConfigure(std::vector<std::string> &logstreams) { void GridLogConfigure(std::vector<std::string> &logstreams)
{
GridLogError.Active(0); GridLogError.Active(0);
GridLogWarning.Active(0); GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on GridLogMessage.Active(0);
GridLogIterative.Active(0); GridLogIterative.Active(0);
GridLogDebug.Active(0); GridLogDebug.Active(0);
GridLogPerformance.Active(0); GridLogPerformance.Active(0);
GridLogIntegrator.Active(0); GridLogIntegrator.Active(0);
GridLogColours.Active(0);
for (int i = 0; i < logstreams.size(); i++) { int blackAndWhite = 1;
if (logstreams[i] == std::string("Error")) GridLogError.Active(1); if(blackAndWhite){
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1); Logger::BLACK = std::string("");
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0); Logger::RED =Logger::BLACK;
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1); Logger::GREEN =Logger::BLACK;
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1); Logger::YELLOW =Logger::BLACK;
if (logstreams[i] == std::string("Performance")) Logger::BLUE =Logger::BLACK;
GridLogPerformance.Active(1); Logger::PURPLE =Logger::BLACK;
if (logstreams[i] == std::string("Integrator")) GridLogIntegrator.Active(1); Logger::CYAN =Logger::BLACK;
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1); Logger::WHITE =Logger::BLACK;
Logger::NORMAL =Logger::BLACK;
}
for(int i=0;i<logstreams.size();i++){
if ( logstreams[i]== std::string("Error") ) GridLogError.Active(1);
if ( logstreams[i]== std::string("Warning") ) GridLogWarning.Active(1);
if ( logstreams[i]== std::string("Message") ) GridLogMessage.Active(1);
if ( logstreams[i]== std::string("Iterative") ) GridLogIterative.Active(1);
if ( logstreams[i]== std::string("Debug") ) GridLogDebug.Active(1);
if ( logstreams[i]== std::string("Performance") ) GridLogPerformance.Active(1);
if ( logstreams[i]== std::string("Integrator" ) ) GridLogIntegrator.Active(1);
} }
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
// Verbose limiter on MPI tasks // Verbose limiter on MPI tasks
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
void Grid_quiesce_nodes(void) { void Grid_quiesce_nodes(void)
int me = 0; {
int me=0;
#ifdef GRID_COMMS_MPI #ifdef GRID_COMMS_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_rank(MPI_COMM_WORLD,&me);
#endif #endif
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
me = shmem_my_pe(); me = shmem_my_pe();
#endif #endif
if (me) { if ( me ) {
std::cout.setstate(std::ios::badbit); std::cout.setstate(std::ios::badbit);
} }
} }
void Grid_unquiesce_nodes(void) { void Grid_unquiesce_nodes(void)
{
#ifdef GRID_COMMS_MPI #ifdef GRID_COMMS_MPI
std::cout.clear(); std::cout.clear();
#endif #endif
} }
} }

146
lib/Log.h
View File

@ -6,9 +6,9 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -27,9 +27,6 @@
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <map>
#ifndef GRID_LOG_H #ifndef GRID_LOG_H
#define GRID_LOG_H #define GRID_LOG_H
@ -37,99 +34,56 @@
#include <execinfo.h> #include <execinfo.h>
#endif #endif
namespace Grid { namespace Grid {
// Dress the output; use std::chrono for time stamping via the StopWatch class // Dress the output; use std::chrono for time stamping via the StopWatch class
int Rank(void); // used for early stage debug before library init int Rank(void); // used for early stage debug before library init
class Colours{
protected:
bool is_active;
public:
std::map<std::string, std::string> colour;
Colours(bool activate=false){
Active(activate);
};
void Active(bool activate){
is_active=activate;
if (is_active){
colour["BLACK"] ="\033[30m";
colour["RED"] ="\033[31m";
colour["GREEN"] ="\033[32m";
colour["YELLOW"] ="\033[33m";
colour["BLUE"] ="\033[34m";
colour["PURPLE"] ="\033[35m";
colour["CYAN"] ="\033[36m";
colour["WHITE"] ="\033[37m";
colour["NORMAL"] ="\033[0;39m";
} else {
colour["BLACK"] ="";
colour["RED"] ="";
colour["GREEN"] ="";
colour["YELLOW"]="";
colour["BLUE"] ="";
colour["PURPLE"]="";
colour["CYAN"] ="";
colour["WHITE"] ="";
colour["NORMAL"]="";
}
};
};
class Logger { class Logger {
protected: protected:
Colours &Painter; int active;
int active; std::string name, topName, COLOUR;
std::string name, topName;
std::string COLOUR;
public: public:
static GridStopWatch StopWatch; static GridStopWatch StopWatch;
static std::ostream devnull; static std::ostream devnull;
std::string background() {return Painter.colour["NORMAL"];} static std::string BLACK;
std::string evidence() {return Painter.colour["YELLOW"];} static std::string RED ;
std::string colour() {return Painter.colour[COLOUR];} static std::string GREEN;
static std::string YELLOW;
static std::string BLUE ;
static std::string PURPLE;
static std::string CYAN ;
static std::string WHITE ;
static std::string NORMAL;
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) Logger(std::string topNm, int on, std::string nm,std::string col)
: active(on), : active(on), name(nm), topName(topNm), COLOUR(col) {};
name(nm),
topName(topNm),
Painter(col_class),
COLOUR(col){} ;
void Active(int on) {active = on;}; void Active(int on) {active = on;};
int isActive(void) {return active;}; int isActive(void) {return active;};
friend std::ostream& operator<< (std::ostream& stream, Logger& log){ friend std::ostream& operator<< (std::ostream& stream, const Logger& log){
if ( log.active ) {
if ( log.active ) { StopWatch.Stop();
StopWatch.Stop(); GridTime now = StopWatch.Elapsed();
GridTime now = StopWatch.Elapsed(); StopWatch.Start();
StopWatch.Start(); stream << BLACK <<std::setw(8) << std::left << log.topName << BLACK<< " : ";
stream << log.background()<< log.topName << log.background()<< " : "; stream << log.COLOUR <<std::setw(11) << log.name << BLACK << " : ";
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : "; stream << YELLOW <<std::setw(6) << now <<BLACK << " : " ;
stream << log.evidence()<< now << log.background() << " : " << log.colour(); stream << log.COLOUR;
return stream; return stream;
} else { } else {
return devnull; return devnull;
}
} }
}
}; };
class GridLogger: public Logger { class GridLogger: public Logger {
public: public:
GridLogger(int on, std::string nm, Colours&col_class, std::string col_key = "NORMAL"): GridLogger(int on, std::string nm, std::string col = Logger::BLACK): Logger("Grid", on, nm, col){};
Logger("Grid", on, nm, col_class, col_key){};
}; };
void GridLogConfigure(std::vector<std::string> &logstreams); void GridLogConfigure(std::vector<std::string> &logstreams);
@ -141,40 +95,38 @@ extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance; extern GridLogger GridLogPerformance;
extern GridLogger GridLogIterative ; extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ; extern GridLogger GridLogIntegrator ;
extern Colours GridLogColours;
#define _NBACKTRACE (256) #define _NBACKTRACE (256)
extern void * Grid_backtrace_buffer[_NBACKTRACE]; extern void * Grid_backtrace_buffer[_NBACKTRACE];
#define BACKTRACEFILE() {\ #define BACKTRACEFILE() {\
char string[20]; \ char string[20]; \
std::sprintf(string,"backtrace.%d",Rank()); \ std::sprintf(string,"backtrace.%d",Rank()); \
std::FILE * fp = std::fopen(string,"w"); \ std::FILE * fp = std::fopen(string,"w"); \
BACKTRACEFP(fp)\ BACKTRACEFP(fp)\
std::fclose(fp); \ std::fclose(fp); \
} }
#ifdef HAVE_EXECINFO_H #ifdef HAVE_EXECINFO_H
#define BACKTRACEFP(fp) { \ #define BACKTRACEFP(fp) { \
int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\ int symbols = backtrace (Grid_backtrace_buffer,_NBACKTRACE);\
char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\ char **strings = backtrace_symbols(Grid_backtrace_buffer,symbols);\
for (int i = 0; i < symbols; i++){\ for (int i = 0; i < symbols; i++){\
std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \ std::fprintf (fp,"BackTrace Strings: %d %s\n",i, strings[i]); std::fflush(fp); \
}\ }\
} }
#else #else
#define BACKTRACEFP(fp) { \ #define BACKTRACEFP(fp) { \
std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \ std::fprintf (fp,"BT %d %lx\n",0, __builtin_return_address(0)); std::fflush(fp); \
std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \ std::fprintf (fp,"BT %d %lx\n",1, __builtin_return_address(1)); std::fflush(fp); \
std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \ std::fprintf (fp,"BT %d %lx\n",2, __builtin_return_address(2)); std::fflush(fp); \
std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \ std::fprintf (fp,"BT %d %lx\n",3, __builtin_return_address(3)); std::fflush(fp); \
} }
#endif #endif
#define BACKTRACE() BACKTRACEFP(stdout) #define BACKTRACE() BACKTRACEFP(stdout)
} }
#endif #endif

4
lib/Make.inc Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,6 @@
# additional include paths necessary to compile the C++ library
AM_CXXFLAGS = -I$(top_srcdir)/
extra_sources= extra_sources=
if BUILD_COMMS_MPI if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc extra_sources+=communicator/Communicator_mpi.cc
@ -14,11 +17,16 @@ endif
# #
# Libraries # Libraries
# #
include Make.inc include Make.inc
include Eigen.inc
lib_LIBRARIES = libGrid.a lib_LIBRARIES = libGrid.a
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
# qcd/action/fermion/PartialFractionFermion5D.cc\ \
#
# Include files
#
nobase_include_HEADERS=$(HFILES)
libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

View File

@ -1,33 +1,32 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Simd.h Source file: ./lib/Simd.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef GRID_SIMD_H #ifndef GRID_SIMD_H
#define GRID_SIMD_H #define GRID_SIMD_H
@ -119,14 +118,6 @@ namespace Grid {
inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));} inline ComplexD timesI(const ComplexD &r) { return(r*ComplexD(0.0,1.0));}
inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));} inline ComplexF timesMinusI(const ComplexF &r){ return(r*ComplexF(0.0,-1.0));}
inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));} inline ComplexD timesMinusI(const ComplexD &r){ return(r*ComplexD(0.0,-1.0));}
// Define projections onto the real and imaginary parts.
// projReal: keeps the real part of r and sets the imaginary part to zero.
inline ComplexF projReal(const ComplexF &r){return( ComplexF(std::real(r), 0.0));}
inline ComplexD projReal(const ComplexD &r){return( ComplexD(std::real(r), 0.0));}
// projImag: returns the imaginary part of r stored in the REAL component of the
// result (the imaginary component of the result is zero), i.e. Im(r) + 0i.
inline ComplexF projImag(const ComplexF &r){return (ComplexF(std::imag(r), 0.0 ));}
inline ComplexD projImag(const ComplexD &r){return (ComplexD(std::imag(r), 0.0));}
// define auxiliary functions for complex computations
inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);} inline void timesI(ComplexF &ret,const ComplexF &r) { ret = timesI(r);}
inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);} inline void timesI(ComplexD &ret,const ComplexD &r) { ret = timesI(r);}
inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);} inline void timesMinusI(ComplexF &ret,const ComplexF &r){ ret = timesMinusI(r);}
@ -172,8 +163,8 @@ namespace Grid {
}; };
#include "simd/Grid_vector_types.h" #include <simd/Grid_vector_types.h>
#include "simd/Grid_vector_unops.h" #include <simd/Grid_vector_unops.h>
namespace Grid { namespace Grid {
// Default precision // Default precision

View File

@ -1,247 +0,0 @@
#include <Grid.h>
#include <PerfCount.h>
#include <Stat.h>
namespace Grid {
bool PmuStat::pmu_initialized=false;
// Bind this statistics object to a region name and reset its counters.
// The first call made on any PmuStat also performs the one-off pmu_init().
// On targets other than x86_64 the function does nothing.
void PmuStat::init(const char *regname)
{
#ifdef __x86_64__
  name = regname;

  const bool first_use = !pmu_initialized;
  if (first_use) {
    std::cout << "initialising pmu" << std::endl;
    pmu_initialized = true;  // set before pmu_init(), as in the original ordering
    pmu_init();
  }

  clear();
#endif
}
// Reset every cumulative counter of this region to zero.
// Compiled to an empty body on targets other than x86_64.
void PmuStat::clear(void)
{
#ifdef __x86_64__
  // Invocation count, region time and summed per-thread cycles.
  count = tregion = tcycles = 0;
  // Programmable and fixed performance counters.
  pmc0 = pmc1 = 0;
  inst = cyc = ref = 0;
  // Memory traffic totals.
  reads = writes = 0;
#endif
}
// Write the accumulated counters of this region to std::cout, one per line.
// Compiled to an empty body on targets other than x86_64.
void PmuStat::print(void)
{
#ifdef __x86_64__
  // Emits "<label><value>" followed by std::endl (flushes after every line).
  auto emit = [](const char *label, uint64_t value) {
    std::cout << label << value << std::endl;
  };

  std::cout << "Reg " << std::string(name) << ":\n";
  emit(" region ", tregion);
  emit(" cycles ", tcycles);
  emit(" inst ", inst);
  emit(" cyc ", cyc);
  emit(" ref ", ref);
  emit(" pmc0 ", pmc0);
  emit(" pmc1 ", pmc1);
  emit(" count ", count);
  emit(" reads ", reads);
  emit(" writes ", writes);
#endif
}
// Mark the beginning of a measured region: calls pmu_start(), bumps the
// invocation count, snapshots the memory read/write counters into
// mrstart/mwstart and records the time-stamp counter in tstart.
// accum() later uses these start values to form the region deltas.
// Compiled to an empty body on targets other than x86_64.
void PmuStat::start(void)
{
#ifdef __x86_64__
pmu_start();
++count;
xmemctrs(&mrstart, &mwstart);
// TSC is read last, after the memory counter snapshot has been taken.
tstart = __rdtsc();
#endif
}
// Record the starting counter values for slot t (column t of counters[][]).
// Slots 0-1 hold __rdpmc(0) and __rdpmc(1); slots 2-4 hold __rdpmc with bit 30
// set and index 0,1,2; slot 5 holds the time-stamp counter.
// exit(t) later replaces each stored value by the elapsed delta.
// NOTE(review): t is not range-checked here; counters is declared as [8][256]
// in Stat.h, so callers presumably keep t below 256 - confirm.
// Compiled to an empty body on targets other than x86_64.
void PmuStat::enter(int t)
{
#ifdef __x86_64__
counters[0][t] = __rdpmc(0);
counters[1][t] = __rdpmc(1);
counters[2][t] = __rdpmc((1<<30)|0);
counters[3][t] = __rdpmc((1<<30)|1);
counters[4][t] = __rdpmc((1<<30)|2);
counters[5][t] = __rdtsc();
#endif
}
// Close the measurement for slot t: each counter is read again and the value
// stored by enter(t) is overwritten with (current - stored), i.e. the delta
// accumulated between enter(t) and exit(t). accum() sums these deltas.
// NOTE(review): t is not range-checked here; counters is declared as [8][256]
// in Stat.h, so callers presumably keep t below 256 - confirm.
// Compiled to an empty body on targets other than x86_64.
void PmuStat::exit(int t)
{
#ifdef __x86_64__
counters[0][t] = __rdpmc(0) - counters[0][t];
counters[1][t] = __rdpmc(1) - counters[1][t];
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = __rdtsc() - counters[5][t];
#endif
}
void PmuStat::accum(int nthreads)
{
#ifdef __x86_64__
tend = __rdtsc();
xmemctrs(&mrend, &mwend);
pmu_stop();
for (int t = 0; t < nthreads; ++t) {
pmc0 += counters[0][t];
pmc1 += counters[1][t];
inst += counters[2][t];
cyc += counters[3][t];
ref += counters[4][t];
tcycles += counters[5][t];
}
uint64_t region = tend - tstart;
tregion += region;
uint64_t mreads = mrend - mrstart;
reads += mreads;
uint64_t mwrites = mwend - mwstart;
writes += mwrites;
#endif
}
// Placeholder hooks: tear-down, start and stop of the PMU are currently no-ops.
void PmuStat::pmu_fini(void)
{
}

void PmuStat::pmu_start(void)
{
}

void PmuStat::pmu_stop(void)
{
}
// One-off PMU initialisation. Only does work when _KNIGHTS_LANDING_ is
// defined, in which case it opens the uncore counters via KNLsetup();
// otherwise the body is empty.
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
KNLsetup();
#endif
}
// Report the current memory read (*mr) and write (*mw) counter totals.
// With _KNIGHTS_LANDING_ defined, the totals are the sums of the EDC read and
// write counters returned by KNLreadctrs(); otherwise both outputs are zero.
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
  ctrs snapshot;
  KNLreadctrs(snapshot);

  uint64_t total_rd = 0;
  uint64_t total_wr = 0;
  for (int edc = 0; edc < NEDC; ++edc) {
    total_rd += snapshot.edcrd[edc];
    total_wr += snapshot.edcwr[edc];
  }

  *mr = total_rd;
  *mw = total_wr;
#else
  *mw = 0;
  *mr = 0;
#endif
}
#ifdef _KNIGHTS_LANDING_
struct knl_gbl_ PmuStat::gbl;
#define PMU_MEM
// Open one uncore performance event.
//   ename : sysfs directory of the PMU box (e.g. "/sys/devices/uncore_imc_0");
//           the PMU type id is read from "<ename>/type".
//   fd    : receives the file descriptor returned by perf_event_open().
//   event : event code, placed in the low bits of the perf config word.
//   umask : unit mask, placed at bit 8 of the perf config word.
// Any failure is fatal: a diagnostic is written to stderr and the process
// exits with EXIT_FAILURE (previously it exited with status 0, which made a
// failed setup look like a successful run to the calling shell/scheduler).
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
{
  char fname[1024];
  snprintf(fname, sizeof(fname), "%s/type", ename);

  FILE *fp = fopen(fname, "r");
  if (fp == nullptr) {
    ::fprintf(stderr, "open %s\n", fname);
    ::exit(EXIT_FAILURE);
  }

  // Checked explicitly rather than with assert(), so that a malformed type
  // file cannot leave `type` uninitialised in NDEBUG builds.
  int type = 0;
  const int ret = fscanf(fp, "%d", &type);
  fclose(fp);
  if (ret != 1) {
    ::fprintf(stderr, "cannot parse PMU type from %s\n", fname);
    ::exit(EXIT_FAILURE);
  }
  // std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;

  struct perf_event_attr hw = {};
  hw.size = sizeof(hw);
  hw.type = type;
  // see /sys/devices/uncore_*/format/*
  // All of the events we are interested in are configured the same way, but
  // that isn't always true. Proper code would parse the format files
  hw.config = event | (umask << 8);
  //hw.read_format = PERF_FORMAT_GROUP;
  // unfortunately the above only works within a single PMU; might
  // as well just read them one at a time

  int cpu = 0;
  fd = perf_event_open(&hw, -1, cpu, -1, 0);
  if (fd == -1) {
    // hw.config is a 64-bit field; cast so the format specifier is portable.
    ::fprintf(stderr, "CPU %d, box %s, event 0x%llx\n", cpu, ename,
              static_cast<unsigned long long>(hw.config));
    ::exit(EXIT_FAILURE);
  }
  // std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
}
// Open every uncore event used by the memory statistics and store the file
// descriptors in the static `gbl` table:
//   - per memory controller (uncore_imc_N): RPQ inserts and WPQ inserts,
//   - per EDC eclk box (uncore_edc_eclk_N): RPQ inserts and WPQ inserts,
//   - per EDC uclk box (uncore_edc_uclk_N): HitE, HitM, MissE, MissM.
void PmuStat::KNLsetup(void)
{
  char box[1024];

  // MC RPQ inserts and WPQ inserts (reads & writes)
  for (int mc = 0; mc != NMC; mc++) {
    ::snprintf(box, sizeof(box), "/sys/devices/uncore_imc_%d", mc);
    KNLevsetup(box, gbl.mc_rd[mc], 0x1, 0x1);  // RPQ inserts
    KNLevsetup(box, gbl.mc_wr[mc], 0x2, 0x1);  // WPQ inserts
  }

  // EDC RPQ inserts and WPQ inserts
  for (int edc = 0; edc != NEDC; edc++) {
    ::snprintf(box, sizeof(box), "/sys/devices/uncore_edc_eclk_%d", edc);
    KNLevsetup(box, gbl.edc_rd[edc], 0x1, 0x1);  // RPQ inserts
    KNLevsetup(box, gbl.edc_wr[edc], 0x2, 0x1);  // WPQ inserts
  }

  // EDC HitE, HitM, MissE, MissM: same event code, unit masks 0x1,0x2,0x4,0x8
  for (int edc = 0; edc != NEDC; edc++) {
    ::snprintf(box, sizeof(box), "/sys/devices/uncore_edc_uclk_%d", edc);
    int *const targets[4] = { &gbl.edc_hite[edc], &gbl.edc_hitm[edc],
                              &gbl.edc_misse[edc], &gbl.edc_missm[edc] };
    for (int bit = 0; bit < 4; ++bit) {
      KNLevsetup(box, *targets[bit], 0x2, 1 << bit);
    }
  }
}
// Read the current 64-bit value of one counter from its perf file descriptor.
// A failed or short read is fatal: a diagnostic goes to stderr and the process
// exits with EXIT_FAILURE (previously status 0, i.e. reported as success).
uint64_t PmuStat::KNLreadctr(int fd)
{
  uint64_t data = 0;
  // read() returns ssize_t (-1 on error); keep it signed so the diagnostic
  // shows the real return value instead of a wrapped-around size_t.
  const ssize_t got = ::read(fd, &data, sizeof(data));
  if (got != static_cast<ssize_t>(sizeof(data))) {
    ::fprintf(stderr, "read counter %ld\n", static_cast<long>(got));
    ::exit(EXIT_FAILURE);
  }
  return data;
}
// Fill `c` with a snapshot of every counter opened by KNLsetup(): the memory
// controller read/write counters first, then the EDC read/write counters, and
// finally the EDC hit/miss counters. The read order matches the original code.
void PmuStat::KNLreadctrs(ctrs &c)
{
  for (int mc = 0; mc != NMC; mc++) {
    c.mcrd[mc] = KNLreadctr(gbl.mc_rd[mc]);
    c.mcwr[mc] = KNLreadctr(gbl.mc_wr[mc]);
  }

  for (int edc = 0; edc != NEDC; edc++) {
    c.edcrd[edc] = KNLreadctr(gbl.edc_rd[edc]);
    c.edcwr[edc] = KNLreadctr(gbl.edc_wr[edc]);
  }

  for (int edc = 0; edc != NEDC; edc++) {
    c.edchite[edc]  = KNLreadctr(gbl.edc_hite[edc]);
    c.edchitm[edc]  = KNLreadctr(gbl.edc_hitm[edc]);
    c.edcmisse[edc] = KNLreadctr(gbl.edc_misse[edc]);
    c.edcmissm[edc] = KNLreadctr(gbl.edc_missm[edc]);
  }
}
#endif
}

View File

@ -1,104 +0,0 @@
#ifndef _GRID_STAT_H
#define _GRID_STAT_H
#ifdef AVX512
#define _KNIGHTS_LANDING_ROOTONLY
#endif
namespace Grid {
///////////////////////////////////////////////////////////////////////////////
// Extra KNL counters from MCDRAM
///////////////////////////////////////////////////////////////////////////////
#ifdef _KNIGHTS_LANDING_
#define NMC 6
#define NEDC 8
// Snapshot of the KNL uncore counter values, filled by PmuStat::KNLreadctrs().
// Arrays are indexed by memory-controller number (NMC entries) or by EDC
// number (NEDC entries).
struct ctrs
{
uint64_t mcrd[NMC]; // memory controller read counter values
uint64_t mcwr[NMC]; // memory controller write counter values
uint64_t edcrd[NEDC]; // EDC read counter values
uint64_t edcwr[NEDC]; // EDC write counter values
uint64_t edchite[NEDC]; // EDC HitE counter values
uint64_t edchitm[NEDC]; // EDC HitM counter values
uint64_t edcmisse[NEDC]; // EDC MissE counter values
uint64_t edcmissm[NEDC]; // EDC MissM counter values
};
// Peter/Azusa:
// Our modification of a code provided by Larry Meadows from Intel
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
// so is already public and in the linux kernel for KNL.
// File descriptors of the opened uncore events, one per counter. Each entry is
// set by PmuStat::KNLevsetup() (via KNLsetup()) and read by KNLreadctr().
// The layout mirrors struct ctrs field for field.
struct knl_gbl_
{
int mc_rd[NMC]; // memory controller read events
int mc_wr[NMC]; // memory controller write events
int edc_rd[NEDC]; // EDC read events
int edc_wr[NEDC]; // EDC write events
int edc_hite[NEDC]; // EDC HitE events
int edc_hitm[NEDC]; // EDC HitM events
int edc_misse[NEDC]; // EDC MissE events
int edc_missm[NEDC]; // EDC MissM events
};
#endif
///////////////////////////////////////////////////////////////////////////////
// Per-region performance statistics. Typical sequence, as implemented in
// Stat.cc: init(name) once, then for each measured region start(), enter(t) /
// exit(t) for every slot t, and accum(nthreads); print() reports the totals.
class PmuStat
{
// Per-slot scratch storage: counters[i][t] holds counter i for slot t.
// enter(t) stores the raw start value, exit(t) overwrites it with the delta.
uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
static struct knl_gbl_ gbl;
#endif
const char *name;
uint64_t reads; // memory reads
uint64_t writes; // memory writes
uint64_t mrstart; // memory read counter at start of parallel region
uint64_t mrend; // memory read counter at end of parallel region
uint64_t mwstart; // memory write counter at start of parallel region
uint64_t mwend; // memory write counter at end of parallel region
// cumulative counters
uint64_t count; // number of invocations
uint64_t tregion; // total time in parallel region (from thread 0)
uint64_t tcycles; // total cycles inside parallel region
uint64_t inst, ref, cyc; // fixed counters
uint64_t pmc0, pmc1;// pmu
// add memory counters here
// temp variables
uint64_t tstart; // tsc at start of parallel region
uint64_t tend; // tsc at end of parallel region
// map for the first index of counters[][], as used by enter()/exit()/accum():
// 0 __rdpmc(0) -> accumulated into pmc0
// 1 __rdpmc(1) -> accumulated into pmc1
// 2 __rdpmc((1<<30)|0) -> accumulated into inst
// 3 __rdpmc((1<<30)|1) -> accumulated into cyc
// 4 __rdpmc((1<<30)|2) -> accumulated into ref
// 5 __rdtsc() -> accumulated into tcycles
// 6,7 not referenced by the code in Stat.cc
static bool pmu_initialized;
public:
// True once any PmuStat::init() call has run the one-off PMU initialisation.
static bool is_init(void){ return pmu_initialized;}
static void pmu_init(void);
static void pmu_fini(void);
static void pmu_start(void);
static void pmu_stop(void);
// Ends a region: adds the per-slot deltas for slots [0,nthreads) and the
// region-wide time / memory differences to the cumulative totals.
void accum(int nthreads);
// Writes the current memory read / write counter totals to *mr and *mw.
static void xmemctrs(uint64_t *mr, uint64_t *mw);
void start(void);
void enter(int t);
void exit(int t);
void print(void);
void init(const char *regname);
void clear(void);
#ifdef _KNIGHTS_LANDING_
static void KNLsetup(void);
static uint64_t KNLreadctr(int fd);
static void KNLreadctrs(ctrs &c);
static void KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif
};
}
#endif

View File

@ -30,7 +30,7 @@
#include <thread> #include <thread>
#include <Grid/stencil/Lebesgue.h> // subdir aggregate #include <stencil/Lebesgue.h> // subdir aggregate
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Must not lose sight that goal is to be able to construct really efficient // Must not lose sight that goal is to be able to construct really efficient
@ -106,6 +106,7 @@
#define SERIAL_SENDS #define SERIAL_SENDS
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
comms_bytes+=2.0*bytes;
#ifdef SEND_IMMEDIATE #ifdef SEND_IMMEDIATE
commtime-=usecond(); commtime-=usecond();
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes); _grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
@ -264,19 +265,17 @@
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
} }
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
//_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); _mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
uint64_t cbase = (uint64_t) 0;
local = _entries[ent]._is_local; local = _entries[ent]._is_local;
perm = _entries[ent]._permute; perm = _entries[ent]._permute;
if (perm) ptype = _permute_type[point]; if (perm) ptype = _permute_type[point];
if (local) return base + _entries[ent]._byte_offset; if (local) return base + _entries[ent]._byte_offset;
else return cbase + _entries[ent]._byte_offset; else return _entries[ent]._byte_offset;
} }
inline uint64_t GetPFInfo(int ent,uint64_t base) { inline uint64_t GetPFInfo(int ent,uint64_t base) {
uint64_t cbase = (uint64_t) 0;
int local = _entries[ent]._is_local; int local = _entries[ent]._is_local;
if (local) return base + _entries[ent]._byte_offset; if (local) return base + _entries[ent]._byte_offset;
else return cbase + _entries[ent]._byte_offset; else return _entries[ent]._byte_offset;
} }
// Comms buffers // Comms buffers
@ -302,39 +301,6 @@
double gathermtime; double gathermtime;
double splicetime; double splicetime;
double nosplicetime; double nosplicetime;
double calls;
void ZeroCounters(void) {
gathertime = 0.;
jointime = 0.;
commtime = 0.;
halogtime = 0.;
mergetime = 0.;
spintime = 0.;
gathermtime = 0.;
splicetime = 0.;
nosplicetime = 0.;
comms_bytes = 0.;
calls = 0.;
};
void Report(void) {
#define PRINTIT(A) \
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
if ( calls > 0. ) {
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
PRINTIT(jointime);
PRINTIT(gathertime);
PRINTIT(commtime);
PRINTIT(halogtime);
PRINTIT(mergetime);
PRINTIT(spintime);
PRINTIT(comms_bytes);
PRINTIT(gathermtime);
PRINTIT(splicetime);
PRINTIT(nosplicetime);
}
};
#endif #endif
CartesianStencil(GridBase *grid, CartesianStencil(GridBase *grid,
@ -344,6 +310,18 @@
const std::vector<int> &distances) const std::vector<int> &distances)
: _permute_type(npoints), _comm_buf_size(npoints) : _permute_type(npoints), _comm_buf_size(npoints)
{ {
#ifdef TIMING_HACK
gathertime=0;
jointime=0;
commtime=0;
halogtime=0;
mergetime=0;
spintime=0;
gathermtime=0;
splicetime=0;
nosplicetime=0;
comms_bytes=0;
#endif
_npoints = npoints; _npoints = npoints;
_grid = grid; _grid = grid;
_directions = directions; _directions = directions;
@ -645,7 +623,6 @@
template<class compressor> template<class compressor>
void HaloExchange(const Lattice<vobj> &source,compressor &compress) void HaloExchange(const Lattice<vobj> &source,compressor &compress)
{ {
calls++;
Mergers.resize(0); Mergers.resize(0);
Packets.resize(0); Packets.resize(0);
HaloGather(source,compress); HaloGather(source,compress);

View File

@ -30,22 +30,22 @@ Author: neo <cossu@post.kek.jp>
#ifndef GRID_MATH_H #ifndef GRID_MATH_H
#define GRID_MATH_H #define GRID_MATH_H
#include <Grid/tensors/Tensor_traits.h> #include <tensors/Tensor_traits.h>
#include <Grid/tensors/Tensor_class.h> #include <tensors/Tensor_class.h>
#include <Grid/tensors/Tensor_arith.h> #include <tensors/Tensor_arith.h>
#include <Grid/tensors/Tensor_inner.h> #include <tensors/Tensor_inner.h>
#include <Grid/tensors/Tensor_outer.h> #include <tensors/Tensor_outer.h>
#include <Grid/tensors/Tensor_transpose.h> #include <tensors/Tensor_transpose.h>
#include <Grid/tensors/Tensor_trace.h> #include <tensors/Tensor_trace.h>
#include <Grid/tensors/Tensor_index.h> #include <tensors/Tensor_index.h>
#include <Grid/tensors/Tensor_Ta.h> #include <tensors/Tensor_Ta.h>
#include <Grid/tensors/Tensor_determinant.h> #include <tensors/Tensor_determinant.h>
#include <Grid/tensors/Tensor_exp.h> #include <tensors/Tensor_exp.h>
//#include <Grid/tensors/Tensor_peek.h> //#include <tensors/Tensor_peek.h>
//#include <Grid/tensors/Tensor_poke.h> //#include <tensors/Tensor_poke.h>
#include <Grid/tensors/Tensor_reality.h> #include <tensors/Tensor_reality.h>
#include <Grid/tensors/Tensor_unary.h> #include <tensors/Tensor_unary.h>
#include <Grid/tensors/Tensor_extract_merge.h> #include <tensors/Tensor_extract_merge.h>
#include <Grid/tensors/Tensor_logical.h> #include <tensors/Tensor_logical.h>
#endif #endif

View File

@ -37,11 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_OMP #ifdef GRID_OMP
#include <omp.h> #include <omp.h>
#ifdef GRID_NUMA #define PARALLEL_FOR_LOOP _Pragma("omp parallel for ")
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#else
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
#endif
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#else #else
#define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP

View File

@ -31,6 +31,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ALGORITHM_COARSENED_MATRIX_H #ifndef GRID_ALGORITHM_COARSENED_MATRIX_H
#define GRID_ALGORITHM_COARSENED_MATRIX_H #define GRID_ALGORITHM_COARSENED_MATRIX_H
#include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -28,6 +28,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_ALGORITHM_SPARSE_MATRIX_H #ifndef GRID_ALGORITHM_SPARSE_MATRIX_H
#define GRID_ALGORITHM_SPARSE_MATRIX_H #define GRID_ALGORITHM_SPARSE_MATRIX_H
#include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_CHEBYSHEV_H #ifndef GRID_CHEBYSHEV_H
#define GRID_CHEBYSHEV_H #define GRID_CHEBYSHEV_H
#include <Grid/algorithms/LinearOperator.h> #include<Grid.h>
#include<algorithms/LinearOperator.h>
namespace Grid { namespace Grid {

View File

@ -18,10 +18,10 @@
#include <stddef.h> #include <stddef.h>
#include <Config.h> #include <Config.h>
#ifdef HAVE_LIBGMP #ifdef HAVE_GMP_H
#include "bigfloat.h" #include <algorithms/approx/bigfloat.h>
#else #else
#include "bigfloat_double.h" #include <algorithms/approx/bigfloat_double.h>
#endif #endif
#define JMAX 10000 //Maximum number of iterations of Newton's approximation #define JMAX 10000 //Maximum number of iterations of Newton's approximation

View File

@ -1,168 +1,150 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradient.h Source file: ./lib/algorithms/iterative/ConjugateGradient.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_H #ifndef GRID_CONJUGATE_GRADIENT_H
#define GRID_CONJUGATE_GRADIENT_H #define GRID_CONJUGATE_GRADIENT_H
namespace Grid { namespace Grid {
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators // Base classes for iterative processes based on operators
// single input vec, single output vec. // single input vec, single output vec.
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
template <class Field> template<class Field>
class ConjugateGradient : public OperatorFunction<Field> { class ConjugateGradient : public OperatorFunction<Field> {
public: public:
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. RealD Tolerance;
// Defaults true. Integer MaxIterations;
RealD Tolerance; ConjugateGradient(RealD tol,Integer maxit) : Tolerance(tol), MaxIterations(maxit) {
Integer MaxIterations; };
ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
RealD cp, c, a, d, b, ssq, qq, b_pred;
Field p(src);
Field mmp(src);
Field r(src);
// Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess) == 0);
Linop.HermOpAndNorm(psi, mmp, d, b); void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){
psi.checkerboard = src.checkerboard;
conformable(psi,src);
r = src - mmp; RealD cp,c,a,d,b,ssq,qq,b_pred;
p = r;
a = norm2(p); Field p(src);
cp = a; Field mmp(src);
ssq = norm2(src); Field r(src);
std::cout << GridLogIterative << std::setprecision(4) //Initial residual computation & set up
<< "ConjugateGradient: guess " << guess << std::endl; RealD guess = norm2(psi);
std::cout << GridLogIterative << std::setprecision(4) assert(std::isnan(guess)==0);
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq; Linop.HermOpAndNorm(psi,mmp,d,b);
// Check if guess is really REALLY good :) r= src-mmp;
if (cp <= rsq) { p= r;
return;
}
std::cout << GridLogIterative << std::setprecision(4) a =norm2(p);
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq cp =a;
<< std::endl; ssq=norm2(src);
GridStopWatch LinalgTimer; std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl;
GridStopWatch MatrixTimer; std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl;
GridStopWatch SolverTimer; std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
SolverTimer.Start(); RealD rsq = Tolerance* Tolerance*ssq;
int k;
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start(); //Check if guess is really REALLY good :)
Linop.HermOpAndNorm(p, mmp, d, qq); if ( cp <= rsq ) {
MatrixTimer.Stop(); return;
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c / d;
b_pred = a * (a * qq - d) / c;
cp = axpy_norm(r, -a, mmp, r);
b = cp / c;
// Fuse these loops ; should be really easy
psi = a * p + psi;
p = p * b + r;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
return;
} }
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k=1;k<=MaxIterations;k++){
c=cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p,mmp,d,qq);
MatrixTimer.Stop();
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c/d;
b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
LinalgTimer.Stop();
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi,mmp,d,qq);
p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual " <<true_residual
<<" target "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
std::cout<<std::endl;
assert(true_residual/Tolerance < 1000.0);
return;
}
}
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
assert(0);
} }
std::cout << GridLogMessage << "ConjugateGradient did NOT converge" };
<< std::endl;
if (ErrorOnNoConverge) assert(0);
}
};
} }
#endif #endif

View File

@ -1,142 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradientMixedPrec.h
Copyright (C) 2015
Author: Christopher Kelly <ckelly@phys.columbia.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
#define GRID_CONJUGATE_GRADIENT_MIXED_PREC_H
namespace Grid {
// Mixed precision restarted defect correction CG.
//
// Each outer iteration:
//   1. applies Linop_d.HermOp to the current double-precision solution and
//      forms the residual against the original source,
//   2. converts that residual to single precision and solves the defect
//      equation with a single-precision ConjugateGradient on Linop_f,
//   3. converts the correction back to double precision and adds it to the
//      solution.
// After the outer loop (converged or MaxOuterIterations reached) a final
// double-precision ConjugateGradient is always run on the accumulated solution.
//
// Template constraints: FieldD must have getPrecision<>::value == 2 and FieldF
// must have getPrecision<>::value == 1 (the double / single precision fields
// in this file's naming).
//
// NOTE: sol_d is read before it is written (it is passed to HermOp on the
// first outer iteration), so the caller must supply an initialised guess.
template<class FieldD,class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0,typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionConjugateGradient : public LinearFunction<FieldD> {
public:
  RealD   Tolerance;           // Target relative tolerance of the overall solve
  Integer MaxInnerIterations;  // Iteration cap handed to each inner (and the final) CG
  Integer MaxOuterIterations;  // Cap on the number of defect-correction restarts
  GridBase* SinglePrecGrid; //Grid for single-precision fields
  RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
  LinearOperatorBase<FieldF> &Linop_f;  // Operator used by the single-precision inner solves
  LinearOperatorBase<FieldD> &Linop_d;  // Operator used for residuals and the final solve

  //Option to speed up *inner single precision* solves using a LinearFunction that produces a guess
  LinearFunction<FieldF> *guesser;

  // The mem-initializers are listed in member declaration order, which is the
  // order in which C++ actually performs them (avoids -Wreorder diagnostics).
  // OuterLoopNormMult defaults to 100 and no guesser is installed.
  MixedPrecisionConjugateGradient(RealD tol, Integer maxinnerit, Integer maxouterit, GridBase* _sp_grid, LinearOperatorBase<FieldF> &_Linop_f, LinearOperatorBase<FieldD> &_Linop_d) :
    Tolerance(tol), MaxInnerIterations(maxinnerit), MaxOuterIterations(maxouterit), SinglePrecGrid(_sp_grid),
    OuterLoopNormMult(100.),
    Linop_f(_Linop_f), Linop_d(_Linop_d),
    guesser(nullptr) { }

  // Install a guess generator for the inner solves. Only the address of g is
  // stored, so g must outlive every later call to operator().
  void useGuesser(LinearFunction<FieldF> &g){
    guesser = &g;
  }

  // Solve for sol_d given the source src_d_in; sol_d is used as the starting
  // guess and updated in place.
  void operator() (const FieldD &src_d_in, FieldD &sol_d){
    GridStopWatch TotalTimer;
    TotalTimer.Start();

    int cb = src_d_in.checkerboard;
    sol_d.checkerboard = cb;

    // Stopping target, expressed on the same scale as the value returned by
    // axpy_norm below: norm2(source) * Tolerance^2.
    RealD src_norm = norm2(src_d_in);
    RealD stop = src_norm * Tolerance*Tolerance;

    GridBase* DoublePrecGrid = src_d_in._grid;
    FieldD tmp_d(DoublePrecGrid);
    tmp_d.checkerboard = cb;

    FieldD src_d(DoublePrecGrid);
    src_d = src_d_in; //source for next inner iteration, computed from residual during operation

    RealD inner_tol = Tolerance;

    FieldF src_f(SinglePrecGrid);
    src_f.checkerboard = cb;

    FieldF sol_f(SinglePrecGrid);
    sol_f.checkerboard = cb;

    // Inner solver must not abort when it fails to reach inner_tol: the outer
    // loop simply restarts from the improved solution.
    ConjugateGradient<FieldF> CG_f(inner_tol, MaxInnerIterations);
    CG_f.ErrorOnNoConverge = false;

    GridStopWatch InnerCGtimer;
    GridStopWatch PrecChangeTimer;

    for(Integer outer_iter = 0; outer_iter < MaxOuterIterations; outer_iter++){
      //Compute double precision rsd and also new RHS vector.
      Linop_d.HermOp(sol_d, tmp_d);
      RealD norm = axpy_norm(src_d, -1., tmp_d, src_d_in); //src_d is residual vector

      std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration " <<outer_iter<<" residual "<< norm<< " target "<< stop<<std::endl;

      if(norm < OuterLoopNormMult * stop){
        std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Outer iteration converged on iteration " <<outer_iter <<std::endl;
        break;
      }

      // Loosen the inner tolerance (it only ever grows) until the inner target
      // norm * inner_tol^2 is no tighter than the overall target.
      while(norm * inner_tol * inner_tol < stop) inner_tol *= 2;  // inner_tol = sqrt(stop/norm) ??

      PrecChangeTimer.Start();
      precisionChange(src_f, src_d);
      PrecChangeTimer.Stop();

      zeroit(sol_f);

      //Optionally improve inner solver guess (eg using known eigenvectors)
      if(guesser != nullptr)
        (*guesser)(src_f, sol_f);

      //Inner CG
      CG_f.Tolerance = inner_tol;
      InnerCGtimer.Start();
      CG_f(Linop_f, src_f, sol_f);
      InnerCGtimer.Stop();

      //Convert sol back to double and add to double prec solution
      PrecChangeTimer.Start();
      precisionChange(tmp_d, sol_f);
      PrecChangeTimer.Stop();

      axpy(sol_d, 1.0, tmp_d, sol_d);
    }

    //Final trial CG
    std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Starting final patch-up double-precision solve"<<std::endl;

    ConjugateGradient<FieldD> CG_d(Tolerance, MaxInnerIterations);
    CG_d(Linop_d, src_d_in, sol_d);

    TotalTimer.Stop();
    std::cout<<GridLogMessage<<"MixedPrecisionConjugateGradient: Total " << TotalTimer.Elapsed() << " Precision change " << PrecChangeTimer.Elapsed() << " Inner CG total " << InnerCGtimer.Elapsed() << std::endl;
  }
};
}
#endif

View File

@ -130,8 +130,8 @@ DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st,
} }
#include "Householder.h" #include <algorithms/iterative/Householder.h>
#include "Francis.h" #include <algorithms/iterative/Francis.h>
#endif #endif

View File

@ -33,8 +33,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef USE_LAPACK #ifdef USE_LAPACK
#include <lapacke.h> #include <lapacke.h>
#endif #endif
#include "DenseMatrix.h" #include <algorithms/iterative/DenseMatrix.h>
#include "EigenSort.h" #include <algorithms/iterative/EigenSort.h>
namespace Grid { namespace Grid {

View File

@ -29,6 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_CARTESIAN_BASE_H #ifndef GRID_CARTESIAN_BASE_H
#define GRID_CARTESIAN_BASE_H #define GRID_CARTESIAN_BASE_H
#include <Grid.h>
namespace Grid{ namespace Grid{
@ -81,8 +82,11 @@ public:
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
virtual int CheckerBoardFromOindex (int Oindex)=0; int CheckerBoardFromOindex (int Oindex){
virtual int CheckerBoardFromOindexTable (int Oindex)=0; std::vector<int> ocoor;
oCoorFromOindex(ocoor,Oindex);
return CheckerBoard(ocoor);
}
////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////
// Local layout calculations // Local layout calculations
@ -103,12 +107,6 @@ public:
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
return idx; return idx;
} }
virtual int iIndex(std::vector<int> &lcoor)
{
int idx=0;
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
return idx;
}
inline int oIndexReduced(std::vector<int> &ocoor) inline int oIndexReduced(std::vector<int> &ocoor)
{ {
int idx=0; int idx=0;
@ -125,6 +123,12 @@ public:
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// SIMD lane addressing // SIMD lane addressing
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
inline int iIndex(std::vector<int> &lcoor)
{
int idx=0;
for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
return idx;
}
inline void iCoorFromIindex(std::vector<int> &coor,int lane) inline void iCoorFromIindex(std::vector<int> &coor,int lane)
{ {
Lexicographic::CoorFromIndex(coor,lane,_simd_layout); Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
@ -216,7 +220,7 @@ public:
} }
i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim i_idx= iIndex(cblcoor);// this does not imply divide by 2 on checker dim
o_idx= oIndex(lcoor); // this implies divide by 2 on checkerdim o_idx= oIndex(lcoor);// this implies divide by 2 on checkerdim
} }
void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor) void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)

View File

@ -39,13 +39,6 @@ class GridCartesian: public GridBase {
public: public:
virtual int CheckerBoardFromOindexTable (int Oindex) {
return 0;
}
virtual int CheckerBoardFromOindex (int Oindex)
{
return 0;
}
virtual int CheckerBoarded(int dim){ virtual int CheckerBoarded(int dim){
return 0; return 0;
} }

View File

@ -32,10 +32,16 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
static const int CbRed =0; static const int CbRed =0;
static const int CbBlack=1; static const int CbBlack=1;
static const int Even =CbRed; static const int Even =CbRed;
static const int Odd =CbBlack; static const int Odd =CbBlack;
// Perhaps these are misplaced and
// should be in sparse matrix.
// Also should make these a named enum type
static const int DaggerNo=0;
static const int DaggerYes=1;
// Specialise this for red black grids storing half the data like a chess board. // Specialise this for red black grids storing half the data like a chess board.
class GridRedBlackCartesian : public GridBase class GridRedBlackCartesian : public GridBase
@ -43,7 +49,6 @@ class GridRedBlackCartesian : public GridBase
public: public:
std::vector<int> _checker_dim_mask; std::vector<int> _checker_dim_mask;
int _checker_dim; int _checker_dim;
std::vector<int> _checker_board;
virtual int CheckerBoarded(int dim){ virtual int CheckerBoarded(int dim){
if( dim==_checker_dim) return 1; if( dim==_checker_dim) return 1;
@ -73,20 +78,12 @@ public:
// or by looping over x,y,z and multiply rather than computing checkerboard. // or by looping over x,y,z and multiply rather than computing checkerboard.
if ( (source_cb+ocb)&1 ) { if ( (source_cb+ocb)&1 ) {
return (shift)/2; return (shift)/2;
} else { } else {
return (shift+1)/2; return (shift+1)/2;
} }
} }
virtual int CheckerBoardFromOindexTable (int Oindex) {
return _checker_board[Oindex];
}
virtual int CheckerBoardFromOindex (int Oindex)
{
std::vector<int> ocoor;
oCoorFromOindex(ocoor,Oindex);
return CheckerBoard(ocoor);
}
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
if(dim != _checker_dim) return shift; if(dim != _checker_dim) return shift;
@ -178,7 +175,7 @@ public:
// all elements of a simd vector must have same checkerboard. // all elements of a simd vector must have same checkerboard.
// If Ls vectorised, this must still be the case; e.g. dwf rb5d // If Ls vectorised, this must still be the case; e.g. dwf rb5d
if ( _simd_layout[d]>1 ) { if ( _simd_layout[d]>1 ) {
if ( checker_dim_mask[d] ) { if ( d != _checker_dim ) {
assert( (_rdimensions[d]&0x1) == 0 ); assert( (_rdimensions[d]&0x1) == 0 );
} }
} }
@ -194,8 +191,6 @@ public:
_ostride[d] = _ostride[d-1]*_rdimensions[d-1]; _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
_istride[d] = _istride[d-1]*_simd_layout[d-1]; _istride[d] = _istride[d-1]*_simd_layout[d-1];
} }
} }
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
@ -217,18 +212,6 @@ public:
block = block*_rdimensions[d]; block = block*_rdimensions[d];
} }
////////////////////////////////////////////////
// Create a checkerboard lookup table
////////////////////////////////////////////////
int rvol = 1;
for(int d=0;d<_ndimension;d++){
rvol=rvol * _rdimensions[d];
}
_checker_board.resize(rvol);
for(int osite=0;osite<_osites;osite++){
_checker_board[osite] = CheckerBoardFromOindex (osite);
}
}; };
protected: protected:
virtual int oIndex(std::vector<int> &coor) virtual int oIndex(std::vector<int> &coor)
@ -241,21 +224,9 @@ protected:
idx+=_ostride[d]*(coor[d]%_rdimensions[d]); idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
} }
} }
return idx; return idx;
}; };
virtual int iIndex(std::vector<int> &lcoor)
{
int idx=0;
for(int d=0;d<_ndimension;d++) {
if( d==_checker_dim ) {
idx+=_istride[d]*(lcoor[d]/(2*_rdimensions[d]));
} else {
idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
}
}
return idx;
}
}; };
} }

View File

@ -1,4 +1,3 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -57,7 +56,6 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int e1=rhs._grid->_slice_nblock[dimension]; int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension]; int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension]; int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) { if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2 PARALLEL_NESTED_LOOP2
@ -70,20 +68,15 @@ PARALLEL_NESTED_LOOP2
} }
} else { } else {
int bo=0; int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){ for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){ for(int b=0;b<e2;b++){
int o = n*stride; int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b); int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb &cbmask ) { if ( ocb &cbmask ) {
table.push_back(std::pair<int,int> (bo++,o+b)); buffer[off+bo++]=compress(rhs._odata[so+o+b]);
} }
} }
} }
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
} }
} }

View File

@ -1,412 +0,0 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* The following statement of license applies *only* to this header file,
* and *not* to the other files distributed with FFTW or derived therefrom:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/***************************** NOTE TO USERS *********************************
*
* THIS IS A HEADER FILE, NOT A MANUAL
*
* If you want to know how to use FFTW, please read the manual,
* online at http://www.fftw.org/doc/ and also included with FFTW.
* For a quick start, see the manual's tutorial section.
*
* (Reading header files to learn how to use a library is a habit
* stemming from code lacking a proper manual. Arguably, it's a
* *bad* habit in most cases, because header files can contain
* interfaces that are not part of the public, stable API.)
*
****************************************************************************/
#ifndef FFTW3_H
#define FFTW3_H
#include <stdio.h>
#ifdef __cplusplus
extern "C"
{
#endif /* __cplusplus */
/* If <complex.h> is included, use the C99 complex type. Otherwise
define a type bit-compatible with C99 complex */
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
#else
# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
#endif
#define FFTW_CONCAT(prefix, name) prefix ## name
#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name)
/* IMPORTANT: for Windows compilers, you should add a line
#define FFTW_DLL
here and in kernel/ifftw.h if you are compiling/using FFTW as a
DLL, in order to do the proper importing/exporting, or
alternatively compile with -DFFTW_DLL or the equivalent
command-line flag. This is not necessary under MinGW/Cygwin, where
libtool does the imports/exports automatically. */
#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__))
/* annoying Windows syntax for shared-library declarations */
# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */
# define FFTW_EXTERN extern __declspec(dllexport)
# else /* user is calling FFTW; import symbol */
# define FFTW_EXTERN extern __declspec(dllimport)
# endif
#else
# define FFTW_EXTERN extern
#endif
/* Transform-kind selector for the real-to-real ("r2r") planner prototypes.
   Not meant to be named directly: FFTW_DEFINE_API typedefs it per precision
   as X(r2r_kind), and the plan_*r2r* prototypes take that typedef.
   The numeric values are fixed explicitly (0..10). */
enum fftw_r2r_kind_do_not_use_me {
FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
};
/* Per-dimension size/stride descriptor with int fields.
   Not meant to be named directly: FFTW_DEFINE_API typedefs it per precision
   as X(iodim), which the plan_guru_* prototypes take as `dims` and
   `howmany_dims` arrays. */
struct fftw_iodim_do_not_use_me {
int n; /* dimension size */
int is; /* input stride */
int os; /* output stride */
};
#include <stddef.h> /* for ptrdiff_t */
/* Same layout as the int-based iodim descriptor but with ptrdiff_t fields.
   Not meant to be named directly: FFTW_DEFINE_API typedefs it per precision
   as X(iodim64), which the plan_guru64_* prototypes take as `dims` and
   `howmany_dims` arrays. */
struct fftw_iodim64_do_not_use_me {
ptrdiff_t n; /* dimension size */
ptrdiff_t is; /* input stride */
ptrdiff_t os; /* output stride */
};
/* Character-at-a-time I/O callback types. FFTW_DEFINE_API typedefs them per
   precision as X(write_char_func) / X(read_char_func); they are the callback
   parameters of the export_wisdom and import_wisdom prototypes, which also
   take a void* `data` argument (presumably forwarded as the callback's void*
   parameter -- see the FFTW manual). */
typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
typedef int (*fftw_read_char_func_do_not_use_me)(void *);
/*
huge second-order macro that defines prototypes for all API
functions. We expand this macro for each supported precision
X: name-mangling macro
R: real data type
C: complex data type
*/
#define FFTW_DEFINE_API(X, R, C) \
\
FFTW_DEFINE_COMPLEX(R, C); \
\
typedef struct X(plan_s) *X(plan); \
\
typedef struct fftw_iodim_do_not_use_me X(iodim); \
typedef struct fftw_iodim64_do_not_use_me X(iodim64); \
\
typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \
\
typedef fftw_write_char_func_do_not_use_me X(write_char_func); \
typedef fftw_read_char_func_do_not_use_me X(read_char_func); \
\
FFTW_EXTERN void X(execute)(const X(plan) p); \
\
FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \
C *in, C *out, int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \
C *in, C *out, int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \
C *in, C *out, int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \
int howmany, \
C *in, const int *inembed, \
int istride, int idist, \
C *out, const int *onembed, \
int ostride, int odist, \
int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
C *in, C *out, \
int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *ri, R *ii, R *ro, R *io, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
C *in, C *out, \
int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *ri, R *ii, R *ro, R *io, \
unsigned flags); \
\
FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \
FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \
R *ro, R *io); \
\
FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \
int howmany, \
R *in, const int *inembed, \
int istride, int idist, \
C *out, const int *onembed, \
int ostride, int odist, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \
R *in, C *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \
R *in, C *out, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \
int n2, \
R *in, C *out, unsigned flags); \
\
\
FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \
int howmany, \
C *in, const int *inembed, \
int istride, int idist, \
R *out, const int *onembed, \
int ostride, int odist, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \
C *in, R *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \
C *in, R *out, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \
int n2, \
C *in, R *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, C *out, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
C *in, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \
int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, R *ro, R *io, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \
int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *ri, R *ii, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, C *out, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
C *in, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \
int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, R *ro, R *io, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \
int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *ri, R *ii, R *out, \
unsigned flags); \
\
FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \
FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \
\
FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \
R *in, R *ro, R *io); \
FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \
R *ri, R *ii, R *out); \
\
FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \
int howmany, \
R *in, const int *inembed, \
int istride, int idist, \
R *out, const int *onembed, \
int ostride, int odist, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \
X(r2r_kind) kind, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \
X(r2r_kind) kind0, X(r2r_kind) kind1, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \
R *in, R *out, X(r2r_kind) kind0, \
X(r2r_kind) kind1, X(r2r_kind) kind2, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \
\
FFTW_EXTERN void X(destroy_plan)(X(plan) p); \
FFTW_EXTERN void X(forget_wisdom)(void); \
FFTW_EXTERN void X(cleanup)(void); \
\
FFTW_EXTERN void X(set_timelimit)(double t); \
\
FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \
FFTW_EXTERN int X(init_threads)(void); \
FFTW_EXTERN void X(cleanup_threads)(void); \
\
FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \
FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \
FFTW_EXTERN char *X(export_wisdom_to_string)(void); \
FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \
void *data); \
FFTW_EXTERN int X(import_system_wisdom)(void); \
FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \
FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \
FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \
FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \
\
FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \
FFTW_EXTERN void X(print_plan)(const X(plan) p); \
FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \
\
FFTW_EXTERN void *X(malloc)(size_t n); \
FFTW_EXTERN R *X(alloc_real)(size_t n); \
FFTW_EXTERN C *X(alloc_complex)(size_t n); \
FFTW_EXTERN void X(free)(void *p); \
\
FFTW_EXTERN void X(flops)(const X(plan) p, \
double *add, double *mul, double *fmas); \
FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \
FFTW_EXTERN double X(cost)(const X(plan) p); \
\
FFTW_EXTERN int X(alignment_of)(R *p); \
FFTW_EXTERN const char X(version)[]; \
FFTW_EXTERN const char X(cc)[]; \
FFTW_EXTERN const char X(codelet_optim)[];
/* end of FFTW_DEFINE_API macro */
FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex)
FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex)
FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64
for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \
&& !(defined(__ICC) || defined(__INTEL_COMPILER)) \
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
/* note: __float128 is a typedef, which is not supported with the _Complex
keyword in gcc, so instead we use this ugly __attribute__ version.
However, we can't simply pass the __attribute__ version to
FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer
types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. */
# undef FFTW_DEFINE_COMPLEX
# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C
# endif
FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex)
#endif
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#define FFTW_NO_TIMELIMIT (-1.0)
/* documented flags */
#define FFTW_MEASURE (0U)
#define FFTW_DESTROY_INPUT (1U << 0)
#define FFTW_UNALIGNED (1U << 1)
#define FFTW_CONSERVE_MEMORY (1U << 2)
#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
#define FFTW_ESTIMATE (1U << 6)
#define FFTW_WISDOM_ONLY (1U << 21)
/* undocumented beyond-guru flags */
#define FFTW_ESTIMATE_PATIENT (1U << 7)
#define FFTW_BELIEVE_PCOST (1U << 8)
#define FFTW_NO_DFT_R2HC (1U << 9)
#define FFTW_NO_NONTHREADED (1U << 10)
#define FFTW_NO_BUFFERING (1U << 11)
#define FFTW_NO_INDIRECT_OP (1U << 12)
#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
#define FFTW_NO_RANK_SPLITS (1U << 14)
#define FFTW_NO_VRANK_SPLITS (1U << 15)
#define FFTW_NO_VRECURSE (1U << 16)
#define FFTW_NO_SIMD (1U << 17)
#define FFTW_NO_SLOW (1U << 18)
#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19)
#define FFTW_ALLOW_PRUNING (1U << 20)
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* FFTW3_H */

View File

@ -1,74 +1,73 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_ET.h Source file: ./lib/lattice/Lattice_ET.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef GRID_LATTICE_ET_H #ifndef GRID_LATTICE_ET_H
#define GRID_LATTICE_ET_H #define GRID_LATTICE_ET_H
#include <iostream> #include <iostream>
#include <vector>
#include <tuple> #include <tuple>
#include <typeinfo> #include <typeinfo>
#include <vector>
namespace Grid { namespace Grid {
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Predicated where support // Predicated where support
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
template <class iobj, class vobj, class robj> template<class iobj,class vobj,class robj>
inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue, inline vobj predicatedWhere(const iobj &predicate,const vobj &iftrue,const robj &iffalse) {
const robj &iffalse) {
typename std::remove_const<vobj>::type ret;
typedef typename vobj::scalar_object scalar_object; typename std::remove_const<vobj>::type ret;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
const int Nsimd = vobj::vector_type::Nsimd(); typedef typename vobj::scalar_object scalar_object;
const int words = sizeof(vobj) / sizeof(vector_type); typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
std::vector<Integer> mask(Nsimd); const int Nsimd = vobj::vector_type::Nsimd();
std::vector<scalar_object> truevals(Nsimd); const int words = sizeof(vobj)/sizeof(vector_type);
std::vector<scalar_object> falsevals(Nsimd);
extract(iftrue, truevals); std::vector<Integer> mask(Nsimd);
extract(iffalse, falsevals); std::vector<scalar_object> truevals (Nsimd);
extract<vInteger, Integer>(TensorRemove(predicate), mask); std::vector<scalar_object> falsevals(Nsimd);
for (int s = 0; s < Nsimd; s++) { extract(iftrue ,truevals);
if (mask[s]) falsevals[s] = truevals[s]; extract(iffalse ,falsevals);
extract<vInteger,Integer>(TensorRemove(predicate),mask);
for(int s=0;s<Nsimd;s++){
if (mask[s]) falsevals[s]=truevals[s];
}
merge(ret,falsevals);
return ret;
} }
merge(ret, falsevals);
return ret;
}
//////////////////////////////////////////// ////////////////////////////////////////////
// recursive evaluation of expressions; Could // recursive evaluation of expressions; Could
// switch to generic approach with variadics, a la // switch to generic approach with variadics, a la
@ -76,351 +75,303 @@ inline vobj predicatedWhere(const iobj &predicate, const vobj &iftrue,
// from tuple is hideous; C++14 introduces std::make_index_sequence for this // from tuple is hideous; C++14 introduces std::make_index_sequence for this
//////////////////////////////////////////// ////////////////////////////////////////////
// leaf eval of lattice ; should enable if protect using traits
template <typename T> //leaf eval of lattice ; should enable if protect using traits
using is_lattice = std::is_base_of<LatticeBase, T>;
template <typename T> template <typename T> using is_lattice = std::is_base_of<LatticeBase,T >;
using is_lattice_expr = std::is_base_of<LatticeExpressionBase, T>;
template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >; template <typename T> using is_lattice_expr = std::is_base_of<LatticeExpressionBase,T >;
//Specialization of getVectorType for lattices
template<typename T>
struct getVectorType<Lattice<T> >{
typedef typename Lattice<T>::vector_object type;
};
template<class sobj> template<class sobj>
inline sobj eval(const unsigned int ss, const sobj &arg) inline sobj eval(const unsigned int ss, const sobj &arg)
{ {
return arg; return arg;
} }
template <class lobj> template<class lobj>
inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg) { inline const lobj &eval(const unsigned int ss, const Lattice<lobj> &arg)
return arg._odata[ss]; {
return arg._odata[ss];
} }
// handle nodes in syntax tree // handle nodes in syntax tree
template <typename Op, typename T1> template <typename Op, typename T1>
auto inline eval( auto inline eval(const unsigned int ss, const LatticeUnaryExpression<Op,T1 > &expr) // eval one operand
const unsigned int ss, -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second))))
const LatticeUnaryExpression<Op, T1> &expr) // eval one operand {
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)))) { return expr.first.func(eval(ss,std::get<0>(expr.second)));
return expr.first.func(eval(ss, std::get<0>(expr.second)));
} }
template <typename Op, typename T1, typename T2> template <typename Op, typename T1, typename T2>
auto inline eval( auto inline eval(const unsigned int ss, const LatticeBinaryExpression<Op,T1,T2> &expr) // eval two operands
const unsigned int ss, -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second))))
const LatticeBinaryExpression<Op, T1, T2> &expr) // eval two operands {
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)));
eval(ss, std::get<1>(expr.second)))) {
return expr.first.func(eval(ss, std::get<0>(expr.second)),
eval(ss, std::get<1>(expr.second)));
} }
template <typename Op, typename T1, typename T2, typename T3> template <typename Op, typename T1, typename T2, typename T3>
auto inline eval(const unsigned int ss, auto inline eval(const unsigned int ss, const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr) // eval three operands
const LatticeTrinaryExpression<Op, T1, T2, T3> -> decltype(expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second))))
&expr) // eval three operands {
-> decltype(expr.first.func(eval(ss, std::get<0>(expr.second)), return expr.first.func(eval(ss,std::get<0>(expr.second)),eval(ss,std::get<1>(expr.second)),eval(ss,std::get<2>(expr.second)) );
eval(ss, std::get<1>(expr.second)),
eval(ss, std::get<2>(expr.second)))) {
return expr.first.func(eval(ss, std::get<0>(expr.second)),
eval(ss, std::get<1>(expr.second)),
eval(ss, std::get<2>(expr.second)));
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Obtain the grid from an expression, ensuring conformable. This must follow a // Obtain the grid from an expression, ensuring conformable. This must follow a tree recursion
// tree recursion
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
template <class T1, template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr> inline void GridFromExpression(GridBase * &grid,const T1& lat) // Lattice leaf
inline void GridFromExpression(GridBase *&grid, const T1 &lat) // Lattice leaf
{ {
if (grid) { if ( grid ) {
conformable(grid, lat._grid); conformable(grid,lat._grid);
} }
grid = lat._grid; grid=lat._grid;
}
template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
inline void GridFromExpression(GridBase * &grid,const T1& notlat) // non-lattice leaf
{
} }
template <class T1,
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr>
inline void GridFromExpression(GridBase *&grid,
const T1 &notlat) // non-lattice leaf
{}
template <typename Op, typename T1> template <typename Op, typename T1>
inline void GridFromExpression(GridBase *&grid, inline void GridFromExpression(GridBase * &grid,const LatticeUnaryExpression<Op,T1 > &expr)
const LatticeUnaryExpression<Op, T1> &expr) { {
GridFromExpression(grid, std::get<0>(expr.second)); // recurse GridFromExpression(grid,std::get<0>(expr.second));// recurse
} }
template <typename Op, typename T1, typename T2> template <typename Op, typename T1, typename T2>
inline void GridFromExpression( inline void GridFromExpression(GridBase * &grid,const LatticeBinaryExpression<Op,T1,T2> &expr)
GridBase *&grid, const LatticeBinaryExpression<Op, T1, T2> &expr) { {
GridFromExpression(grid, std::get<0>(expr.second)); // recurse GridFromExpression(grid,std::get<0>(expr.second));// recurse
GridFromExpression(grid, std::get<1>(expr.second)); GridFromExpression(grid,std::get<1>(expr.second));
} }
template <typename Op, typename T1, typename T2, typename T3> template <typename Op, typename T1, typename T2, typename T3>
inline void GridFromExpression( inline void GridFromExpression( GridBase * &grid,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
GridBase *&grid, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) { {
GridFromExpression(grid, std::get<0>(expr.second)); // recurse GridFromExpression(grid,std::get<0>(expr.second));// recurse
GridFromExpression(grid, std::get<1>(expr.second)); GridFromExpression(grid,std::get<1>(expr.second));
GridFromExpression(grid, std::get<2>(expr.second)); GridFromExpression(grid,std::get<2>(expr.second));
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Obtain the CB from an expression, ensuring conformable. This must follow a // Obtain the CB from an expression, ensuring conformable. This must follow a tree recursion
// tree recursion
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
template <class T1, template<class T1, typename std::enable_if<is_lattice<T1>::value, T1>::type * =nullptr >
typename std::enable_if<is_lattice<T1>::value, T1>::type * = nullptr> inline void CBFromExpression(int &cb,const T1& lat) // Lattice leaf
inline void CBFromExpression(int &cb, const T1 &lat) // Lattice leaf
{ {
if ((cb == Odd) || (cb == Even)) { if ( (cb==Odd) || (cb==Even) ) {
assert(cb == lat.checkerboard); assert(cb==lat.checkerboard);
} }
cb = lat.checkerboard; cb=lat.checkerboard;
// std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl; // std::cout<<GridLogMessage<<"Lattice leaf cb "<<cb<<std::endl;
} }
template <class T1, template<class T1,typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr >
typename std::enable_if<!is_lattice<T1>::value, T1>::type * = nullptr> inline void CBFromExpression(int &cb,const T1& notlat) // non-lattice leaf
inline void CBFromExpression(int &cb, const T1 &notlat) // non-lattice leaf
{ {
// std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl; // std::cout<<GridLogMessage<<"Non lattice leaf cb"<<cb<<std::endl;
} }
template <typename Op, typename T1> template <typename Op, typename T1>
inline void CBFromExpression(int &cb, inline void CBFromExpression(int &cb,const LatticeUnaryExpression<Op,T1 > &expr)
const LatticeUnaryExpression<Op, T1> &expr) { {
CBFromExpression(cb, std::get<0>(expr.second)); // recurse CBFromExpression(cb,std::get<0>(expr.second));// recurse
// std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl; // std::cout<<GridLogMessage<<"Unary node cb "<<cb<<std::endl;
} }
template <typename Op, typename T1, typename T2> template <typename Op, typename T1, typename T2>
inline void CBFromExpression(int &cb, inline void CBFromExpression(int &cb,const LatticeBinaryExpression<Op,T1,T2> &expr)
const LatticeBinaryExpression<Op, T1, T2> &expr) { {
CBFromExpression(cb, std::get<0>(expr.second)); // recurse CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb, std::get<1>(expr.second)); CBFromExpression(cb,std::get<1>(expr.second));
// std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl; // std::cout<<GridLogMessage<<"Binary node cb "<<cb<<std::endl;
} }
template <typename Op, typename T1, typename T2, typename T3> template <typename Op, typename T1, typename T2, typename T3>
inline void CBFromExpression( inline void CBFromExpression( int &cb,const LatticeTrinaryExpression<Op,T1,T2,T3 > &expr)
int &cb, const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) { {
CBFromExpression(cb, std::get<0>(expr.second)); // recurse CBFromExpression(cb,std::get<0>(expr.second));// recurse
CBFromExpression(cb, std::get<1>(expr.second)); CBFromExpression(cb,std::get<1>(expr.second));
CBFromExpression(cb, std::get<2>(expr.second)); CBFromExpression(cb,std::get<2>(expr.second));
// std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl; // std::cout<<GridLogMessage<<"Trinary node cb "<<cb<<std::endl;
} }
//////////////////////////////////////////// ////////////////////////////////////////////
// Unary operators and funcs // Unary operators and funcs
//////////////////////////////////////////// ////////////////////////////////////////////
#define GridUnopClass(name, ret) \ #define GridUnopClass(name,ret)\
template <class arg> \ template <class arg> struct name\
struct name { \ {\
static auto inline func(const arg a) -> decltype(ret) { return ret; } \ static auto inline func(const arg a)-> decltype(ret) { return ret; } \
}; };
GridUnopClass(UnarySub, -a); GridUnopClass(UnarySub,-a);
GridUnopClass(UnaryNot, Not(a)); GridUnopClass(UnaryNot,Not(a));
GridUnopClass(UnaryAdj, adj(a)); GridUnopClass(UnaryAdj,adj(a));
GridUnopClass(UnaryConj, conjugate(a)); GridUnopClass(UnaryConj,conjugate(a));
GridUnopClass(UnaryTrace, trace(a)); GridUnopClass(UnaryTrace,trace(a));
GridUnopClass(UnaryTranspose, transpose(a)); GridUnopClass(UnaryTranspose,transpose(a));
GridUnopClass(UnaryTa, Ta(a)); GridUnopClass(UnaryTa,Ta(a));
GridUnopClass(UnaryProjectOnGroup, ProjectOnGroup(a)); GridUnopClass(UnaryProjectOnGroup,ProjectOnGroup(a));
GridUnopClass(UnaryReal, real(a)); GridUnopClass(UnaryReal,real(a));
GridUnopClass(UnaryImag, imag(a)); GridUnopClass(UnaryImag,imag(a));
GridUnopClass(UnaryToReal, toReal(a)); GridUnopClass(UnaryToReal,toReal(a));
GridUnopClass(UnaryToComplex, toComplex(a)); GridUnopClass(UnaryToComplex,toComplex(a));
GridUnopClass(UnaryTimesI, timesI(a)); GridUnopClass(UnaryAbs,abs(a));
GridUnopClass(UnaryTimesMinusI, timesMinusI(a)); GridUnopClass(UnarySqrt,sqrt(a));
GridUnopClass(UnaryAbs, abs(a)); GridUnopClass(UnaryRsqrt,rsqrt(a));
GridUnopClass(UnarySqrt, sqrt(a)); GridUnopClass(UnarySin,sin(a));
GridUnopClass(UnaryRsqrt, rsqrt(a)); GridUnopClass(UnaryCos,cos(a));
GridUnopClass(UnarySin, sin(a)); GridUnopClass(UnaryLog,log(a));
GridUnopClass(UnaryCos, cos(a)); GridUnopClass(UnaryExp,exp(a));
GridUnopClass(UnaryAsin, asin(a));
GridUnopClass(UnaryAcos, acos(a));
GridUnopClass(UnaryLog, log(a));
GridUnopClass(UnaryExp, exp(a));
//////////////////////////////////////////// ////////////////////////////////////////////
// Binary operators // Binary operators
//////////////////////////////////////////// ////////////////////////////////////////////
#define GridBinOpClass(name, combination) \ #define GridBinOpClass(name,combination)\
template <class left, class right> \ template <class left,class right>\
struct name { \ struct name\
static auto inline func(const left &lhs, const right &rhs) \ {\
-> decltype(combination) const { \ static auto inline func(const left &lhs,const right &rhs)-> decltype(combination) const \
return combination; \ {\
} \ return combination;\
} }\
GridBinOpClass(BinaryAdd, lhs + rhs); }
GridBinOpClass(BinarySub, lhs - rhs); GridBinOpClass(BinaryAdd,lhs+rhs);
GridBinOpClass(BinaryMul, lhs *rhs); GridBinOpClass(BinarySub,lhs-rhs);
GridBinOpClass(BinaryMul,lhs*rhs);
GridBinOpClass(BinaryAnd, lhs &rhs); GridBinOpClass(BinaryAnd ,lhs&rhs);
GridBinOpClass(BinaryOr, lhs | rhs); GridBinOpClass(BinaryOr ,lhs|rhs);
GridBinOpClass(BinaryAndAnd, lhs &&rhs); GridBinOpClass(BinaryAndAnd,lhs&&rhs);
GridBinOpClass(BinaryOrOr, lhs || rhs); GridBinOpClass(BinaryOrOr ,lhs||rhs);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Trinary conditional op // Trinary conditional op
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
#define GridTrinOpClass(name, combination) \ #define GridTrinOpClass(name,combination)\
template <class predicate, class left, class right> \ template <class predicate,class left, class right> \
struct name { \ struct name\
static auto inline func(const predicate &pred, const left &lhs, \ {\
const right &rhs) -> decltype(combination) const { \ static auto inline func(const predicate &pred,const left &lhs,const right &rhs)-> decltype(combination) const \
return combination; \ {\
} \ return combination;\
} }\
}
GridTrinOpClass( GridTrinOpClass(TrinaryWhere,(predicatedWhere<predicate, \
TrinaryWhere, typename std::remove_reference<left>::type, \
(predicatedWhere<predicate, typename std::remove_reference<left>::type, typename std::remove_reference<right>::type> (pred,lhs,rhs)));
typename std::remove_reference<right>::type>(pred, lhs,
rhs)));
//////////////////////////////////////////// ////////////////////////////////////////////
// Operator syntactical glue // Operator syntactical glue
//////////////////////////////////////////// ////////////////////////////////////////////
#define GRID_UNOP(name) name<decltype(eval(0, arg))> #define GRID_UNOP(name) name<decltype(eval(0, arg))>
#define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))> #define GRID_BINOP(name) name<decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_TRINOP(name) \ #define GRID_TRINOP(name) name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
name<decltype(eval(0, pred)), decltype(eval(0, lhs)), decltype(eval(0, rhs))>
#define GRID_DEF_UNOP(op, name) \ #define GRID_DEF_UNOP(op, name)\
template <typename T1, \ template <typename T1,\
typename std::enable_if<is_lattice<T1>::value || \ typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr> inline auto op(const T1 &arg) \
is_lattice_expr<T1>::value, \ -> decltype(LatticeUnaryExpression<GRID_UNOP(name),const T1&>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg)))) \
T1>::type * = nullptr> \ { return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>(std::make_pair(GRID_UNOP(name)(),std::forward_as_tuple(arg))); }
inline auto op(const T1 &arg) \
->decltype(LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg)))) { \
return LatticeUnaryExpression<GRID_UNOP(name), const T1 &>( \
std::make_pair(GRID_UNOP(name)(), std::forward_as_tuple(arg))); \
}
#define GRID_BINOP_LEFT(op, name) \ #define GRID_BINOP_LEFT(op, name)\
template <typename T1, typename T2, \ template <typename T1,typename T2,\
typename std::enable_if<is_lattice<T1>::value || \ typename std::enable_if<is_lattice<T1>::value||is_lattice_expr<T1>::value, T1>::type* = nullptr>\
is_lattice_expr<T1>::value, \ inline auto op(const T1 &lhs,const T2&rhs) \
T1>::type * = nullptr> \ -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
inline auto op(const T1 &lhs, const T2 &rhs) \ std::forward_as_tuple(lhs, rhs)))) \
->decltype( \ {\
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \ return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
std::make_pair(GRID_BINOP(name)(), \ std::forward_as_tuple(lhs, rhs))); \
std::forward_as_tuple(lhs, rhs)))) { \ }
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
}
#define GRID_BINOP_RIGHT(op, name) \ #define GRID_BINOP_RIGHT(op, name)\
template <typename T1, typename T2, \ template <typename T1,typename T2,\
typename std::enable_if<!is_lattice<T1>::value && \ typename std::enable_if<!is_lattice<T1>::value && !is_lattice_expr<T1>::value, T1>::type* = nullptr,\
!is_lattice_expr<T1>::value, \ typename std::enable_if< is_lattice<T2>::value || is_lattice_expr<T2>::value, T2>::type* = nullptr> \
T1>::type * = nullptr, \ inline auto op(const T1 &lhs,const T2&rhs) \
typename std::enable_if<is_lattice<T2>::value || \ -> decltype(LatticeBinaryExpression<GRID_BINOP(name),const T1&,const T2 &>(std::make_pair(GRID_BINOP(name)(),\
is_lattice_expr<T2>::value, \ std::forward_as_tuple(lhs, rhs)))) \
T2>::type * = nullptr> \ {\
inline auto op(const T1 &lhs, const T2 &rhs) \ return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>(std::make_pair(GRID_BINOP(name)(),\
->decltype( \ std::forward_as_tuple(lhs, rhs))); \
LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \ }
std::make_pair(GRID_BINOP(name)(), \
std::forward_as_tuple(lhs, rhs)))) { \
return LatticeBinaryExpression<GRID_BINOP(name), const T1 &, const T2 &>( \
std::make_pair(GRID_BINOP(name)(), std::forward_as_tuple(lhs, rhs))); \
}
#define GRID_DEF_BINOP(op, name) \ #define GRID_DEF_BINOP(op, name)\
GRID_BINOP_LEFT(op, name); \ GRID_BINOP_LEFT(op,name);\
GRID_BINOP_RIGHT(op, name); GRID_BINOP_RIGHT(op,name);
#define GRID_DEF_TRINOP(op, name) \
template <typename T1, typename T2, typename T3> \ #define GRID_DEF_TRINOP(op, name)\
inline auto op(const T1 &pred, const T2 &lhs, const T3 &rhs) \ template <typename T1,typename T2,typename T3> inline auto op(const T1 &pred,const T2&lhs,const T3 &rhs) \
->decltype( \ -> decltype(LatticeTrinaryExpression<GRID_TRINOP(name),const T1&,const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(),\
LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \ std::forward_as_tuple(pred,lhs,rhs)))) \
const T3 &>(std::make_pair( \ {\
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs)))) { \ return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &,const T3&>(std::make_pair(GRID_TRINOP(name)(), \
return LatticeTrinaryExpression<GRID_TRINOP(name), const T1 &, const T2 &, \ std::forward_as_tuple(pred,lhs, rhs))); \
const T3 &>(std::make_pair( \ }
GRID_TRINOP(name)(), std::forward_as_tuple(pred, lhs, rhs))); \
}
//////////////////////// ////////////////////////
// Operator definitions //Operator definitions
//////////////////////// ////////////////////////
GRID_DEF_UNOP(operator-, UnarySub); GRID_DEF_UNOP(operator -,UnarySub);
GRID_DEF_UNOP(Not, UnaryNot); GRID_DEF_UNOP(Not,UnaryNot);
GRID_DEF_UNOP(operator!, UnaryNot); GRID_DEF_UNOP(operator !,UnaryNot);
GRID_DEF_UNOP(adj, UnaryAdj); GRID_DEF_UNOP(adj,UnaryAdj);
GRID_DEF_UNOP(conjugate, UnaryConj); GRID_DEF_UNOP(conjugate,UnaryConj);
GRID_DEF_UNOP(trace, UnaryTrace); GRID_DEF_UNOP(trace,UnaryTrace);
GRID_DEF_UNOP(transpose, UnaryTranspose); GRID_DEF_UNOP(transpose,UnaryTranspose);
GRID_DEF_UNOP(Ta, UnaryTa); GRID_DEF_UNOP(Ta,UnaryTa);
GRID_DEF_UNOP(ProjectOnGroup, UnaryProjectOnGroup); GRID_DEF_UNOP(ProjectOnGroup,UnaryProjectOnGroup);
GRID_DEF_UNOP(real, UnaryReal); GRID_DEF_UNOP(real,UnaryReal);
GRID_DEF_UNOP(imag, UnaryImag); GRID_DEF_UNOP(imag,UnaryImag);
GRID_DEF_UNOP(toReal, UnaryToReal); GRID_DEF_UNOP(toReal,UnaryToReal);
GRID_DEF_UNOP(toComplex, UnaryToComplex); GRID_DEF_UNOP(toComplex,UnaryToComplex);
GRID_DEF_UNOP(timesI, UnaryTimesI); GRID_DEF_UNOP(abs ,UnaryAbs); //abs overloaded in cmath C++98; DON'T do the abs-fabs-dabs-labs thing
GRID_DEF_UNOP(timesMinusI, UnaryTimesMinusI); GRID_DEF_UNOP(sqrt ,UnarySqrt);
GRID_DEF_UNOP(abs, UnaryAbs); // abs overloaded in cmath C++98; DON'T do the GRID_DEF_UNOP(rsqrt,UnaryRsqrt);
// abs-fabs-dabs-labs thing GRID_DEF_UNOP(sin ,UnarySin);
GRID_DEF_UNOP(sqrt, UnarySqrt); GRID_DEF_UNOP(cos ,UnaryCos);
GRID_DEF_UNOP(rsqrt, UnaryRsqrt); GRID_DEF_UNOP(log ,UnaryLog);
GRID_DEF_UNOP(sin, UnarySin); GRID_DEF_UNOP(exp ,UnaryExp);
GRID_DEF_UNOP(cos, UnaryCos);
GRID_DEF_UNOP(asin, UnaryAsin);
GRID_DEF_UNOP(acos, UnaryAcos);
GRID_DEF_UNOP(log, UnaryLog);
GRID_DEF_UNOP(exp, UnaryExp);
GRID_DEF_BINOP(operator+, BinaryAdd); GRID_DEF_BINOP(operator+,BinaryAdd);
GRID_DEF_BINOP(operator-, BinarySub); GRID_DEF_BINOP(operator-,BinarySub);
GRID_DEF_BINOP(operator*, BinaryMul); GRID_DEF_BINOP(operator*,BinaryMul);
GRID_DEF_BINOP(operator&, BinaryAnd); GRID_DEF_BINOP(operator&,BinaryAnd);
GRID_DEF_BINOP(operator|, BinaryOr); GRID_DEF_BINOP(operator|,BinaryOr);
GRID_DEF_BINOP(operator&&, BinaryAndAnd); GRID_DEF_BINOP(operator&&,BinaryAndAnd);
GRID_DEF_BINOP(operator||, BinaryOrOr); GRID_DEF_BINOP(operator||,BinaryOrOr);
GRID_DEF_TRINOP(where, TrinaryWhere); GRID_DEF_TRINOP(where,TrinaryWhere);
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
// Closure convenience to force expression to evaluate // Closure convenience to force expression to evaluate
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
template <class Op, class T1> template<class Op,class T1>
auto closure(const LatticeUnaryExpression<Op, T1> &expr) auto closure(const LatticeUnaryExpression<Op,T1> & expr)
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> { -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))>
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second))))> ret( {
expr); Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second))))> ret(expr);
return ret; return ret;
} }
template <class Op, class T1, class T2> template<class Op,class T1, class T2>
auto closure(const LatticeBinaryExpression<Op, T1, T2> &expr) auto closure(const LatticeBinaryExpression<Op,T1,T2> & expr)
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0, std::get<1>(expr.second))))> { eval(0,std::get<1>(expr.second))))>
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), {
eval(0, std::get<1>(expr.second))))> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
ret(expr); eval(0,std::get<1>(expr.second))))> ret(expr);
return ret; return ret;
} }
template <class Op, class T1, class T2, class T3> template<class Op,class T1, class T2, class T3>
auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr) auto closure(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
-> Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), -> Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0, std::get<1>(expr.second)), eval(0,std::get<1>(expr.second)),
eval(0, std::get<2>(expr.second))))> { eval(0,std::get<2>(expr.second))))>
Lattice<decltype(expr.first.func(eval(0, std::get<0>(expr.second)), {
eval(0, std::get<1>(expr.second)), Lattice<decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0, std::get<2>(expr.second))))> eval(0,std::get<1>(expr.second)),
ret(expr); eval(0,std::get<2>(expr.second))))> ret(expr);
return ret; return ret;
} }
@ -431,6 +382,7 @@ auto closure(const LatticeTrinaryExpression<Op, T1, T2, T3> &expr)
#undef GRID_DEF_UNOP #undef GRID_DEF_UNOP
#undef GRID_DEF_BINOP #undef GRID_DEF_BINOP
#undef GRID_DEF_TRINOP #undef GRID_DEF_TRINOP
} }
#if 0 #if 0
@ -445,7 +397,7 @@ using namespace Grid;
BinaryAdd<double,double> tmp; BinaryAdd<double,double> tmp;
LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &> LatticeBinaryExpression<BinaryAdd<double,double>,Lattice<double> &,Lattice<double> &>
expr(std::make_pair(tmp, expr(std::make_pair(tmp,
std::forward_as_tuple(v1,v2))); std::forward_as_tuple(v1,v2)));
tmp.func(eval(0,v1),eval(0,v2)); tmp.func(eval(0,v1),eval(0,v2));
auto var = v1+v2; auto var = v1+v2;

View File

@ -1,33 +1,32 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_base.h Source file: ./lib/lattice/Lattice_base.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef GRID_LATTICE_BASE_H #ifndef GRID_LATTICE_BASE_H
#define GRID_LATTICE_BASE_H #define GRID_LATTICE_BASE_H
@ -102,7 +101,6 @@ public:
int begin(void) { return 0;}; int begin(void) { return 0;};
int end(void) { return _odata.size(); } int end(void) { return _odata.size(); }
vobj & operator[](int i) { return _odata[i]; }; vobj & operator[](int i) { return _odata[i]; };
const vobj & operator[](int i) const { return _odata[i]; };
public: public:
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
@ -257,18 +255,6 @@ PARALLEL_FOR_LOOP
checkerboard=0; checkerboard=0;
} }
Lattice(const Lattice& r){ // copy constructor
_grid = r._grid;
checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential
PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss];
}
}
virtual ~Lattice(void) = default; virtual ~Lattice(void) = default;
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
@ -281,7 +267,7 @@ PARALLEL_FOR_LOOP
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){ template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard; this->checkerboard = r.checkerboard;
conformable(*this,r); conformable(*this,r);
std::cout<<GridLogMessage<<"Lattice operator ="<<std::endl;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<_grid->oSites();ss++){ for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss]; this->_odata[ss]=r._odata[ss];
@ -338,27 +324,27 @@ PARALLEL_FOR_LOOP
#include "Lattice_conformable.h" #include <lattice/Lattice_conformable.h>
#define GRID_LATTICE_EXPRESSION_TEMPLATES #define GRID_LATTICE_EXPRESSION_TEMPLATES
#ifdef GRID_LATTICE_EXPRESSION_TEMPLATES #ifdef GRID_LATTICE_EXPRESSION_TEMPLATES
#include "Lattice_ET.h" #include <lattice/Lattice_ET.h>
#else #else
#include "Lattice_overload.h" #include <lattice/Lattice_overload.h>
#endif #endif
#include "Lattice_arith.h" #include <lattice/Lattice_arith.h>
#include "Lattice_trace.h" #include <lattice/Lattice_trace.h>
#include "Lattice_transpose.h" #include <lattice/Lattice_transpose.h>
#include "Lattice_local.h" #include <lattice/Lattice_local.h>
#include "Lattice_reduction.h" #include <lattice/Lattice_reduction.h>
#include "Lattice_peekpoke.h" #include <lattice/Lattice_peekpoke.h>
#include "Lattice_reality.h" #include <lattice/Lattice_reality.h>
#include "Lattice_comparison_utils.h" #include <lattice/Lattice_comparison_utils.h>
#include "Lattice_comparison.h" #include <lattice/Lattice_comparison.h>
#include "Lattice_coordinate.h" #include <lattice/Lattice_coordinate.h>
#include "Lattice_where.h" #include <lattice/Lattice_where.h>
#include "Lattice_rng.h" #include <lattice/Lattice_rng.h>
#include "Lattice_unary.h" #include <lattice/Lattice_unary.h>
#include "Lattice_transfer.h" #include <lattice/Lattice_transfer.h>
#endif #endif

View File

@ -40,7 +40,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg); ComplexD nrm = innerProduct(arg,arg);
return std::real(nrm); return real(nrm);
} }
template<class vobj> template<class vobj>

View File

@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
assert(ig->_ldimensions[d] == og->_ldimensions[d]); assert(ig->_ldimensions[d] == og->_ldimensions[d]);
} }
//PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int idx=0;idx<ig->lSites();idx++){ for(int idx=0;idx<ig->lSites();idx++){
std::vector<int> lcoor(ni); std::vector<int> lcoor(ni);
ig->LocalIndexToLocalCoor(idx,lcoor); ig->LocalIndexToLocalCoor(idx,lcoor);
@ -386,7 +386,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
} }
// the above should guarantee that the operations are local // the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){ for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl); std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh); std::vector<int> hcoor(nh);
@ -420,15 +420,15 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
assert(hg->_processors[orthog]==1); assert(hg->_processors[orthog]==1);
int dl; dl = 0; int dl; dl = 0;
for(int d=0;d<nh;d++){ for(int d=0;d<nh;d++){
if ( d != orthog) { if ( d != orthog) {
assert(lg->_processors[dl] == hg->_processors[d]); assert(lg->_processors[dl] == hg->_processors[d]);
assert(lg->_ldimensions[dl] == hg->_ldimensions[d]); assert(lg->_ldimensions[dl] == hg->_ldimensions[d]);
dl++; dl++;
} }
} }
// the above should guarantee that the operations are local // the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){ for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl); std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh); std::vector<int> hcoor(nh);
@ -446,79 +446,6 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
} }
// Copy a single hyperplane between two lattices living on identically
// decomposed grids.  Every local site of lowDim whose coordinate along
// `orthog` equals slice_lo is read and written into higherDim at the same
// coordinates, except that the `orthog` coordinate becomes slice_hi.
//   lowDim    : lattice that is read (peekLocalSite)
//   higherDim : lattice that is written (pokeLocalSite)
//   slice_lo  : local coordinate along `orthog` selected in lowDim
//   slice_hi  : local coordinate along `orthog` targeted in higherDim
//   orthog    : direction index, asserted to lie in [0, ndimension)
template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
  typedef typename vobj::scalar_object sobj;
  sobj site;   // scratch site object, declared once outside the loop

  GridBase *srcGrid = lowDim._grid;
  GridBase *dstGrid = higherDim._grid;
  int srcDims = srcGrid->_ndimension;
  int dstDims = dstGrid->_ndimension;

  assert(srcDims == dstDims);
  assert(orthog<dstDims);
  assert(orthog>=0);

  // Both grids must share processor layout and local extents in every direction
  for(int mu=0;mu<dstDims;mu++){
    assert(srcGrid->_processors[mu]  == dstGrid->_processors[mu]);
    assert(srcGrid->_ldimensions[mu] == dstGrid->_ldimensions[mu]);
  }

  // the checks above should guarantee that the operations are local
  // NOTE: loop is left serial; `site` is shared across iterations, so it
  // would need to become loop-local if the macro below were re-enabled.
  //PARALLEL_FOR_LOOP
  for(int idx=0;idx<srcGrid->lSites();idx++){
    std::vector<int> srcCoor(srcDims);
    srcGrid->LocalIndexToLocalCoor(idx,srcCoor);

    // only sites on the requested source hyperplane take part
    if( srcCoor[orthog] != slice_lo ) continue;

    std::vector<int> dstCoor(srcCoor);
    dstCoor[orthog] = slice_hi;

    peekLocalSite(site,lowDim,srcCoor);
    pokeLocalSite(site,higherDim,dstCoor);
  }
}
// Counterpart of the local slice insertion: pull a single hyperplane out of
// higherDim into lowDim, both living on identically decomposed grids.  For
// every local site of lowDim whose coordinate along `orthog` equals slice_lo,
// the value is read from higherDim at the same coordinates with the `orthog`
// coordinate replaced by slice_hi, and written into lowDim.
//   lowDim    : lattice that is written (pokeLocalSite)
//   higherDim : lattice that is read (peekLocalSite)
//   slice_lo  : local coordinate along `orthog` targeted in lowDim
//   slice_hi  : local coordinate along `orthog` selected in higherDim
//   orthog    : direction index, asserted to lie in [0, ndimension)
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
  typedef typename vobj::scalar_object sobj;
  sobj site;   // scratch site object, declared once outside the loop

  GridBase *dstGrid = lowDim._grid;
  GridBase *srcGrid = higherDim._grid;
  int dstDims = dstGrid->_ndimension;
  int srcDims = srcGrid->_ndimension;

  assert(dstDims == srcDims);
  assert(orthog<srcDims);
  assert(orthog>=0);

  // Both grids must share processor layout and local extents in every direction
  for(int mu=0;mu<srcDims;mu++){
    assert(dstGrid->_processors[mu]  == srcGrid->_processors[mu]);
    assert(dstGrid->_ldimensions[mu] == srcGrid->_ldimensions[mu]);
  }

  // the checks above should guarantee that the operations are local
  // NOTE: loop is left serial; `site` is shared across iterations, so it
  // would need to become loop-local if the macro below were re-enabled.
  //PARALLEL_FOR_LOOP
  for(int idx=0;idx<dstGrid->lSites();idx++){
    std::vector<int> dstCoor(dstDims);
    dstGrid->LocalIndexToLocalCoor(idx,dstCoor);

    // only sites on the requested destination hyperplane take part
    if( dstCoor[orthog] != slice_lo ) continue;

    std::vector<int> srcCoor(dstCoor);
    srcCoor[orthog] = slice_hi;

    peekLocalSite(site,higherDim,srcCoor);
    pokeLocalSite(site,lowDim,dstCoor);
  }
}
template<class vobj> template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{ {
@ -555,96 +482,6 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
} }
//Unpack a SIMD-vectorized lattice into a flat array of scalar objects.
//  out : resized to the grid's lSites(); element [lex] receives the site whose
//        local coordinate maps to `lex` under Lexicographic::IndexFromCoor
//        with the grid's _ldimensions
//  in  : vectorized source lattice
//Only participates in overload resolution when vobj is SIMD-vectorized and
//sobj is not.
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){
  typedef typename vobj::vector_type vtype;

  GridBase* grid = in._grid;
  out.resize(grid->lSites());

  int nd    = grid->Nd();
  int nsimd = vtype::Nsimd();

  // Inner (intra-vector) coordinate of each SIMD lane, tabulated once up front
  std::vector<std::vector<int> > laneCoor(nsimd, std::vector<int>(nd));
  for(int lane=0; lane < nsimd; lane++){
    grid->iCoorFromIindex(laneCoor[lane], lane);
  }

PARALLEL_FOR_LOOP
  for(int oidx = 0; oidx < grid->oSites(); oidx++){ //one vectorized site per iteration
    // all scratch storage is loop-local, so iterations are independent
    std::vector<sobj*> dest(nsimd);
    std::vector<int> ocoor(nd);
    grid->oCoorFromOindex(ocoor, oidx);

    std::vector<int> lcoor(grid->Nd());
    for(int lane=0; lane < nsimd; lane++){
      // local coordinate = outer coordinate + reduced extent * lane coordinate
      for(int mu=0;mu<nd;mu++){
        lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*laneCoor[lane][mu];
      }

      int lex;
      Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions);
      dest[lane] = &out[lex];
    }

    //Scatter the lanes of this vectorized site to the collected destinations
    extract1(in._odata[oidx], dest, 0);
  }
}
//Convert a Lattice from one precision to another.
//  out : destination lattice; its checkerboard is set to that of `in`
//  in  : source lattice
//The source is first unpacked into a scalar array in lexicographic order of
//the local volume (already converted to the output scalar type), then each
//vectorized output site is assembled from that array with merge().
//Both grids must have the same number of dimensions AND the same local extent
//in every dimension: the scratch array is sized from the input grid but
//indexed with lexicographic indices computed from the output grid.
template<class VobjOut, class VobjIn>
void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
  assert(out._grid->Nd() == in._grid->Nd());
  out.checkerboard = in.checkerboard;
  GridBase *in_grid=in._grid;
  GridBase *out_grid = out._grid;
  typedef typename VobjOut::scalar_object SobjOut;
  typedef typename VobjIn::scalar_object SobjIn;

  int ndim = out._grid->Nd();
  int out_nsimd = out_grid->Nsimd();

  // Guard the indexing of in_slex_conv below: a mismatch in local extents
  // would hand merge() pointers outside (or to the wrong elements of) the
  // scratch array.  Same style of check as the slice insert/extract routines.
  for(int d=0;d<ndim;d++){
    assert(in_grid->_ldimensions[d] == out_grid->_ldimensions[d]);
  }

  // Inner (intra-vector) coordinate of each SIMD lane of the output layout
  std::vector<std::vector<int> > out_icoor(out_nsimd);
  for(int lane=0; lane < out_nsimd; lane++){
    out_icoor[lane].resize(ndim);
    out_grid->iCoorFromIindex(out_icoor[lane], lane);
  }

  // Scalar copy of the input, one element per local site, lexicographic order
  std::vector<SobjOut> in_slex_conv(in_grid->lSites());
  unvectorizeToLexOrdArray(in_slex_conv, in);

PARALLEL_FOR_LOOP
  for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
    std::vector<int> out_ocoor(ndim);
    out_grid->oCoorFromOindex(out_ocoor, out_oidx);

    std::vector<SobjOut*> ptrs(out_nsimd);
    std::vector<int> lcoor(out_grid->Nd());
    for(int lane=0; lane < out_nsimd; lane++){
      // local coordinate = outer coordinate + reduced extent * lane coordinate
      for(int mu=0;mu<ndim;mu++)
        lcoor[mu] = out_ocoor[mu] + out_grid->_rdimensions[mu]*out_icoor[lane][mu];

      int llex; Lexicographic::IndexFromCoor(lcoor, llex, out_grid->_ldimensions);
      ptrs[lane] = &in_slex_conv[llex];
    }
    // Gather the per-lane scalar sites into one vectorized output site
    merge(out._odata[out_oidx], ptrs, 0);
  }
}
} }
#endif #endif

View File

@ -194,22 +194,22 @@ class BinaryIO {
std::vector<int> site({x,y,z,t}); std::vector<int> site({x,y,z,t});
if (grid->IsBoss()) { if ( grid->IsBoss() ) {
fin.read((char *)&file_object, sizeof(file_object)); fin.read((char *)&file_object,sizeof(file_object));
bytes += sizeof(file_object); bytes += sizeof(file_object);
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object));
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object));
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object));
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object));
munge(file_object, munged, csum); munge(file_object,munged,csum);
} }
// The boss who read the file has their value poked // The boss who read the file has their value poked
pokeSite(munged,Umu,site); pokeSite(munged,Umu,site);
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -255,19 +255,19 @@ class BinaryIO {
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
// NB could gather an xstrip as an optimisation. // NB could gather an xstrip as an optimisation.
fout.write((char *)&file_object,sizeof(file_object)); fout.write((char *)&file_object,sizeof(file_object));
bytes+=sizeof(file_object); bytes+=sizeof(file_object);
} }
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -305,15 +305,15 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; // std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
parallel.GetState(saved,l_idx); parallel.GetState(saved,l_idx);
} }
grid->Broadcast(rank,(void *)&saved[0],bytes); grid->Broadcast(rank,(void *)&saved[0],bytes);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
fout.write((char *)&saved[0],bytes); fout.write((char *)&saved[0],bytes);
} }
} }
@ -355,14 +355,14 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
fin.read((char *)&saved[0],bytes); fin.read((char *)&saved[0],bytes);
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
} }
grid->Broadcast(0,(void *)&saved[0],bytes); grid->Broadcast(0,(void *)&saved[0],bytes);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
parallel.SetState(saved,l_idx); parallel.SetState(saved,l_idx);
} }
} }
@ -415,15 +415,15 @@ class BinaryIO {
if ( d == 0 ) parallel[d] = 0; if ( d == 0 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
} }
@ -434,9 +434,9 @@ class BinaryIO {
std::cout<< std::dec ; std::cout<< std::dec ;
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -472,8 +472,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
///////////////////////// /////////////////////////
@ -488,28 +488,28 @@ class BinaryIO {
//////////////////////////////// ////////////////////////////////
if (myrank == iorank) { if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj)); fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj)); fin.read((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum); munge(fileObj,siteObj,csum);
} }
// Possibly do transport through pt2pt // Possibly do transport through pt2pt
if ( rank != iorank ) { if ( rank != iorank ) {
if ( (myrank == rank) || (myrank==iorank) ) { if ( (myrank == rank) || (myrank==iorank) ) {
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
} }
} }
// Poke at destination // Poke at destination
if ( myrank == rank ) { if ( myrank == rank ) {
pokeLocalSite(siteObj,Umu,lsite); pokeLocalSite(siteObj,Umu,lsite);
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
} }
@ -520,7 +520,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -558,15 +558,15 @@ class BinaryIO {
if ( d!= grid->_ndimension-1 ) parallel[d] = 0; if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
@ -577,9 +577,9 @@ class BinaryIO {
grid->GlobalSum(tmp); grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -619,8 +619,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
@ -640,26 +640,26 @@ class BinaryIO {
// Pair of nodes may need to do pt2pt send // Pair of nodes may need to do pt2pt send
if ( rank != iorank ) { // comms is necessary if ( rank != iorank ) { // comms is necessary
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
// Send to IOrank // Send to IOrank
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
} }
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
if (myrank == iorank) { if (myrank == iorank) {
munge(siteObj,fileObj,csum); munge(siteObj,fileObj,csum);
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj)); fout.seekp(offset+g_idx*sizeof(fileObj));
fout.write((char *)&fileObj,sizeof(fileObj)); fout.write((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
} }
} }
@ -668,7 +668,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }

0
lib/pugixml/.dirstamp Normal file
View File

View File

@ -17,7 +17,7 @@
#endif #endif
// Include user configuration file (this can define various configuration macros) // Include user configuration file (this can define various configuration macros)
#include "pugiconfig.hpp" #include <pugixml/pugiconfig.hpp>
#ifndef HEADER_PUGIXML_HPP #ifndef HEADER_PUGIXML_HPP
#define HEADER_PUGIXML_HPP #define HEADER_PUGIXML_HPP

View File

@ -55,19 +55,10 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types // QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour // Index conventions: Lorentz x Spin x Colour
// note: static const int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
#define ColourIndex 2 static const int ColourIndex = 2;
#define SpinIndex 1 static const int SpinIndex = 1;
#define LorentzIndex 0 static const int LorentzIndex= 0;
// Also should make these a named enum type
static const int DaggerNo=0;
static const int DaggerYes=1;
static const int InverseNo=0;
static const int InverseYes=1;
// Useful traits is this a spin index // Useful traits is this a spin index
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE; //typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
@ -493,27 +484,16 @@ namespace QCD {
} //namespace QCD } //namespace QCD
} // Grid } // Grid
#include <qcd/utils/SpaceTimeGrid.h>
#include <Grid/qcd/utils/SpaceTimeGrid.h> #include <qcd/spin/Dirac.h>
#include <Grid/qcd/spin/Dirac.h> #include <qcd/spin/TwoSpinor.h>
#include <Grid/qcd/spin/TwoSpinor.h> #include <qcd/utils/LinalgUtils.h>
#include <Grid/qcd/utils/LinalgUtils.h> #include <qcd/utils/CovariantCshift.h>
#include <Grid/qcd/utils/CovariantCshift.h> #include <qcd/utils/SUn.h>
#include <qcd/action/Actions.h>
// Include representations #include <qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/utils/SUn.h> #include <qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/utils/SUnAdjoint.h> #include <qcd/hmc/HMC.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/hmc_types.h>
#include <Grid/qcd/action/Actions.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/hmc/HMC.h>
#endif #endif

View File

@ -1,153 +1,86 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionBase.h Source file: ./lib/qcd/action/ActionBase.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef QCD_ACTION_BASE #ifndef QCD_ACTION_BASE
#define QCD_ACTION_BASE #define QCD_ACTION_BASE
namespace Grid { namespace Grid {
namespace QCD { namespace QCD{
template <class GaugeField> template<class GaugeField>
class Action { class Action {
public: public:
bool is_smeared = false;
// Boundary conditions? // Heatbath? // Boundary conditions? // Heatbath?
virtual void refresh(const GaugeField& U, virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions
GridParallelRNG& pRNG) = 0; // refresh pseudofermions virtual RealD S (const GaugeField &U) = 0; // evaluate the action
virtual RealD S(const GaugeField& U) = 0; // evaluate the action virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative
virtual void deriv(const GaugeField& U, virtual ~Action() {};
GaugeField& dSdU) = 0; // evaluate the action derivative
virtual ~Action(){};
};
// Indexing of tuple types
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
}; };
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
/* /*
template <class GaugeField> template<class GaugeField, class FermionField>
struct ActionLevel { class PseudoFermionAction : public Action<GaugeField> {
public: public:
typedef Action<GaugeField>* FermionField Phi;
ActPtr; // now force the same colours as the rest of the code GridParallelRNG &pRNG;
GridBase &Grid;
//Add supported representations here PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) {
};
unsigned int multiplier; virtual void refresh(const GaugeField &gauge) {
gaussian(Phi,pRNG);
std::vector<ActPtr> actions;
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
assert(mul >= 1);
}; };
void push_back(ActPtr ptr) { actions.push_back(ptr); }
}; };
*/ */
template <class GaugeField, class Repr = NoHirep > template<class GaugeField> struct ActionLevel{
struct ActionLevel { public:
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
typedef Action<GaugeField>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher int multiplier;
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
action_collection actions_hirep;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
std::vector<ActPtr>& actions; std::vector<ActPtr> actions;
// Temporary conversion between ActionLevel and ActionLevelHirep ActionLevel(int mul = 1) : multiplier(mul) {
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){} assert (mul > 0);
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
//apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
}; };
//void push_back(ActPtr ptr) { actions.push_back(ptr); } void push_back(ActPtr ptr){
actions.push_back(ptr);
template < class Field >
void push_back(Action<Field>* ptr) {
// insert only in the correct vector
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template < class ActPtr>
static void resize(ActPtr ap, unsigned int n){
ap->resize(n);
} }
//template <std::size_t I>
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
}; };
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
//template <class GaugeField>
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
template <class GaugeField, class R> }}
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
}
}
#endif #endif

View File

@ -40,25 +40,25 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
//////////////////////////////////////////// ////////////////////////////////////////////
// Abstract base interface // Abstract base interface
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/ActionBase.h> #include <qcd/action/ActionBase.h>
#include <Grid/qcd/action/ActionParams.h> #include <qcd/action/ActionParams.h>
//////////////////////////////////////////// ////////////////////////////////////////////
// Utility functions // Utility functions
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/gauge/GaugeImpl.h> #include <qcd/action/gauge/GaugeImpl.h>
#include <Grid/qcd/utils/WilsonLoops.h> #include <qcd/utils/WilsonLoops.h>
#include <Grid/qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions #include <qcd/action/fermion/WilsonCompressor.h> //used by all wilson type fermions
#include <Grid/qcd/action/fermion/FermionOperatorImpl.h> #include <qcd/action/fermion/FermionOperatorImpl.h>
#include <Grid/qcd/action/fermion/FermionOperator.h> #include <qcd/action/fermion/FermionOperator.h>
#include <Grid/qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions #include <qcd/action/fermion/WilsonKernels.h> //used by all wilson type fermions
//////////////////////////////////////////// ////////////////////////////////////////////
// Gauge Actions // Gauge Actions
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h> #include <qcd/action/gauge/WilsonGaugeAction.h>
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h> #include <qcd/action/gauge/PlaqPlusRectangleAction.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
@ -107,64 +107,41 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
// for EVERY .cc file. This define centralises the list and restores global push of impl cases // for EVERY .cc file. This define centralises the list and restores global push of impl cases
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
#define FermOpTemplateInstantiate(A) \
#define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \ template class A<WilsonImplF>; \
template class A<WilsonImplD>; \ template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \ template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; template class A<GparityWilsonImplD>;
#define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \
template class A<WilsonAdjImplD>;
#define TwoIndexFermOpTemplateInstantiate(A) \
template class A<WilsonTwoIndexSymmetricImplF>; \
template class A<WilsonTwoIndexSymmetricImplD>;
#define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
#define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \
FermOp5dVecTemplateInstantiate(A)
#define GparityFermOpTemplateInstantiate(A) #define GparityFermOpTemplateInstantiate(A)
//////////////////////////////////////////// ////////////////////////////////////////////
// Fermion operators / actions // Fermion operators / actions
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/fermion/WilsonFermion.h> // 4d wilson like #include <qcd/action/fermion/WilsonFermion.h> // 4d wilson like
#include <Grid/qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like #include <qcd/action/fermion/WilsonTMFermion.h> // 4d wilson like
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types #include <qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
//#include <Grid/qcd/action/fermion/CloverFermion.h> //#include <qcd/action/fermion/CloverFermion.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> // Cayley types #include <qcd/action/fermion/CayleyFermion5D.h> // Cayley types
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h> #include <qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h> #include <qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h> #include <qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> #include <qcd/action/fermion/ShamirZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> #include <qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyTanhFermion.h> #include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction #include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
#include <Grid/qcd/action/fermion/OverlapWilsonContfracTanhFermion.h> #include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h> #include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
#include <Grid/qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction #include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h> #include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h> #include <qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// More maintainable to maintain the following typedef list centrally, as more "impl" targets // More maintainable to maintain the following typedef list centrally, as more "impl" targets
@ -180,14 +157,6 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD; typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplR> WilsonTwoIndexSymmetricFermionR;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplF> WilsonTwoIndexSymmetricFermionF;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermionD;
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR; typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF; typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD; typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;
@ -198,11 +167,6 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF; typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD; typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;
@ -258,21 +222,21 @@ typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code // G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
#include <Grid/qcd/action/fermion/g5HermitianLinop.h> #include <qcd/action/fermion/g5HermitianLinop.h>
//////////////////////////////////////// ////////////////////////////////////////
// Pseudo fermion combinations for HMC // Pseudo fermion combinations for HMC
//////////////////////////////////////// ////////////////////////////////////////
#include <Grid/qcd/action/pseudofermion/EvenOddSchurDifferentiable.h> #include <qcd/action/pseudofermion/EvenOddSchurDifferentiable.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavour.h> #include <qcd/action/pseudofermion/TwoFlavour.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavourRatio.h> #include <qcd/action/pseudofermion/TwoFlavourRatio.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOdd.h> #include <qcd/action/pseudofermion/TwoFlavourEvenOdd.h>
#include <Grid/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h> #include <qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourRational.h> #include <qcd/action/pseudofermion/OneFlavourRational.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourRationalRatio.h> #include <qcd/action/pseudofermion/OneFlavourRationalRatio.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRational.h> #include <qcd/action/pseudofermion/OneFlavourEvenOddRational.h>
#include <Grid/qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h> #include <qcd/action/pseudofermion/OneFlavourEvenOddRationalRatio.h>
#endif #endif

View File

@ -28,10 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
@ -48,353 +45,487 @@ namespace QCD {
FourDimGrid, FourDimGrid,
FourDimRedBlackGrid,_M5,p), FourDimRedBlackGrid,_M5,p),
mass(_mass) mass(_mass)
{ } {
}
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{ {
int Ls=this->Ls; // Assemble Din
std::vector<Coeff_t> diag (Ls,1.0); int Ls=this->Ls;
std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass; for(int s=0;s<Ls;s++){
std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass; if ( s==0 ) {
M5D(psi,chi,chi,lower,diag,upper); // Din = bs psi[s] + cs[s] psi[s+1}
} axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
template<class Impl> // Din+= -mass*cs[s] psi[s+1}
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din) axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
{ } else if ( s==(Ls-1)) {
int Ls=this->Ls; axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
std::vector<Coeff_t> diag = bs; axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
std::vector<Coeff_t> upper= cs; } else {
std::vector<Coeff_t> lower= cs; axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
upper[Ls-1]=-mass*upper[Ls-1]; axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
lower[0] =-mass*lower[0]; }
M5D(psi,psi,Din,lower,diag,upper); }
}
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = beo;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i];
lower[i]=-ceo[i];
} }
upper[Ls-1]=-mass*upper[Ls-1]; template<class Impl>
lower[0] =-mass*lower[0]; void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
M5D(psi,psi,chi,lower,diag,upper); {
} int Ls=this->Ls;
template<class Impl> for(int s=0;s<Ls;s++){
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi) if ( s==0 ) {
{ axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
int Ls=this->Ls; axpby_ssp_pminus(Din,1.0,Din,-mass*cs[Ls-1],psi,s,Ls-1);
std::vector<Coeff_t> diag = bee; } else if ( s==(Ls-1)) {
std::vector<Coeff_t> upper(Ls); axpby_ssp_pplus (Din,bs[s],psi,-mass*cs[0],psi,s,0);
std::vector<Coeff_t> lower(Ls); axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
for(int i=0;i<Ls;i++) { } else {
upper[i]=-cee[i]; axpby_ssp_pplus (Din,bs[s],psi,cs[s+1],psi,s,s+1);
lower[i]=-cee[i]; axpby_ssp_pminus(Din,1.0,Din,cs[s-1],psi,s,s-1);
} }
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5D(psi,psi,chi,lower,diag,upper);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag = bee;
std::vector<Coeff_t> upper(Ls);
std::vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){
// Assemble the 5d matrix
if ( s==0 ) {
upper[s] = -cee[s+1] ;
lower[s] = mass*cee[Ls-1];
} else if ( s==(Ls-1)) {
upper[s] = mass*cee[0];
lower[s] = -cee[s-1];
} else {
upper[s]=-cee[s+1];
lower[s]=-cee[s-1];
} }
} }
M5Ddag(psi,psi,chi,lower,diag,upper); // override multiply
} template<class Impl>
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
template<class Impl> FermionField Din(psi._grid);
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
std::vector<Coeff_t> diag(Ls,1.0);
std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper);
}
template<class Impl> // Assemble Din
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din) /*
{ for(int s=0;s<Ls;s++){
int Ls=this->Ls; if ( s==0 ) {
std::vector<Coeff_t> diag =bs; // Din = bs psi[s] + cs[s] psi[s+1}
std::vector<Coeff_t> upper=cs; axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
std::vector<Coeff_t> lower=cs; // Din+= -mass*cs[s] psi[s+1}
upper[Ls-1]=-mass*upper[Ls-1]; axpby_ssp_pplus (Din,1.0,Din,-mass*cs[s],psi,s,Ls-1);
lower[0] =-mass*lower[0]; } else if ( s==(Ls-1)) {
M5Ddag(psi,psi,Din,lower,diag,upper); axpby_ssp_pminus(Din,bs[s],psi,-mass*cs[s],psi,s,0);
} axpby_ssp_pplus (Din,1.0,Din,cs[s],psi,s,s-1);
} else {
axpby_ssp_pminus(Din,bs[s],psi,cs[s],psi,s,s+1);
axpby_ssp_pplus(Din,1.0,Din,cs[s],psi,s,s-1);
}
}
*/
Meooe5D(psi,Din);
template<class Impl> this->DW(Din,chi,DaggerNo);
RealD CayleyFermion5D<Impl>::M (const FermionField &psi, FermionField &chi) // ((b D_W + D_w hop terms +1) on s-diag
{ axpby(chi,1.0,1.0,chi,psi);
int Ls=this->Ls;
FermionField Din(psi._grid); // Call Mooee??
for(int s=0;s<Ls;s++){
// Assemble Din if ( s==0 ){
Meooe5D(psi,Din); axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,Ls-1);
this->DW(Din,chi,DaggerNo); } else if ( s==(Ls-1)) {
// ((b D_W + D_w hop terms +1) on s-diag axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,0);
axpby(chi,1.0,1.0,chi,psi); axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
} else {
M5D(psi,chi); axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s+1);
return(norm2(chi)); axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s-1);
} }
}
template<class Impl> return norm2(chi);
RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{
// Under adjoint
//D1+ D1- P- -> D1+^dag P+ D2-^dag
//D2- P+ D2+ P-D1-^dag D2+dag
FermionField Din(psi._grid);
// Apply Dw
this->DW(psi,Din,DaggerYes);
MeooeDag5D(Din,chi);
M5Ddag(psi,chi);
// ((b D_W + D_w hop terms +1) on s-diag
axpby (chi,1.0,1.0,chi,psi);
return norm2(chi);
}
// half checkerboard operations
template<class Impl>
void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
FermionField tmp(psi._grid);
Meooe5D(psi,tmp);
if ( psi.checkerboard == Odd ) {
this->DhopEO(tmp,chi,DaggerNo);
} else {
this->DhopOE(tmp,chi,DaggerNo);
} }
}
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi) RealD CayleyFermion5D<Impl>::Mdag (const FermionField &psi, FermionField &chi)
{ {
FermionField tmp(psi._grid); // Under adjoint
// Apply 4d dslash //D1+ D1- P- -> D1+^dag P+ D2-^dag
if ( psi.checkerboard == Odd ) { //D2- P+ D2+ P-D1-^dag D2+dag
this->DhopEO(psi,tmp,DaggerYes);
} else { FermionField Din(psi._grid);
this->DhopOE(psi,tmp,DaggerYes); // Apply Dw
this->DW(psi,Din,DaggerYes);
MeooeDag5D(Din,chi);
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
// Collect the terms in DW
// Chi = bs Din[s] + cs[s] Din[s+1}
// Chi+= -mass*cs[s] psi[s+1}
/*
if ( s==0 ) {
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,-mass*cs[Ls-1],Din,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (chi,bs[s],Din,-mass*cs[0],Din,s,0);
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
} else {
axpby_ssp_pplus (chi,bs[s],Din,cs[s+1],Din,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,cs[s-1],Din,s,s-1);
}
*/
// FIXME just call MooeeDag??
// Collect the terms indept of DW
if ( s==0 ){
axpby_ssp_pplus (chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,mass,psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus (chi,1.0,chi,mass,psi,s,0);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
} else {
axpby_ssp_pplus(chi,1.0,chi,-1.0,psi,s,s+1);
axpby_ssp_pminus(chi,1.0,chi,-1.0,psi,s,s-1);
}
}
// ((b D_W + D_w hop terms +1) on s-diag
axpby (chi,1.0,1.0,chi,psi);
return norm2(chi);
} }
MeooeDag5D(tmp,chi);
}
template<class Impl> // half checkerboard operations
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){ template<class Impl>
FermionField tmp(psi._grid); void CayleyFermion5D<Impl>::Meooe (const FermionField &psi, FermionField &chi)
Meo5D(psi,tmp); {
// Apply 4d dslash fragment int Ls=this->Ls;
this->DhopDir(tmp,chi,dir,disp);
}
// force terms; five routines; default to Dhop on diagonal
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) { FermionField tmp(psi._grid);
// U d/du [D_w D5] V = U d/du DW D5 V // Assemble the 5d matrix
Meooe5D(V,Din); Meooe5D(psi,tmp);
this->DhopDeriv(mat,U,Din,dag); #if 0
} else { std::cout << "Meooe Test replacement norm2 tmp = " <<norm2(tmp)<<std::endl;
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call for(int s=0;s<Ls;s++){
Meooe5D(U,Din); if ( s==0 ) {
this->DhopDeriv(mat,Din,V,dag); // tmp = bs psi[s] + cs[s] psi[s+1}
// tmp+= -mass*cs[s] psi[s+1}
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi ,s, s+1);
axpby_ssp_pplus(tmp,1.0,tmp,mass*ceo[s],psi,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pminus(tmp,beo[s],psi,mass*ceo[s],psi,s,0);
axpby_ssp_pplus(tmp,1.0,tmp,-ceo[s],psi,s,s-1);
} else {
axpby_ssp_pminus(tmp,beo[s],psi,-ceo[s],psi,s,s+1);
axpby_ssp_pplus (tmp,1.0,tmp,-ceo[s],psi,s,s-1);
}
}
std::cout << "Meooe Test replacement norm2 tmp old = " <<norm2(tmp)<<std::endl;
#endif
// Apply 4d dslash
if ( psi.checkerboard == Odd ) {
this->DhopEO(tmp,chi,DaggerNo);
} else {
this->DhopOE(tmp,chi,DaggerNo);
}
} }
};
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) { template<class Impl>
// U d/du [D_w D5] V = U d/du DW D5 V void CayleyFermion5D<Impl>::MeooeDag (const FermionField &psi, FermionField &chi)
Meooe5D(V,Din); {
this->DhopDerivOE(mat,U,Din,dag); FermionField tmp(psi._grid);
} else { // Apply 4d dslash
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call if ( psi.checkerboard == Odd ) {
this->DhopEO(psi,tmp,DaggerYes);
} else {
this->DhopOE(psi,tmp,DaggerYes);
}
MeooeDag5D(tmp,chi);
#if 0
std::cout << "Meooe Test replacement norm2 chi new = " <<norm2(chi)<<std::endl;
// Assemble the 5d matrix
int Ls=this->Ls;
for(int s=0;s<Ls;s++){
if ( s==0 ) {
axpby_ssp_pplus(chi,beo[s],tmp, -ceo[s+1] ,tmp,s,s+1);
axpby_ssp_pminus(chi, 1.0,chi,mass*ceo[Ls-1],tmp,s,Ls-1);
} else if ( s==(Ls-1)) {
axpby_ssp_pplus(chi,beo[s],tmp,mass*ceo[0],tmp,s,0);
axpby_ssp_pminus(chi,1.0,chi,-ceo[s-1],tmp,s,s-1);
} else {
axpby_ssp_pplus(chi,beo[s],tmp,-ceo[s+1],tmp,s,s+1);
axpby_ssp_pminus(chi,1.0 ,chi,-ceo[s-1],tmp,s,s-1);
}
}
std::cout << "Meooe Test replacement norm2 chi old = " <<norm2(chi)<<std::endl;
#endif
}
// Checkerboard-diagonal (ee/oo) fifth-dimension term.
// For each slice s two helper calls are issued:
//   axpby_ssp_pminus(chi, bee[s], psi, cMinus, psi, s, sMinus)
//   axpby_ssp_pplus (chi, 1.0,    chi, cPlus,  psi, s, sPlus )
// Interior links carry -cee[s]; the link that wraps around the ends of the
// fifth dimension carries mass*cee[s] instead.
// NOTE(review): presumably the helpers form a*x[s] + b*P_{-/+} y[s'] on slice s;
// confirm against their definitions.
template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{
  const int Ls = this->Ls;
  for (int s = 0; s < Ls; ++s) {
    // Same precedence as the original if / else-if chain: s==0 wins over s==Ls-1.
    const bool atFirst = (s == 0);
    const bool atLast  = !atFirst && (s == (Ls - 1));

    // Neighbour used by the P_- call: s+1, or slice 0 with the mass factor at the last slice.
    const int  sMinus = atLast ? 0 : s + 1;
    const auto cMinus = atLast ? mass*cee[s] : -cee[s];

    // Neighbour used by the P_+ call: s-1, or slice Ls-1 with the mass factor at the first slice.
    const int  sPlus = atFirst ? Ls - 1 : s - 1;
    const auto cPlus = atFirst ? mass*cee[s] : -cee[s];

    axpby_ssp_pminus(chi,bee[s],psi,cMinus,psi,s,sMinus);
    axpby_ssp_pplus (chi,1.0,chi,cPlus,psi,s,sPlus);
  }
}
// Single-direction hopping term.
// First assembles the fifth-dimension combination of psi into tmp using the
// beo/ceo coefficient tables (interior links carry -ceo[s], the wrap-around
// link carries mass*ceo[s]), then applies one 4d dslash fragment through
// DhopDir(tmp,chi,dir,disp).
template<class Impl>
void CayleyFermion5D<Impl>::Mdir (const FermionField &psi, FermionField &chi,int dir,int disp){
  const int Ls = this->Ls;
  FermionField tmp(psi._grid);

  // Assemble the 5d matrix
  for (int s = 0; s < Ls; ++s) {
    // Same precedence as the original if / else-if chain: s==0 wins over s==Ls-1.
    const bool atFirst = (s == 0);
    const bool atLast  = !atFirst && (s == (Ls - 1));

    // P_- call: neighbour s+1, or slice 0 with the mass factor at the last slice.
    const int  sMinus = atLast ? 0 : s + 1;
    const auto cMinus = atLast ? mass*ceo[s] : -ceo[s];

    // P_+ call: neighbour s-1, or slice Ls-1 with the mass factor at the first slice.
    const int  sPlus = atFirst ? Ls - 1 : s - 1;
    const auto cPlus = atFirst ? mass*ceo[s] : -ceo[s];

    axpby_ssp_pminus(tmp,beo[s],psi,cMinus,psi,s,sMinus);
    axpby_ssp_pplus (tmp,1.0,tmp,cPlus,psi,s,sPlus);
  }

  // Apply 4d dslash fragment
  this->DhopDir(tmp,chi,dir,disp);
}
// Daggered checkerboard-diagonal (ee/oo) fifth-dimension term.
// Compared with Mooee the two projector helpers swap roles and each
// off-diagonal coefficient is taken from the neighbouring slice
// (cee[s+1] / cee[s-1]); the wrap-around links use mass*cee[0] and
// mass*cee[Ls-1].
template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{
  const int Ls = this->Ls;
  for (int s = 0; s < Ls; ++s) {
    // Same precedence as the original if / else-if chain: s==0 wins over s==Ls-1.
    const bool atFirst = (s == 0);
    const bool atLast  = !atFirst && (s == (Ls - 1));

    // P_+ call: neighbour s+1, or slice 0 at the last slice.
    // The conditional only evaluates the selected operand, so cee[s+1] is
    // read in exactly the cases where the original read it.
    const int  sPlus = atLast ? 0 : s + 1;
    const auto cPlus = atLast ? mass*cee[0] : -cee[s+1];

    // P_- call: neighbour s-1, or slice Ls-1 at the first slice.
    const int  sMinus = atFirst ? Ls - 1 : s - 1;
    const auto cMinus = atFirst ? mass*cee[Ls-1] : -cee[s-1];

    // Assemble the 5d matrix
    axpby_ssp_pplus (chi,bee[s],psi,cPlus,psi,s,sPlus);
    axpby_ssp_pminus(chi,1.0,chi,cMinus,psi,s,sMinus);
  }
}
// Applies the inverse of the checkerboard-diagonal fifth-dimension matrix to psi,
// writing the result into chi.
// The work is done as four in-place sweeps over the Ls slices, using the factor
// tables lee, leem, dee, ueem and uee that the SetCoefficients* routines fill in
// under their "LDU decomposition of eeoo" heading.
// Every sweep after the first line reads chi slices written by earlier
// iterations, so the order of the loops and of their iterations must be kept.
// NOTE(review): the helpers are presumably
//   axpby_ssp*(z,a,x,b,y,s,sp): z[s] = a*x[s] + b*(P_+/P_-) y[sp]
// -- confirm against their definitions.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
// Stage 1: apply (L^{\prime})^{-1} -- forward sweep, slice s uses the already updated slice s-1
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);// recursion Psi[s] -lee P_+ chi[s-1]
}
// Stage 2: L_m^{-1} -- accumulate the leem-weighted P_- terms of slices 0..Ls-2 into the last slice
for (int s=0;s<Ls-1;s++){ // Chi[ee] = 1 - sum[s<Ls-1] -leem[s]P_- chi
axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,Ls-1,s);
}
// Stage 3: U_m^{-1} D^{-1} -- scale slices 0..Ls-2 by 1/dee[s] and subtract the
// ueem-weighted P_+ term of the last slice (still unscaled, hence the /dee[Ls-1])
for (int s=0;s<Ls-1;s++){
// Chi[s] + 1/d chi[s]
axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[Ls-1],chi,s,Ls-1);
}
// Finally scale the last slice itself by 1/dee[Ls-1]
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Stage 4: apply U^{-1} -- backward sweep, slice s uses the already updated slice s+1
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1); // chi[Ls]
}
}
// Daggered counterpart of MooeeInv: applies the inverse-dagger of the
// checkerboard-diagonal fifth-dimension matrix to psi, writing into chi.
// It has the same four-sweep structure as MooeeInv, with the lower and upper
// factor tables exchanged (uee/ueem are used where MooeeInv uses lee/leem and
// vice versa) and the P_+ / P_- helper variants swapped.
// Every sweep after the first line reads chi slices written by earlier
// iterations, so the order of the loops and of their iterations must be kept.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
int Ls=this->Ls;
// Stage 1: apply (U^{\prime})^{-dagger} -- forward sweep, slice s uses the already updated slice s-1
axpby_ssp (chi,1.0,psi, 0.0,psi,0,0); // chi[0]=psi[0]
for (int s=1;s<Ls;s++){
axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
}
// Stage 2: U_m^{-\dagger} -- accumulate the ueem-weighted P_+ terms of slices 0..Ls-2 into the last slice
for (int s=0;s<Ls-1;s++){
axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,Ls-1,s);
}
// Stage 3: L_m^{-\dagger} D^{-dagger} -- scale slices 0..Ls-2 by 1/dee[s] and subtract the
// leem-weighted P_- term of the last slice (still unscaled, hence the /dee[Ls-1])
for (int s=0;s<Ls-1;s++){
axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[Ls-1],chi,s,Ls-1);
}
// Finally scale the last slice itself by 1/dee[Ls-1]
axpby_ssp(chi,1.0/dee[Ls-1],chi,0.0,chi,Ls-1,Ls-1); // Modest avoidable
// Stage 4: apply L^{-dagger} -- backward sweep, slice s uses the already updated slice s+1
for (int s=Ls-2;s>=0;s--){
axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1); // chi[Ls]
}
}
// force terms; five routines; default to Dhop on diagonal
//
// Derivative (force) contribution of the full operator.
// One of the two fermion fields is first passed through Meooe5D into the
// scratch field Din, then the call is forwarded to DhopDeriv:
//   dag == DaggerNo : Din = Meooe5D(V), DhopDeriv(mat, U,   Din, dag)
//   otherwise       : Din = Meooe5D(U), DhopDeriv(mat, Din, V,   dag)
template<class Impl>
void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
  FermionField Din(V._grid);

  if ( dag != DaggerNo ) {
    // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
    Meooe5D(U,Din);
    this->DhopDeriv(mat,Din,V,dag);
    return;
  }

  // U d/du [D_w D5] V = U d/du DW D5 V
  Meooe5D(V,Din);
  this->DhopDeriv(mat,U,Din,dag);
}
template<class Impl>
void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivOE(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din); Meooe5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag); this->DhopDerivOE(mat,Din,V,dag);
} }
}; };
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{ {
FermionField Din(V._grid); FermionField Din(V._grid);
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivEO(mat,U,Din,dag);
} else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag);
}
};
// Tanh
// Copies the first Ls entries of zdata->gamma into a Coeff_t vector and hands
// them to SetCoefficientsInternal with a scale factor of 1.0.
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
  const int nSlices = this->Ls;
  std::vector<Coeff_t> gamma(nSlices);
  for (int idx = 0; idx < nSlices; ++idx) {
    gamma[idx] = zdata->gamma[idx];
  }
  SetCoefficientsInternal(1.0,gamma,b,c);
}
//Zolo
// Copies the first Ls entries of zdata->gamma into a Coeff_t vector and hands
// them to SetCoefficientsInternal together with the caller-supplied zolo_hi.
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
  const int nSlices = this->Ls;
  std::vector<Coeff_t> gamma(nSlices);
  for (int idx = 0; idx < nSlices; ++idx) {
    gamma[idx] = zdata->gamma[idx];
  }
  SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
{
int Ls=this->Ls;
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
///////////////////////////////////////////////////////////
omega.resize(Ls);
bs.resize(Ls);
cs.resize(Ls);
as.resize(Ls);
//
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
//
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
//
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
//
// So
//
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
double bpc = b+c;
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
bee.resize(Ls);
cee.resize(Ls);
beo.resize(Ls);
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
aee.resize(Ls);
aeo.resize(Ls);
for(int i=0;i<Ls;i++){
aee[i]=cee[i];
aeo[i]=ceo[i];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
dee.resize(Ls);
lee.resize(Ls);
leem.resize(Ls);
uee.resize(Ls);
ueem.resize(Ls);
for(int i=0;i<Ls;i++){
dee[i] = bee[i];
if ( i < Ls-1 ) {
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0];
if ( dag == DaggerNo ) {
// U d/du [D_w D5] V = U d/du DW D5 V
Meooe5D(V,Din);
this->DhopDerivEO(mat,U,Din,dag);
} else { } else {
lee[i] =0.0; // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
leem[i]=0.0; Meooe5D(U,Din);
uee[i] =0.0; this->DhopDerivEO(mat,Din,V,dag);
ueem[i]=0.0; }
};
// Tanh
// Thin wrapper: forwards to SetCoefficientsZolotarev with zolo_hi fixed to 1.0.
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{
  this->SetCoefficientsZolotarev(1.0,zdata,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
int Ls=this->Ls;
///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec)
///////////////////////////////////////////////////////////
omega.resize(Ls);
bs.resize(Ls);
cs.resize(Ls);
as.resize(Ls);
//
// Ts = ( [bs+cs]Dw )^-1 ( (bs+cs) Dw )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(bs-cs)Dw} ) ( 2+(bs-cs) Dw )
//
// bs = 1/2( (1/omega_s + 1)*b + (1/omega - 1)*c ) = 1/2( 1/omega(b+c) + (b-c) )
// cs = 1/2( (1/omega_s - 1)*b + (1/omega + 1)*c ) = 1/2( 1/omega(b+c) - (b-c) )
//
// bs+cs = 0.5*( 1/omega(b+c) + (b-c) + 1/omega(b+c) - (b-c) ) = 1/omega(b+c)
// bs-cs = 0.5*( 1/omega(b+c) + (b-c) - 1/omega(b+c) + (b-c) ) = b-c
//
// So
//
// Ts = ( [b+c]Dw/omega_s )^-1 ( (b+c) Dw /omega_s )
// -(g5 ------- -1 ) ( g5 --------- + 1 )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
// Ts = ( [b+c]Dw )^-1 ( (b+c) Dw )
// -(g5 ------- -omega_s) ( g5 --------- + omega_s )
// ( {2+(b-c)Dw} ) ( 2+(b-c) Dw )
//
double bpc = b+c;
double bmc = b-c;
for(int i=0; i < Ls; i++){
as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc);
}
////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form
////////////////////////////////////////////////////////
bee.resize(Ls);
cee.resize(Ls);
beo.resize(Ls);
ceo.resize(Ls);
for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i];
}
aee.resize(Ls);
aeo.resize(Ls);
for(int i=0;i<Ls;i++){
aee[i]=cee[i];
aeo[i]=ceo[i];
}
//////////////////////////////////////////
// LDU decomposition of eeoo
//////////////////////////////////////////
dee.resize(Ls);
lee.resize(Ls);
leem.resize(Ls);
uee.resize(Ls);
ueem.resize(Ls);
for(int i=0;i<Ls;i++){
dee[i] = bee[i];
if ( i < Ls-1 ) {
lee[i] =-cee[i+1]/bee[i]; // sub-diag entry on the ith column
leem[i]=mass*cee[Ls-1]/bee[0];
for(int j=0;j<i;j++) leem[i]*= aee[j]/bee[j+1];
uee[i] =-aee[i]/bee[i]; // up-diag entry on the ith row
ueem[i]=mass;
for(int j=1;j<=i;j++) ueem[i]*= cee[j]/bee[j];
ueem[i]*= aee[0]/bee[0];
} else {
lee[i] =0.0;
leem[i]=0.0;
uee[i] =0.0;
ueem[i]=0.0;
}
}
{
double delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
} }
} }
{
Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d;
}
}
FermOpTemplateInstantiate(CayleyFermion5D); FermOpTemplateInstantiate(CayleyFermion5D);
GparityFermOpTemplateInstantiate(CayleyFermion5D); GparityFermOpTemplateInstantiate(CayleyFermion5D);

View File

@ -51,29 +51,6 @@ namespace Grid {
virtual void MooeeDag (const FermionField &in, FermionField &out); virtual void MooeeDag (const FermionField &in, FermionField &out);
virtual void MooeeInv (const FermionField &in, FermionField &out); virtual void MooeeInv (const FermionField &in, FermionField &out);
virtual void MooeeInvDag (const FermionField &in, FermionField &out); virtual void MooeeInvDag (const FermionField &in, FermionField &out);
virtual void Meo5D (const FermionField &psi, FermionField &chi);
virtual void M5D (const FermionField &psi, FermionField &chi);
virtual void M5Ddag(const FermionField &psi, FermionField &chi);
/////////////////////////////////////////////////////
// Instantiate different versions depending on Impl
/////////////////////////////////////////////////////
void M5D(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void M5Ddag(const FermionField &psi,
const FermionField &phi,
FermionField &chi,
std::vector<Coeff_t> &lower,
std::vector<Coeff_t> &diag,
std::vector<Coeff_t> &upper);
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
virtual void Instantiatable(void)=0; virtual void Instantiatable(void)=0;
// force terms; five routines; default to Dhop on diagonal // force terms; five routines; default to Dhop on diagonal
@ -91,23 +68,23 @@ namespace Grid {
RealD mass; RealD mass;
// Cayley form Moebius (tanh and zolotarev) // Cayley form Moebius (tanh and zolotarev)
std::vector<Coeff_t> omega; std::vector<RealD> omega;
std::vector<Coeff_t> bs; // S dependent coeffs std::vector<RealD> bs; // S dependent coeffs
std::vector<Coeff_t> cs; std::vector<RealD> cs;
std::vector<Coeff_t> as; std::vector<RealD> as;
// For preconditioning Cayley form // For preconditioning Cayley form
std::vector<Coeff_t> bee; std::vector<RealD> bee;
std::vector<Coeff_t> cee; std::vector<RealD> cee;
std::vector<Coeff_t> aee; std::vector<RealD> aee;
std::vector<Coeff_t> beo; std::vector<RealD> beo;
std::vector<Coeff_t> ceo; std::vector<RealD> ceo;
std::vector<Coeff_t> aeo; std::vector<RealD> aeo;
// LDU factorisation of the eeoo matrix // LDU factorisation of the eeoo matrix
std::vector<Coeff_t> lee; std::vector<RealD> lee;
std::vector<Coeff_t> leem; std::vector<RealD> leem;
std::vector<Coeff_t> uee; std::vector<RealD> uee;
std::vector<Coeff_t> ueem; std::vector<RealD> ueem;
std::vector<Coeff_t> dee; std::vector<RealD> dee;
// Constructors // Constructors
CayleyFermion5D(GaugeField &_Umu, CayleyFermion5D(GaugeField &_Umu,
@ -120,20 +97,9 @@ namespace Grid {
protected: protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
}; };
} }
} }
// INSTANTIATE_DPERP(A): emits explicit instantiations of the four
// CayleyFermion5D<A> members M5D (6-argument form), M5Ddag (6-argument form),
// MooeeInv and MooeeInvDag for the implementation type A.
// No comments may be placed between the continuation lines below.
#define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);
#define CAYLEY_DPERP_CACHE
#undef CAYLEY_DPERP_LINALG
#endif #endif

View File

@ -1,211 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus forwards
// Pplus backwards..
// Fifth-dimension kernel.
// The outer-site index is walked in strides of Ls; within each stride, slice s gets
//   chi[s] = diag[s]*phi[s] + upper[s]*spProj5m(psi[sUp]) + lower[s]*spProj5p(psi[sDn])
// where sUp is s+1 (slice 0 at the last slice) and sDn is s-1 (slice Ls-1 at the
// first slice). Any mass factor on the wrap-around links must already be folded
// into upper[Ls-1] / lower[0] by the caller.
// phi must share psi's checkerboard; chi inherits it.
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
				const FermionField &phi,
				FermionField &chi,
				std::vector<Coeff_t> &lower,
				std::vector<Coeff_t> &diag,
				std::vector<Coeff_t> &upper)
{
  int Ls = this->Ls;
  GridBase *grid = psi._grid;
  assert(phi.checkerboard == psi.checkerboard);
  chi.checkerboard = psi.checkerboard;
PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls
    for(int s=0;s<Ls;s++){
      auto tmp = psi._odata[0];

      // Same precedence as the original if / else-if chain: s==0 wins over s==Ls-1.
      const bool atFirst = (s == 0);
      const bool atLast  = !atFirst && (s == (Ls - 1));
      const int  here = ss + s;
      const int  sUp  = atLast  ? 0      : s + 1;  // source slice of the P_- term
      const int  sDn  = atFirst ? Ls - 1 : s - 1;  // source slice of the P_+ term

      // Two-step accumulation kept as in the original (phi may alias chi).
      spProj5m(tmp,psi._odata[ss+sUp]);
      chi[here]=diag[s]*phi[here]+upper[s]*tmp;
      spProj5p(tmp,psi._odata[ss+sDn]);
      chi[here]=chi[here]+lower[s]*tmp;
    }
  }
}
// Site-blocked M5Ddag. Same sweep as M5D in this file but with the two
// projectors exchanged: the upper coefficient multiplies spProj5p of the
// "forward" slice and the lower coefficient multiplies spProj5m of the
// "backward" slice. Forward/backward indices wrap at the ends of each
// Ls-sized block exactly as the original three-way branch did.
//
// phi must share psi's checkerboard (asserted); chi inherits it.
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
                                   const FermionField &phi,
                                   FermionField &chi,
                                   std::vector<Coeff_t> &lower,
                                   std::vector<Coeff_t> &diag,
                                   std::vector<Coeff_t> &upper)
{
  int Ls = this->Ls;
  GridBase *grid = psi._grid;

  assert(phi.checkerboard == psi.checkerboard);
  chi.checkerboard = psi.checkerboard;

PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // one block of Ls slices per iteration
    auto scratch = psi._odata[0];          // per-block projection buffer
    for(int s=0;s<Ls;s++){
      // s==0 is tested first, so for Ls==1 the forward index stays s+1.
      int fwd;
      int bwd;
      if ( s==0 ) {
        fwd = s+1;
        bwd = Ls-1;
      } else if ( s==(Ls-1) ) {
        fwd = 0;
        bwd = s-1;
      } else {
        fwd = s+1;
        bwd = s-1;
      }
      const int here = ss+s;

      spProj5p(scratch,psi._odata[ss+fwd]);
      chi[here] = diag[s]*phi[here]+upper[s]*scratch;

      spProj5m(scratch,psi._odata[ss+bwd]);
      chi[here] = chi[here]+lower[s]*scratch;
    }
  }
}
// Site-blocked MooeeInv: for each block of Ls slices, four in-place sweeps
// over chi using the member coefficient tables lee, leem, dee, ueem and uee.
// The sweeps update chi in place and each one reads results of the previous
// one, so their order must not change.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
  GridBase *grid = psi._grid;
  int Ls = this->Ls;

  chi.checkerboard = psi.checkerboard;

PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // one block of Ls slices per iteration
    auto proj = psi._odata[0];   // projection buffer, overwritten before each use
    const int last = Ls-1;       // index of the final slice within the block
    const int top  = ss+last;    // absolute site index of that slice

    // Stage 1 -- (L')^{-1}: forward recursion starting from chi[0]=psi[0]
    chi[ss] = psi[ss];
    for(int s=1;s<Ls;s++){
      spProj5p(proj,chi[ss+s-1]);
      chi[ss+s] = psi[ss+s]-lee[s-1]*proj;
    }

    // Stage 2 -- L_m^{-1}: fold every earlier slice into the last one
    for(int s=0;s<last;s++){
      spProj5m(proj,chi[ss+s]);
      chi[top] = chi[top] - leem[s]*proj;
    }

    // Stage 3 -- U_m^{-1} D^{-1}: scale each slice by 1/dee and subtract the
    // last-slice contribution; the last slice itself is scaled afterwards
    for(int s=0;s<last;s++){
      spProj5p(proj,chi[top]);
      chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(ueem[s]/dee[last])*proj;
    }
    chi[top] = (1.0/dee[last])*chi[top];

    // Stage 4 -- U^{-1}: backward recursion from the second-to-last slice
    for(int s=last-1;s>=0;s--){
      spProj5m(proj,chi[ss+s+1]);
      chi[ss+s] = chi[ss+s] - uee[s]*proj;
    }
  }
}
// Site-blocked MooeeInvDag: the daggered counterpart of MooeeInv in this
// file. Relative to MooeeInv the projectors spProj5p/spProj5m are exchanged
// and the coefficient tables swap roles (uee<->lee, ueem<->leem).
//
// For each block of Ls slices, chi is updated in place by four sweeps; each
// sweep consumes the output of the previous one, so the order is significant.
//
// psi : input field
// chi : output field; its checkerboard is set to psi's
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
  GridBase *grid=psi._grid;
  int Ls=this->Ls;

  // The former assert(psi.checkerboard == psi.checkerboard) compared a value
  // with itself and could never fire, so it has been dropped. There is
  // nothing to validate here because chi's checkerboard is assigned next.
  chi.checkerboard=psi.checkerboard;

PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=Ls){ // adds Ls

    auto tmp = psi._odata[0]; // scratch; overwritten by a projector before each use

    // Apply (U^{\prime})^{-dagger}
    chi[ss]=psi[ss];
    for (int s=1;s<Ls;s++){
      spProj5m(tmp,chi[ss+s-1]);
      chi[ss+s] = psi[ss+s]-uee[s-1]*tmp;
    }

    // U_m^{-\dagger}: accumulate into the last slice
    for (int s=0;s<Ls-1;s++){
      spProj5p(tmp,chi[ss+s]);
      chi[ss+Ls-1] = chi[ss+Ls-1] - ueem[s]*tmp;
    }

    // L_m^{-\dagger} D^{-dagger}
    for (int s=0;s<Ls-1;s++){
      spProj5m(tmp,chi[ss+Ls-1]);
      chi[ss+s] = (1.0/dee[s])*chi[ss+s]-(leem[s]/dee[Ls-1])*tmp;
    }
    chi[ss+Ls-1]= (1.0/dee[Ls-1])*chi[ss+Ls-1];

    // Apply L^{-dagger}: backward recursion
    for (int s=Ls-2;s>=0;s--){
      spProj5p(tmp,chi[ss+s+1]);
      chi[ss+s] = chi[ss+s] - lee[s]*tmp;
    }
  }
}
// Explicit instantiations of the member templates defined in this file, one
// per fermion implementation type. They are compiled only when
// CAYLEY_DPERP_CACHE is defined; INSTANTIATE_DPERP is a macro defined outside
// this file.
#ifdef CAYLEY_DPERP_CACHE
INSTANTIATE_DPERP(WilsonImplF);
INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
#endif
}}

View File

@ -1,133 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
/*
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
*/
// Dense-matrix MooeeInternal for a non-vectorised s-direction.
//
// Two real Ls x Ls matrices are assembled from the member tables bee and cee
// and the member mass: both carry bee on the diagonal; Pminus has -cee[s] on
// the super-diagonal and mass*cee[Ls-1] in its bottom-left corner, Pplus has
// -cee[s+1] on the sub-diagonal and mass*cee[0] in its top-right corner.
// They are optionally inverted (inv != 0) and then adjointed (dag != 0).
//
// For every site outside the s-direction and every output slice, the result
// is half the sum over input slices of the Pplus-weighted spProj5p part
// plus the Pminus-weighted spProj5m part. Matrix entries that are exactly
// zero are skipped.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
  int Ls       = this->Ls;
  int sExtent  = psi._grid->_rdimensions[0];
  int nOuter   = psi._grid->oSites()/sExtent;

  chi.checkerboard = psi.checkerboard;

  assert(Ls==sExtent); // this version requires an un-vectorised s-direction

  Eigen::MatrixXd Pplus  = Eigen::MatrixXd::Zero(Ls,Ls);
  Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls);

  // Diagonals
  for(int s=0;s<Ls;s++){
    Pplus (s,s) = bee[s];
    Pminus(s,s) = bee[s];
  }
  // Off-diagonals: the two matrices are independent, so one pass fills both
  for(int s=0;s<Ls-1;s++){
    Pminus(s,s+1) = -cee[s];
    Pplus (s+1,s) = -cee[s+1];
  }
  // Corner (wrap-around) entries
  Pplus (0,Ls-1) = mass*cee[0];
  Pminus(Ls-1,0) = mass*cee[Ls-1];

  // Select plain or inverted matrices, then optionally take the adjoint
  Eigen::MatrixXd PplusMat ;
  Eigen::MatrixXd PminusMat;
  if ( !inv ) {
    PplusMat  = Pplus;
    PminusMat = Pminus;
  } else {
    PplusMat  = Pplus.inverse();
    PminusMat = Pminus.inverse();
  }
  if ( dag ) {
    PplusMat.adjointInPlace();
    PminusMat.adjointInPlace();
  }

  // For the non-vectorised s-direction this is a plain matrix-vector product
  for(auto site=0;site<nOuter;site++){

    SiteSpinor     acc;
    SiteHalfSpinor halfP;
    SiteHalfSpinor halfM;

    for(int row=0;row<Ls;row++){
      acc = zero;
      for(int col=0;col<Ls;col++){
        int src = col+Ls*site;
        if ( PplusMat(row,col) != 0.0 ) {
          spProj5p(halfP,psi[src]);
          accumRecon5p(acc,PplusMat (row,col)*halfP);
        }
        if ( PminusMat(row,col) != 0.0 ) {
          spProj5m(halfM,psi[src]);
          accumRecon5m(acc,PminusMat(row,col)*halfM);
        }
      }
      chi[row+Ls*site] = acc*0.5;
    }
  }
}
// Explicit instantiations of the dense MooeeInternal above for the
// GparityWilsonImpl and WilsonImpl types (F and D variants).
// NOTE(review): no ZWilsonImpl instantiation appears here; presumably because
// this version stores the matrices in real Eigen::MatrixXd -- confirm.
template void CayleyFermion5D<GparityWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<GparityWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -1,149 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid.h>
namespace Grid {
namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus forwards
// Pplus backwards
// M5D expressed through whole-slice axpby helpers. For each slice s two calls
// are issued, in this order:
//   1. axpby_ssp_pminus(chi, diag[s], phi, upper[s], psi, s, s_up)
//   2. axpby_ssp_pplus (chi, 1.0,     chi, lower[s], psi, s, s_dn)
// s_up is s+1 except on the last slice, where it wraps to 0; s_dn is s-1
// except on the first slice, where it wraps to Ls-1.
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
                                const FermionField &phi,
                                FermionField &chi,
                                std::vector<Coeff_t> &lower,
                                std::vector<Coeff_t> &diag,
                                std::vector<Coeff_t> &upper)
{
  int Ls = this->Ls;
  for(int s=0;s<Ls;s++){
    // s==0 takes precedence over s==Ls-1, so for Ls==1 s_up stays s+1.
    int s_up = s+1;
    int s_dn = s-1;
    if ( s==0 ) {
      s_dn = Ls-1;
    } else if ( s==(Ls-1) ) {
      s_up = 0;
    }
    axpby_ssp_pminus(chi,diag[s],phi,upper[s],psi,s,s_up);
    axpby_ssp_pplus (chi,1.0,chi,lower[s],psi,s,s_dn);
  }
}
// M5Ddag expressed through whole-slice axpby helpers. Same slice pairing as
// M5D in this file with the two projecting helpers exchanged:
//   1. axpby_ssp_pplus (chi, diag[s], phi, upper[s], psi, s, s_up)
//   2. axpby_ssp_pminus(chi, 1.0,     chi, lower[s], psi, s, s_dn)
// s_up / s_dn wrap at the last / first slice respectively.
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
                                   const FermionField &phi,
                                   FermionField &chi,
                                   std::vector<Coeff_t> &lower,
                                   std::vector<Coeff_t> &diag,
                                   std::vector<Coeff_t> &upper)
{
  int Ls = this->Ls;
  for(int s=0;s<Ls;s++){
    // s==0 takes precedence over s==Ls-1, so for Ls==1 s_up stays s+1.
    int s_up = s+1;
    int s_dn = s-1;
    if ( s==0 ) {
      s_dn = Ls-1;
    } else if ( s==(Ls-1) ) {
      s_up = 0;
    }
    axpby_ssp_pplus (chi,diag[s],phi,upper[s],psi,s,s_up);
    axpby_ssp_pminus(chi,1.0,chi,lower[s],psi,s,s_dn);
  }
}
// MooeeInv expressed through whole-slice axpby helpers. Four sweeps update
// chi in place using the member tables lee, leem, dee, ueem and uee; each
// sweep reads what the previous one wrote, so the order is fixed.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi, FermionField &chi)
{
  chi.checkerboard = psi.checkerboard;

  int Ls = this->Ls;
  const int last = Ls-1; // index of the final s-slice

  // Stage 1 -- (L')^{-1}: chi[0]=psi[0], then chi[s] = psi[s] - lee[s-1] P_+ chi[s-1]
  axpby_ssp (chi,1.0,psi, 0.0,psi,0,0);
  for (int s=1;s<Ls;s++){
    axpby_ssp_pplus(chi,1.0,psi,-lee[s-1],chi,s,s-1);
  }

  // Stage 2 -- L_m^{-1}: subtract leem[s] P_- chi[s] from the last slice
  for (int s=0;s<last;s++){
    axpby_ssp_pminus(chi,1.0,chi,-leem[s],chi,last,s);
  }

  // Stage 3 -- U_m^{-1} D^{-1}: scale by 1/dee[s] and remove the last-slice term
  for (int s=0;s<last;s++){
    axpby_ssp_pplus(chi,1.0/dee[s],chi,-ueem[s]/dee[last],chi,s,last);
  }
  // Scale the last slice on its own (original note: a modest, avoidable cost)
  axpby_ssp(chi,1.0/dee[last],chi,0.0,chi,last,last);

  // Stage 4 -- U^{-1}: backward recursion from slice Ls-2 down to 0
  for (int s=last-1;s>=0;s--){
    axpby_ssp_pminus (chi,1.0,chi,-uee[s],chi,s,s+1);
  }
}
// MooeeInvDag expressed through whole-slice axpby helpers: the daggered
// counterpart of MooeeInv in this file, with the pplus/pminus helpers
// exchanged and the coefficient tables swapping roles (uee<->lee,
// ueem<->leem). The four sweeps update chi in place and must stay in order.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
  chi.checkerboard = psi.checkerboard;

  int Ls = this->Ls;
  const int last = Ls-1; // index of the final s-slice

  // Stage 1 -- (U')^{-dagger}: chi[0]=psi[0], then forward recursion
  axpby_ssp (chi,1.0,psi, 0.0,psi,0,0);
  for (int s=1;s<Ls;s++){
    axpby_ssp_pminus(chi,1.0,psi,-uee[s-1],chi,s,s-1);
  }

  // Stage 2 -- U_m^{-dagger}: accumulate into the last slice
  for (int s=0;s<last;s++){
    axpby_ssp_pplus(chi,1.0,chi,-ueem[s],chi,last,s);
  }

  // Stage 3 -- L_m^{-dagger} D^{-dagger}
  for (int s=0;s<last;s++){
    axpby_ssp_pminus(chi,1.0/dee[s],chi,-leem[s]/dee[last],chi,s,last);
  }
  // Scale the last slice on its own (original note: a modest, avoidable cost)
  axpby_ssp(chi,1.0/dee[last],chi,0.0,chi,last,last);

  // Stage 4 -- L^{-dagger}: backward recursion from slice Ls-2 down to 0
  for (int s=last-1;s>=0;s--){
    axpby_ssp_pplus (chi,1.0,chi,-lee[s],chi,s,s+1);
  }
}
// Instantiations of the routines in this file, compiled only when
// CAYLEY_DPERP_LINALG is defined.
// NOTE(review): this block invokes INSTANTIATE, whereas the sibling
// cache/vectorised implementations invoke INSTANTIATE_DPERP; INSTANTIATE is
// not defined in this file -- confirm it exists before enabling this path.
#ifdef CAYLEY_DPERP_LINALG
INSTANTIATE(WilsonImplF);
INSTANTIATE(WilsonImplD);
INSTANTIATE(GparityWilsonImplF);
INSTANTIATE(GparityWilsonImplD);
#endif
}
}

View File

@ -1,309 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CayleyFermion5D.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Eigen/Dense>
#include <Grid.h>
namespace Grid {
namespace QCD {
/*
* Dense matrix versions of routines
*/
// Forwards to MooeeInternal with the flags DaggerYes and InverseYes.
// psi is the input field, chi receives the result.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerYes,InverseYes);
}
// Forwards to MooeeInternal with the flags DaggerNo and InverseYes.
// psi is the input field, chi receives the result.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInv(const FermionField &psi, FermionField &chi)
{
this->MooeeInternal(psi,chi,DaggerNo,InverseYes);
}
// M5D for implementations whose 5th dimension is spread across SIMD lanes.
//
// The grid holds LLs = _rdimensions[0] vector slices per block, each packing
// nsimd values of s. The per-s coefficients are first repacked into SIMD
// words so that lane i of word o holds the coefficient for s = o + i*LLs.
// Each block of LLs slices is then processed as
//   chi[v] = d[v]*phi[v] + u[v]*recon5m(0.5*proj5m(psi[v+1]))
//                        + l[v]*recon5p(0.5*proj5p(psi[v-1]))
// with v+1 / v-1 taken modulo LLs. When a neighbour index wraps, the
// projected half spinor is passed through rotate().
//
// psi   : field whose neighbouring slices are projected
// phi   : field scaled by diag; must share psi's checkerboard (asserted)
// chi   : output; its checkerboard is set to psi's
// lower, diag, upper : per-s coefficients, read at indices [0, LLs*nsimd)
template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
                                const FermionField &phi,
                                FermionField &chi,
                                std::vector<Coeff_t> &lower,
                                std::vector<Coeff_t> &diag,
                                std::vector<Coeff_t> &upper)
{
  GridBase *grid=psi._grid;
  int Ls   = this->Ls;
  int LLs  = grid->_rdimensions[0];
  int nsimd= Simd::Nsimd();

  Vector<iSinglet<Simd> > u(LLs);
  Vector<iSinglet<Simd> > l(LLs);
  Vector<iSinglet<Simd> > d(LLs);

  // Ls must be exactly LLs SIMD words of nsimd lanes. The previous check,
  // Ls/LLs==nsimd, used truncating division and so also accepted an Ls that
  // is not a multiple of nsimd, silently ignoring the trailing coefficients.
  assert(LLs*nsimd==Ls);
  assert(phi.checkerboard == psi.checkerboard);
  chi.checkerboard=psi.checkerboard;

  // Address the SIMD words as flat arrays of scalars (type pun) to fill lanes
  typedef typename Simd::scalar_type scalar_type;
  scalar_type * u_p = reinterpret_cast<scalar_type *>(&u[0]);
  scalar_type * l_p = reinterpret_cast<scalar_type *>(&l[0]);
  scalar_type * d_p = reinterpret_cast<scalar_type *>(&d[0]);
  for(int o=0;o<LLs;o++){        // outer: SIMD word
    for(int i=0;i<nsimd;i++){    // inner: lane within the word
      int s    = o+i*LLs;        // s value carried by this lane
      int lane = o*nsimd+i;      // flat scalar offset of this lane
      u_p[lane] = upper[s];
      l_p[lane] = lower[s];
      d_p[lane] = diag[s];
    }
  }

PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs

    alignas(64) SiteHalfSpinor hp;
    alignas(64) SiteHalfSpinor hm;
    alignas(64) SiteSpinor fp;
    alignas(64) SiteSpinor fm;

    for(int v=0;v<LLs;v++){

      int vp=(v+1)%LLs;       // next slice, wrapping inside the block
      int vm=(v+LLs-1)%LLs;   // previous slice, wrapping inside the block

      spProj5m(hp,psi[ss+vp]);
      spProj5p(hm,psi[ss+vm]);

      // A wrapped neighbour index means the required s values sit in a
      // different lane of that word.
      // NOTE(review): rotate(x,x,k) presumably shifts SIMD lanes by k -- confirm.
      if ( vp<=v ) rotate(hp,hp,1);
      if ( vm>=v ) rotate(hm,hm,nsimd-1);

      hp=hp*0.5;
      hm=hm*0.5;
      spRecon5m(fp,hp);
      spRecon5p(fm,hm);

      chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
      chi[ss+v] = chi[ss+v]     +l[v]*fm;
    }
  }
}
// M5Ddag for implementations whose 5th dimension is spread across SIMD lanes.
//
// Identical in structure to the vectorised M5D, with the chirality
// projectors and reconstructions exchanged:
//   chi[v] = d[v]*phi[v] + u[v]*recon5p(0.5*proj5p(psi[v+1]))
//                        + l[v]*recon5m(0.5*proj5m(psi[v-1]))
// with v+1 / v-1 taken modulo LLs = _rdimensions[0]. The per-s coefficients
// are repacked so that lane i of SIMD word o holds the value for
// s = o + i*LLs.
//
// psi   : field whose neighbouring slices are projected
// phi   : field scaled by diag; must share psi's checkerboard (asserted)
// chi   : output; its checkerboard is set to psi's
// lower, diag, upper : per-s coefficients, read at indices [0, LLs*nsimd)
template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
                                   const FermionField &phi,
                                   FermionField &chi,
                                   std::vector<Coeff_t> &lower,
                                   std::vector<Coeff_t> &diag,
                                   std::vector<Coeff_t> &upper)
{
  GridBase *grid=psi._grid;
  int Ls   = this->Ls;
  int LLs  = grid->_rdimensions[0];
  int nsimd= Simd::Nsimd();

  Vector<iSinglet<Simd> > u(LLs);
  Vector<iSinglet<Simd> > l(LLs);
  Vector<iSinglet<Simd> > d(LLs);

  // Ls must be exactly LLs SIMD words of nsimd lanes. The previous check,
  // Ls/LLs==nsimd, used truncating division and so also accepted an Ls that
  // is not a multiple of nsimd, silently ignoring the trailing coefficients.
  assert(LLs*nsimd==Ls);
  assert(phi.checkerboard == psi.checkerboard);
  chi.checkerboard=psi.checkerboard;

  // Address the SIMD words as flat arrays of scalars (type pun) to fill lanes
  typedef typename Simd::scalar_type scalar_type;
  scalar_type * u_p = reinterpret_cast<scalar_type *>(&u[0]);
  scalar_type * l_p = reinterpret_cast<scalar_type *>(&l[0]);
  scalar_type * d_p = reinterpret_cast<scalar_type *>(&d[0]);
  for(int o=0;o<LLs;o++){        // outer: SIMD word
    for(int i=0;i<nsimd;i++){    // inner: lane within the word
      int s    = o+i*LLs;        // s value carried by this lane
      int lane = o*nsimd+i;      // flat scalar offset of this lane
      u_p[lane] = upper[s];
      l_p[lane] = lower[s];
      d_p[lane] = diag[s];
    }
  }

PARALLEL_FOR_LOOP
  for(int ss=0;ss<grid->oSites();ss+=LLs){ // adds LLs

    alignas(64) SiteHalfSpinor hp;
    alignas(64) SiteHalfSpinor hm;
    alignas(64) SiteSpinor fp;
    alignas(64) SiteSpinor fm;

    for(int v=0;v<LLs;v++){

      int vp=(v+1)%LLs;       // next slice, wrapping inside the block
      int vm=(v+LLs-1)%LLs;   // previous slice, wrapping inside the block

      spProj5p(hp,psi[ss+vp]);
      spProj5m(hm,psi[ss+vm]);

      // A wrapped neighbour index means the required s values sit in a
      // different lane of that word.
      // NOTE(review): rotate(x,x,k) presumably shifts SIMD lanes by k -- confirm.
      if ( vp<=v ) rotate(hp,hp,1);
      if ( vm>=v ) rotate(hm,hm,nsimd-1);

      hp=hp*0.5;
      hm=hm*0.5;
      spRecon5p(fp,hp);
      spRecon5m(fm,hm);

      chi[ss+v] = d[v]*phi[ss+v]+u[v]*fp;
      chi[ss+v] = chi[ss+v]     +l[v]*fm;
    }
  }
}
// Dense-matrix MooeeInternal for an s-direction that is spread across SIMD
// lanes (LLs = _rdimensions[0] vector slices per 4d site).
//
// Steps:
//  1. Build complex Ls x Ls matrices Pplus / Pminus from the member tables
//     bee, cee and the member mass; optionally invert (inv) and adjoint (dag).
//  2. Repack them into SIMD words Matp / Matm.
//  3. For every site, project each slice with spProj5p / spProj5m, broadcast
//     one lane of one slice at a time and accumulate matrix*vector products,
//     then reconstruct and scale by 0.5 into chi.
//
// psi : input field; chi : output field (checkerboard copied from psi).
// dag, inv : treated as booleans selecting adjoint / inverse.
// NOTE(review): unlike the sibling M5D/M5Ddag there is no assert relating Ls,
// LLs and Nsimd here; the indexing below assumes Ls == LLs*Nsimd -- confirm.
template<class Impl>
void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv)
{
int Ls=this->Ls;
int LLs = psi._grid->_rdimensions[0];
int vol = psi._grid->oSites()/LLs;
chi.checkerboard=psi.checkerboard;
// Both matrices carry bee on the diagonal.
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s];
Pminus(s,s)= bee[s];
}
// Pminus: -cee[s] on the super-diagonal.
for(int s=0;s<Ls-1;s++){
Pminus(s,s+1) = -cee[s];
}
// Pplus: -cee[s+1] on the sub-diagonal.
for(int s=0;s<Ls-1;s++){
Pplus(s+1,s) = -cee[s+1];
}
// Corner (wrap-around) entries carry the mass.
Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXcd PminusMat;
if ( inv ) {
PplusMat =Pplus.inverse();
PminusMat=Pminus.inverse();
} else {
PplusMat =Pplus;
PminusMat=Pminus;
}
if(dag){
PplusMat.adjointInPlace();
PminusMat.adjointInPlace();
}
typedef typename SiteHalfSpinor::scalar_type scalar_type;
const int Nsimd=Simd::Nsimd();
// Repack: for matrix column s2 and vector slice s1, lane l of the SIMD word
// holds the matrix entry at row l*LLs+s1, column s2.
Vector<iSinglet<Simd> > Matp(Ls*LLs);
Vector<iSinglet<Simd> > Matm(Ls*LLs);
for(int s2=0;s2<Ls;s2++){
for(int s1=0;s1<LLs;s1++){
int istride = LLs;
int ostride = 1;
Simd Vp;
Simd Vm;
// Fill the lanes of Vp / Vm through a scalar pointer (type pun).
scalar_type *sp = (scalar_type *)&Vp;
scalar_type *sm = (scalar_type *)&Vm;
for(int l=0;l<Nsimd;l++){
sp[l] = PplusMat (l*istride+s1*ostride ,s2);
sm[l] = PminusMat(l*istride+s1*ostride,s2);
}
Matp[LLs*s2+s1] = Vp;
Matm[LLs*s2+s1] = Vm;
}
}
// Per-site work buffers are declared inside the parallel loop so each
// iteration has its own. The alloca-based stack allocation below is disabled;
// the buffers currently come from Vector<>, constructed once per site.
PARALLEL_FOR_LOOP
for(auto site=0;site<vol;site++){
// SiteHalfSpinor *SitePplus =(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
// SiteHalfSpinor *SitePminus=(SiteHalfSpinor *) alloca(LLs*sizeof(SiteHalfSpinor));
// SiteSpinor *SiteChi =(SiteSpinor *) alloca(LLs*sizeof(SiteSpinor));
Vector<SiteHalfSpinor> SitePplus(LLs);
Vector<SiteHalfSpinor> SitePminus(LLs);
Vector<SiteHalfSpinor> SiteChiP(LLs);
Vector<SiteHalfSpinor> SiteChiM(LLs);
Vector<SiteSpinor> SiteChi(LLs);
SiteHalfSpinor BcastP;
SiteHalfSpinor BcastM;
// Project every slice of this site once and clear the accumulators.
for(int s=0;s<LLs;s++){
int lex = s+LLs*site;
spProj5p(SitePplus[s] ,psi[lex]);
spProj5m(SitePminus[s],psi[lex]);
SiteChiP[s]=zero;
SiteChiM[s]=zero;
}
// s counts (lane, slice) pairs in order, i.e. s == l*LLs+s2 inside the loops
// below, and is used as the column index into Matp / Matm.
int s=0;
for(int l=0; l<Simd::Nsimd();l++){ // simd lane
for(int s2=0;s2<LLs;s2++){ // Column loop of right hand side
// Broadcast lane l of slice s2 to all lanes, then accumulate its
// contribution into every output slice s1.
vbroadcast(BcastP,SitePplus [s2],l);
vbroadcast(BcastM,SitePminus[s2],l);
for(int s1=0;s1<LLs;s1++){ // Column loop of reduction variables
SiteChiP[s1]=SiteChiP[s1]+Matp[LLs*s+s1]*BcastP;
SiteChiM[s1]=SiteChiM[s1]+Matm[LLs*s+s1]*BcastM;
}
s++;
}}
// Reconstruct full spinors from the two accumulators and store half the sum.
for(int s=0;s<LLs;s++){
int lex = s+LLs*site;
spRecon5p(SiteChi[s],SiteChiP[s]);
accumRecon5m(SiteChi[s],SiteChiM[s]);
chi[lex] = SiteChi[s]*0.5;
}
}
}
// Explicit instantiations for the DomainWallVec5dImpl and
// ZDomainWallVec5dImpl types (D and F variants): first through the
// INSTANTIATE_DPERP macro (defined outside this file), then MooeeInternal
// explicitly for the same four types.
INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}}

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_DOMAIN_WALL_FERMION_H #ifndef GRID_QCD_DOMAIN_WALL_FERMION_H
#define GRID_QCD_DOMAIN_WALL_FERMION_H #define GRID_QCD_DOMAIN_WALL_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -1,36 +1,35 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */ #ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H #define GRID_QCD_FERMION_OPERATOR_IMPL_H
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
namespace Grid { namespace Grid {
@ -76,7 +75,7 @@ namespace Grid {
// //
// //
// template<class Impl> // template<class Impl>
// class MyOp : public<Impl> { // class MyOp : pubic<Impl> {
// public: // public:
// //
// INHERIT_ALL_IMPL_TYPES(Impl); // INHERIT_ALL_IMPL_TYPES(Impl);
@ -100,80 +99,63 @@ namespace Grid {
typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SiteSpinor SiteSpinor; \
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \ typedef typename Impl::ImplParams ImplParams;
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base)\
INHERIT_FIMPL_TYPES(Base) INHERIT_FIMPL_TYPES(Base)
/////// ///////
// Single flavour four spinors with colour index // Single flavour four spinors with colour index
/////// ///////
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD > template<class S,int Nrepresentation=Nc>
class WilsonImpl class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
: public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public: public:
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >; template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >; template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor <Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
ImplParams Params; ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
inline void multLink(SiteHalfSpinor &phi, inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
const SiteDoubledGaugeField &U, mult(&phi(),&U(mu),&chi());
const SiteHalfSpinor &chi,
int mu,
StencilEntry *SE,
StencilImpl &St) {
mult(&phi(), &U(mu), &chi());
} }
template <class ref> template<class ref>
inline void loadLinkElement(Simd &reg, inline void loadLinkElement(Simd & reg,ref &memory){
ref &memory) {
reg = memory; reg = memory;
} }
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
inline void DoubleStore(GridBase *GaugeGrid, {
DoubledGaugeField &Uds, conformable(Uds._grid,GaugeGrid);
const GaugeField &Umu) { conformable(Umu._grid,GaugeGrid);
conformable(Uds._grid, GaugeGrid); GaugeLinkField U(GaugeGrid);
conformable(Umu._grid, GaugeGrid); for(int mu=0;mu<Nd;mu++){
GaugeLinkField U(GaugeGrid); U = PeekIndex<LorentzIndex>(Umu,mu);
for (int mu = 0; mu < Nd; mu++) { PokeIndex<LorentzIndex>(Uds,U,mu);
U = PeekIndex<LorentzIndex>(Umu, mu); U = adj(Cshift(U,mu,-1));
PokeIndex<LorentzIndex>(Uds, U, mu); PokeIndex<LorentzIndex>(Uds,U,mu+4);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
} }
} }
@ -186,171 +168,154 @@ namespace Grid {
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde._grid->_fdimensions[0]; int Ls=Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP for(int sss=0;sss<tmp._grid->oSites();sss++){
for(int sss=0;sss<tmp._grid->oSites();sss++){ int sU=sss;
int sU=sss; for(int s=0;s<Ls;s++){
for(int s=0;s<Ls;s++){ int sF = s+Ls*sU;
int sF = s+Ls*sU; tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
} }
}
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
} }
}; };
/////// ///////
// Single flavour four spinors with colour index, 5d redblack // Single flavour four spinors with colour index, 5d redblack
/////// ///////
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD> template<class S,int Nrepresentation=Nc>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { class DomainWallRedBlack5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>; template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor <Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
// Make the doubled gauge field a *scalar* // Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
SiteDoubledGaugeField; // This is a scalar typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeField<typename Simd::scalar_type> typedef iImplGaugeLink <typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type>
SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
ImplParams Params; ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; DomainWallRedBlack5dImpl(const ImplParams &p= ImplParams()) : Params(p) {};
bool overlapCommsCompute(void) { return false; }; bool overlapCommsCompute(void) { return false; };
template <class ref> template<class ref>
inline void loadLinkElement(Simd &reg, ref &memory) { inline void loadLinkElement(Simd & reg,ref &memory){
vsplat(reg, memory); vsplat(reg,memory);
} }
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
const SiteHalfSpinor &chi, int mu, StencilEntry *SE, {
StencilImpl &St) {
SiteGaugeLink UU; SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) { for(int i=0;i<Nrepresentation;i++){
for (int j = 0; j < Nrepresentation; j++) { for(int j=0;j<Nrepresentation;j++){
vsplat(UU()()(i, j), U(mu)()(i, j)); vsplat(UU()()(i,j),U(mu)()(i,j));
} }
} }
mult(&phi(), &UU(), &chi()); mult(&phi(),&UU(),&chi());
} }
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds, inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
const GaugeField &Umu) { {
SiteScalarGaugeField ScalarUmu; SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds; SiteDoubledGaugeField ScalarUds;
GaugeLinkField U(Umu._grid); GaugeLinkField U (Umu._grid);
GaugeField Uadj(Umu._grid); GaugeField Uadj(Umu._grid);
for (int mu = 0; mu < Nd; mu++) { for(int mu=0;mu<Nd;mu++){
U = PeekIndex<LorentzIndex>(Umu, mu); U = PeekIndex<LorentzIndex>(Umu,mu);
U = adj(Cshift(U, mu, -1)); U = adj(Cshift(U,mu,-1));
PokeIndex<LorentzIndex>(Uadj, U, mu); PokeIndex<LorentzIndex>(Uadj,U,mu);
} }
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){
std::vector<int> lcoor; std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor);
peekLocalSite(ScalarUmu, Umu, lcoor); peekLocalSite(ScalarUmu,Umu,lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu, Uadj, lcoor); peekLocalSite(ScalarUmu,Uadj,lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds, Uds, lcoor); pokeLocalSite(ScalarUds,Uds,lcoor);
} }
} }
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
FermionField &A, int mu) {
assert(0); assert(0);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
FermionField &Atilde, int mu) {
assert(0); assert(0);
} }
}; };
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template <class S, int Nrepresentation,class _Coeff_t = RealD> template<class S,int Nrepresentation>
class GparityWilsonImpl class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
: public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >;
using iImplSpinor = template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >;
iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>; template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >;
template <typename vtype>
using iImplHalfSpinor =
iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
template <typename vtype>
using iImplDoubledGaugeField =
iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor <Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams; typedef GparityWilsonImplParams ImplParams;
ImplParams Params; ImplParams Params;
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
// provide the multiply by link that is differentiated between Gparity (with // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity
// flavour index) and non-Gparity inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
typedef SiteHalfSpinor vobj; typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj; typedef typename SiteHalfSpinor::scalar_object sobj;
@ -361,17 +326,17 @@ namespace Grid {
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
int sl = St._grid->_simd_layout[direction]; int sl = St._grid->_simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil // Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd; int mmu = mu % Nd;
// assert our assumptions // assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2)); assert((sl==1)||(sl==2));
std::vector<int> icoor; std::vector<int> icoor;
@ -415,7 +380,7 @@ namespace Grid {
mult(&phi(1),&U(1)(mu),&chi(1)); mult(&phi(1),&U(1)(mu),&chi(1));
} }
} }
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{ {
@ -444,11 +409,11 @@ namespace Grid {
} }
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu) = U[ss](); Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss](); Uds[ss](1)(mu) = Uconj[ss]();
} }
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1)); Uconj = adj(Cshift(Uconj,mu,-1));
@ -458,86 +423,68 @@ namespace Grid {
Utmp = where(coor==0,Uconj,Utmp); Utmp = where(coor==0,Uconj,Utmp);
} }
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss](); Uds[ss](0)(mu+4) = Utmp[ss]();
} }
Utmp = Uconj; Utmp = Uconj;
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp); Utmp = where(coor==0,U,Utmp);
} }
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss](); Uds[ss](1)(mu+4) = Utmp[ss]();
} }
} }
} }
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour. // DhopDir provides U or Uconj depending on coor/flavour.
GaugeLinkField link(mat._grid); GaugeLinkField link(mat._grid);
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { for(auto ss=tmp.begin();ss<tmp.end();ss++){
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); link[ss]() = tmp[ss](0,0) - conjugate(tmp[ss](1,1)) ;
} }
PokeIndex<LorentzIndex>(mat, link, mu); PokeIndex<LorentzIndex>(mat,link,mu);
return; return;
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, int Ls=Btilde._grid->_fdimensions[0];
FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int ss = 0; ss < tmp._grid->oSites(); ss++) { for(int ss=0;ss<tmp._grid->oSites();ss++){
for (int s = 0; s < Ls; s++) { for(int s=0;s<Ls;s++){
int sF = s + Ls * ss; int sF = s+Ls*ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])); auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF]));
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1));
}
} }
PokeIndex<LorentzIndex>(mat, tmp, mu); }
PokeIndex<LorentzIndex>(mat,tmp,mu);
return; return;
} }
}; };
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
typedef DomainWallRedBlack5dImpl<vComplex ,Nc> DomainWallRedBlack5dImplR; // Real.. whichever prec
typedef DomainWallRedBlack5dImpl<vComplexF,Nc> DomainWallRedBlack5dImplF; // Float
typedef DomainWallRedBlack5dImpl<vComplexD,Nc> DomainWallRedBlack5dImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec }
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef GparityWilsonImpl<vComplex, Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
}
} }
#endif #endif

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_MOBIUS_FERMION_H #ifndef GRID_QCD_MOBIUS_FERMION_H
#define GRID_QCD_MOBIUS_FERMION_H #define GRID_QCD_MOBIUS_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #ifndef GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H #define GRID_QCD_MOBIUS_ZOLOTAREV_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #ifndef OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H #define OVERLAP_WILSON_CAYLEY_TANH_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #ifndef OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CAYLEY_ZOLOTAREV_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #ifndef OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_CONTFRAC_TANH_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #ifndef OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_CONTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #ifndef OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H #define OVERLAP_WILSON_PARTFRAC_TANH_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #ifndef OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H #define OVERLAP_WILSON_PARTFRAC_ZOLOTAREV_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H #ifndef GRID_QCD_SCALED_SHAMIR_FERMION_H
#define GRID_QCD_SCALED_SHAMIR_FERMION_H #define GRID_QCD_SCALED_SHAMIR_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -29,7 +29,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #ifndef GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H #define GRID_QCD_SHAMIR_ZOLOTAREV_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -1,315 +1,319 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, const std::vector<int> WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3});
3}); const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1});
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, int WilsonFermionStatic::HandOptDslash;
-1, -1});
int WilsonFermionStatic::HandOptDslash;
///////////////////////////////// /////////////////////////////////
// Constructor and gauge import // Constructor and gauge import
///////////////////////////////// /////////////////////////////////
template <class Impl> template<class Impl>
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu,
GridRedBlackCartesian &Hgrid, RealD _mass, GridCartesian &Fgrid,
const ImplParams &p) GridRedBlackCartesian &Hgrid,
: Kernels(p), RealD _mass,const ImplParams &p) :
_grid(&Fgrid), Kernels(p),
_cbgrid(&Hgrid), _grid(&Fgrid),
Stencil(&Fgrid, npoint, Even, directions, displacements), _cbgrid(&Hgrid),
StencilEven(&Hgrid, npoint, Even, directions, Stencil (&Fgrid,npoint,Even,directions,displacements),
displacements), // source is Even StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even
StencilOdd(&Hgrid, npoint, Odd, directions, StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd
displacements), // source is Odd mass(_mass),
mass(_mass), Lebesgue(_grid),
Lebesgue(_grid), LebesgueEvenOdd(_cbgrid),
LebesgueEvenOdd(_cbgrid), Umu(&Fgrid),
Umu(&Fgrid), UmuEven(&Hgrid),
UmuEven(&Hgrid), UmuOdd (&Hgrid)
UmuOdd(&Hgrid) { {
// Allocate the required comms buffer // Allocate the required comms buffer
ImportGauge(_Umu); ImportGauge(_Umu);
}
template <class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
GaugeField HUmu(_Umu._grid);
HUmu = _Umu * (-0.5);
Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd, Umu);
}
/////////////////////////////
// Implement the interface
/////////////////////////////
template <class Impl>
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerNo);
return axpy_norm(out, 4 + mass, in, out);
}
template <class Impl>
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerYes);
return axpy_norm(out, 4 + mass, in, out);
}
template <class Impl>
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
} }
}
template <class Impl> template<class Impl>
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) { void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
if (in.checkerboard == Odd) { {
DhopEO(in, out, DaggerYes); GaugeField HUmu(_Umu._grid);
} else { HUmu = _Umu*(-0.5);
DhopOE(in, out, DaggerYes); Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);
} }
}
template <class Impl> /////////////////////////////
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) { // Implement the interface
out.checkerboard = in.checkerboard; /////////////////////////////
typename FermionField::scalar_type scal(4.0 + mass);
out = scal * in;
}
template <class Impl> template<class Impl>
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) { RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
out.checkerboard = in.checkerboard; {
Mooee(in, out); out.checkerboard=in.checkerboard;
} Dhop(in,out,DaggerNo);
return axpy_norm(out,4+mass,in,out);
template <class Impl>
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out = (1.0 / (4.0 + mass)) * in;
}
template <class Impl>
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in,
FermionField &out) {
out.checkerboard = in.checkerboard;
MooeeInv(in, out);
}
///////////////////////////////////
// Internal
///////////////////////////////////
template <class Impl>
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
GaugeField &mat, const FermionField &A,
const FermionField &B, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;
st.HaloExchange(B, compressor);
for (int mu = 0; mu < Nd; mu++) {
////////////////////////////////////////////////////////////////////////
// Flip gamma (1+g)<->(1-g) if dag
////////////////////////////////////////////////////////////////////////
int gamma = mu;
if (!dag) gamma += Nd;
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu,
gamma);
}
//////////////////////////////////////////////////
// spin trace outer product
//////////////////////////////////////////////////
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
} }
}
template <class Impl> template<class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
const FermionField &V, int dag) { {
conformable(U._grid, _grid); out.checkerboard=in.checkerboard;
conformable(U._grid, V._grid); Dhop(in,out,DaggerYes);
conformable(U._grid, mat._grid); return axpy_norm(out,4+mass,in,out);
mat.checkerboard = U.checkerboard;
DerivInternal(Stencil, Umu, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out,
int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,
int dir, int disp) {
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
};
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
int dirdisp, int gamma, int dag) {
Compressor compressor(dag);
Stencil.HaloExchange(in, compressor);
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out,
dirdisp, gamma);
} }
};
template <class Impl> template<class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo, void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
DoubledGaugeField &U, {
const FermionField &in, if ( in.checkerboard == Odd ) {
FermionField &out, int dag) { DhopEO(in,out,DaggerNo);
assert((dag == DaggerNo) || (dag == DaggerYes)); } else {
DhopOE(in,out,DaggerNo);
Compressor compressor(dag); }
st.HaloExchange(in, compressor); }
template<class Impl>
if (dag == DaggerYes) { void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
PARALLEL_FOR_LOOP {
for (int sss = 0; sss < in._grid->oSites(); sss++) { if ( in.checkerboard == Odd ) {
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, DhopEO(in,out,DaggerYes);
out); } else {
} DhopOE(in,out,DaggerYes);
} else {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
out);
} }
} }
};
FermOpTemplateInstantiate(WilsonFermion); template<class Impl>
AdjointFermOpTemplateInstantiate(WilsonFermion); void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
TwoIndexFermOpTemplateInstantiate(WilsonFermion); out.checkerboard = in.checkerboard;
GparityFermOpTemplateInstantiate(WilsonFermion); typename FermionField::scalar_type scal(4.0+mass);
} out = scal*in;
} }
template<class Impl>
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Mooee(in,out);
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
out = (1.0/(4.0+mass))*in;
}
template<class Impl>
void WilsonFermion<Impl>::MooeeInvDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
MooeeInv(in,out);
}
///////////////////////////////////
// Internal
///////////////////////////////////
template<class Impl>
void WilsonFermion<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,int dag) {
assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;
st.HaloExchange(B,compressor);
for(int mu=0;mu<Nd;mu++){
////////////////////////////////////////////////////////////////////////
// Flip gamma (1+g)<->(1-g) if dag
////////////////////////////////////////////////////////////////////////
int gamma = mu;
if ( !dag ) gamma+= Nd;
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for(int sss=0;sss<B._grid->oSites();sss++){
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma);
}
//////////////////////////////////////////////////
// spin trace outer product
//////////////////////////////////////////////////
Impl::InsertForce4D(mat,Btilde,Atilde,mu);
}
}
template<class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_grid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
mat.checkerboard = U.checkerboard;
DerivInternal(Stencil,Umu,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_cbgrid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
assert(V.checkerboard==Even);
assert(U.checkerboard==Odd);
mat.checkerboard = Odd;
DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_cbgrid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
assert(V.checkerboard==Odd);
assert(U.checkerboard==Even);
mat.checkerboard = Even;
DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_grid); // verifies full grid
conformable(in._grid,out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
}
// Single direction/displacement application: a pure forwarder to DhopDir
// with the same arguments in the same order.
template <class Impl>
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out,
                               int dir, int disp)
{
  this->DhopDir(in, out, dir, disp);
}
// Translate a (dir, disp) pair into the (dirdisp, gamma) index pair expected
// by DhopDirDisp, then apply the non-daggered single hop.
//   disp == 1      : dirdisp = dir,     gamma = dir + 4
//   any other disp : dirdisp = dir + 4, gamma = dir
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,
                                  int dir, int disp)
{
  const bool plusOne = (disp == 1);

  // Exactly one of the two indices receives the +4 offset.
  const int dirdisp = plusOne ? dir : dir + 4;
  const int gamma   = plusOne ? dir + 4 : dir;

  this->DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
}
// Apply the single-hop kernel for one (dirdisp, gamma) pair at every outer
// site of in's grid, using the full stencil and full gauge field.
//   in      : source fermion field; halo-exchanged through Stencil first
//   out     : destination fermion field written by the kernel
//   dirdisp : stencil point index handed to the kernel
//   gamma   : spin-projector index handed to the kernel
//   dag     : used only to construct the halo compressor
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
                                      int dirdisp, int gamma, int dag)
{
  Compressor compressor(dag);
  Stencil.HaloExchange(in, compressor);

  // PARALLEL_FOR_LOOP is defined elsewhere (presumably a threading pragma);
  // it must stay directly in front of the for statement it decorates.
PARALLEL_FOR_LOOP
  for (int site = 0; site < in._grid->oSites(); site++) {
    // The same index is passed for both site arguments of the kernel.
    Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf,
                             site, site, in, out, dirdisp, gamma);
  }
}
// Shared worker behind Dhop / DhopOE / DhopEO: halo-exchange the source
// through the supplied stencil, then run the site kernel (plain or daggered)
// over every outer site of in's grid.
//   st  : stencil used for the halo exchange and passed to the kernel
//   lo  : Lebesgue ordering object passed to the kernel
//   U   : doubled gauge field passed to the kernel
//   in  : source fermion field
//   out : destination fermion field
//   dag : must be DaggerNo or DaggerYes (asserted); selects the kernel
template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
                                       DoubledGaugeField &U,
                                       const FermionField &in, FermionField &out,
                                       int dag)
{
  assert((dag == DaggerNo) || (dag == DaggerYes));

  Compressor compressor(dag);
  st.HaloExchange(in, compressor);

  // Anything that is not DaggerYes takes the plain kernel, as in the
  // original if/else (this matters only when the assert is compiled out).
  if (dag != DaggerYes) {
PARALLEL_FOR_LOOP
    for (int site = 0; site < in._grid->oSites(); site++) {
      Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, site, site, 1, 1, in, out);
    }
  } else {
PARALLEL_FOR_LOOP
    for (int site = 0; site < in._grid->oSites(); site++) {
      Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, site, site, 1, 1, in, out);
    }
  }
}
// Instantiation macros applied to WilsonFermion.  Both macros are defined
// outside this chunk.
// NOTE(review): presumably they emit explicit template instantiations for the
// standard and G-parity Impl lists respectively — confirm against the macro
// definitions.
FermOpTemplateInstantiate(WilsonFermion);
GparityFermOpTemplateInstantiate(WilsonFermion);
}}

View File

@ -1,155 +1,161 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.h Source file: ./lib/qcd/action/fermion/WilsonFermion.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */ #ifndef GRID_QCD_WILSON_FERMION_H
#ifndef GRID_QCD_WILSON_FERMION_H #define GRID_QCD_WILSON_FERMION_H
#define GRID_QCD_WILSON_FERMION_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
class WilsonFermionStatic { class WilsonFermionStatic {
public: public:
static int HandOptDslash; // these are a temporary hack static int HandOptDslash; // these are a temporary hack
static int MortonOrder; static int MortonOrder;
static const std::vector<int> directions; static const std::vector<int> directions ;
static const std::vector<int> displacements; static const std::vector<int> displacements;
static const int npoint = 8; static const int npoint=8;
}; };
template <class Impl> template<class Impl>
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic { class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic
public: {
INHERIT_IMPL_TYPES(Impl); public:
typedef WilsonKernels<Impl> Kernels; INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Implement the abstract base // Implement the abstract base
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _grid; } GridBase *GaugeGrid(void) { return _grid ;}
GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;}
GridBase *FermionGrid(void) { return _grid; } GridBase *FermionGrid(void) { return _grid;}
GridBase *FermionRedBlackGrid(void) { return _cbgrid; } GridBase *FermionRedBlackGrid(void) { return _cbgrid;}
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// override multiply; cut number routines if pass dagger argument // override multiply; cut number routines if pass dagger argument
// and also make interface more uniformly consistent // and also make interface more uniformly consistent
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
RealD M(const FermionField &in, FermionField &out); RealD M(const FermionField &in, FermionField &out);
RealD Mdag(const FermionField &in, FermionField &out); RealD Mdag(const FermionField &in, FermionField &out);
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
// half checkerboard operations // half checkerboard operations
// could remain virtual so we can derive Clover from Wilson base // could remain virtual so we can derive Clover from Wilson base
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
void Meooe(const FermionField &in, FermionField &out); void Meooe(const FermionField &in, FermionField &out) ;
void MeooeDag(const FermionField &in, FermionField &out); void MeooeDag(const FermionField &in, FermionField &out) ;
// allow override for twisted mass and clover // allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out); virtual void Mooee(const FermionField &in, FermionField &out) ;
virtual void MooeeDag(const FermionField &in, FermionField &out); virtual void MooeeDag(const FermionField &in, FermionField &out) ;
virtual void MooeeInv(const FermionField &in, FermionField &out); virtual void MooeeInv(const FermionField &in, FermionField &out) ;
virtual void MooeeInvDag(const FermionField &in, FermionField &out); virtual void MooeeInvDag(const FermionField &in, FermionField &out) ;
//////////////////////// ////////////////////////
// Derivative interface // Derivative interface
//////////////////////// ////////////////////////
// Interface calls an internal routine // Interface calls an internal routine
void DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
int dag); void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat, const FermionField &U, void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
const FermionField &V, int dag);
void DhopDerivEO(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag);
///////////////////////////////////////////////////////////////
// non-hermitian hopping term; half cb or both
///////////////////////////////////////////////////////////////
void Dhop(const FermionField &in, FermionField &out, int dag);
void DhopOE(const FermionField &in, FermionField &out, int dag);
void DhopEO(const FermionField &in, FermionField &out, int dag);
///////////////////////////////////////////////////////////////
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
int gamma, int dag);
///////////////////////////////////////////////////////////////
// Extra methods added by derived
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat,
const FermionField &A, const FermionField &B, int dag);
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
const FermionField &in, FermionField &out, int dag);
// Constructor
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p = ImplParams());
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
// protected:
public:
RealD mass;
GridBase *_grid;
GridBase *_cbgrid;
// Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
};
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
} ///////////////////////////////////////////////////////////////
// non-hermitian hopping term; half cb or both
///////////////////////////////////////////////////////////////
void Dhop(const FermionField &in, FermionField &out,int dag) ;
void DhopOE(const FermionField &in, FermionField &out,int dag) ;
void DhopEO(const FermionField &in, FermionField &out,int dag) ;
///////////////////////////////////////////////////////////////
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ;
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ;
///////////////////////////////////////////////////////////////
// Extra methods added by derived
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) ;
// Constructor
WilsonFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
const ImplParams &p= ImplParams()
) ;
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
// protected:
public:
RealD mass;
GridBase * _grid;
GridBase * _cbgrid;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
};
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
}
} }
#endif #endif

View File

@ -42,15 +42,15 @@ const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1
// 5d lattice for DWF. // 5d lattice for DWF.
template<class Impl> template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _M5,const ImplParams &p) : RealD _M5,const ImplParams &p) :
Kernels(p), Kernels(p),
_FiveDimGrid (&FiveDimGrid), _FiveDimGrid(&FiveDimGrid),
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid), _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid), _FourDimGrid(&FourDimGrid),
_FourDimRedBlackGrid(&FourDimRedBlackGrid), _FourDimRedBlackGrid(&FourDimRedBlackGrid),
Stencil (_FiveDimGrid,npoint,Even,directions,displacements), Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
@ -62,83 +62,60 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
Lebesgue(_FourDimGrid), Lebesgue(_FourDimGrid),
LebesgueEvenOdd(_FourDimRedBlackGrid) LebesgueEvenOdd(_FourDimRedBlackGrid)
{ {
if (Impl::LsVectorised) { // some assertions
assert(FiveDimGrid._ndimension==5);
assert(FourDimGrid._ndimension==4);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FourDimRedBlackGrid._ndimension==4);
assert(FiveDimRedBlackGrid._checker_dim==1);
int nsimd = Simd::Nsimd(); // Dimension zero of the five-d is the Ls direction
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimRedBlackGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==1);
// some assertions // Other dimensions must match the decomposition of the four-D fields
assert(FiveDimGrid._ndimension==5); for(int d=0;d<4;d++){
assert(FiveDimRedBlackGrid._ndimension==5); assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FourDimGrid._ndimension==4);
// Dimension zero of the five-d is the Ls direction assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
Ls=FiveDimGrid._fdimensions[0]; assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==nsimd);
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls); assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._processors[0] ==1); assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
// Other dimensions must match the decomposition of the four-D fields assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
for(int d=0;d<4;d++){ assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]); assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimGrid._simd_layout[d]=1);
assert(FourDimRedBlackGrid._simd_layout[d]=1);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
}
} else {
// some assertions
assert(FiveDimGrid._ndimension==5);
assert(FourDimGrid._ndimension==4);
assert(FiveDimRedBlackGrid._ndimension==5);
assert(FourDimRedBlackGrid._ndimension==4);
assert(FiveDimRedBlackGrid._checker_dim==1);
// Dimension zero of the five-d is the Ls direction
Ls=FiveDimGrid._fdimensions[0];
assert(FiveDimRedBlackGrid._fdimensions[0]==Ls);
assert(FiveDimRedBlackGrid._processors[0] ==1);
assert(FiveDimRedBlackGrid._simd_layout[0]==1);
assert(FiveDimGrid._processors[0] ==1);
assert(FiveDimGrid._simd_layout[0] ==1);
// Other dimensions must match the decomposition of the four-D fields
for(int d=0;d<4;d++){
assert(FourDimRedBlackGrid._fdimensions[d] ==FourDimGrid._fdimensions[d]);
assert(FiveDimRedBlackGrid._fdimensions[d+1]==FourDimGrid._fdimensions[d]);
assert(FourDimRedBlackGrid._processors[d] ==FourDimGrid._processors[d]);
assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FourDimRedBlackGrid._simd_layout[d] ==FourDimGrid._simd_layout[d]);
assert(FiveDimRedBlackGrid._simd_layout[d+1]==FourDimGrid._simd_layout[d]);
assert(FiveDimGrid._fdimensions[d+1] ==FourDimGrid._fdimensions[d]);
assert(FiveDimGrid._processors[d+1] ==FourDimGrid._processors[d]);
assert(FiveDimGrid._simd_layout[d+1] ==FourDimGrid._simd_layout[d]);
}
} }
// Allocate the required comms buffer // Allocate the required comms buffer
ImportGauge(_Umu); ImportGauge(_Umu);
} }
/*
template<class Impl> template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu, WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
RealD _M5,const ImplParams &p) : RealD _M5,const ImplParams &p) :
Kernels(p),
_FiveDimGrid (&FiveDimGrid),
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
_FourDimGrid (&FourDimGrid),
Stencil (_FiveDimGrid,npoint,Even,directions,displacements),
StencilEven(_FiveDimRedBlackGrid,npoint,Even,directions,displacements), // source is Even
StencilOdd (_FiveDimRedBlackGrid,npoint,Odd ,directions,displacements), // source is Odd
M5(_M5),
Umu(_FourDimGrid),
UmuEven(_FourDimGrid),
UmuOdd (_FourDimGrid),
Lebesgue(_FourDimGrid),
LebesgueEvenOdd(_FourDimGrid)
{ {
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();
@ -171,75 +148,13 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
} }
{ {
GaugeField HUmu(_Umu._grid);
HUmu = _Umu*(-0.5);
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
UmuEven=Umu;// Really want a reference.
UmuOdd =Umu;
} }
} }
*/
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
if ( DhopCalls > 0 ) {
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
<< " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
<< DhopCommTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
<< DhopComputeTime << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
<< DhopComputeTime / DhopCalls << " us" << std::endl;
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
}
if ( DerivCalls > 0 ) {
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
}
if (DerivCalls > 0 || DhopCalls > 0){
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
}
}
template<class Impl>
void WilsonFermion5D<Impl>::ZeroCounters(void) {
DhopCalls = 0;
DhopCommTime = 0;
DhopComputeTime = 0;
DerivCalls = 0;
DerivCommTime = 0;
DerivComputeTime = 0;
DerivDhopComputeTime = 0;
Stencil.ZeroCounters();
StencilEven.ZeroCounters();
StencilOdd.ZeroCounters();
}
template<class Impl> template<class Impl>
@ -282,13 +197,12 @@ PARALLEL_FOR_LOOP
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U, DoubledGaugeField & U,
GaugeField &mat, GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
DerivCalls++;
assert((dag==DaggerNo) ||(dag==DaggerYes)); assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st._grid,A._grid); conformable(st._grid,A._grid);
@ -299,53 +213,51 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
FermionField Btilde(B._grid); FermionField Btilde(B._grid);
FermionField Atilde(B._grid); FermionField Atilde(B._grid);
DerivCommTime-=usecond();
st.HaloExchange(B,compressor); st.HaloExchange(B,compressor);
DerivCommTime+=usecond();
Atilde=A; Atilde=A;
DerivComputeTime-=usecond(); for(int mu=0;mu<Nd;mu++){
for (int mu = 0; mu < Nd; mu++) {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Flip gamma if dag // Flip gamma if dag
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
int gamma = mu; int gamma = mu;
if (!dag) gamma += Nd; if ( !dag ) gamma+= Nd;
//////////////////////// ////////////////////////
// Call the single hop // Call the single hop
//////////////////////// ////////////////////////
DerivDhopComputeTime -= usecond(); PARALLEL_FOR_LOOP
PARALLEL_FOR_LOOP for(int sss=0;sss<U._grid->oSites();sss++){
for (int sss = 0; sss < U._grid->oSites(); sss++) { for(int s=0;s<Ls;s++){
for (int s = 0; s < Ls; s++) { int sU=sss;
int sU = sss; int sF = s+Ls*sU;
int sF = s + Ls * sU;
assert(sF < B._grid->oSites()); assert ( sF< B._grid->oSites());
assert(sU < U._grid->oSites()); assert ( sU< U._grid->oSites());
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu, Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma);
gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
////////////////////////////
// spin trace outer product
////////////////////////////
} }
} }
DerivDhopComputeTime += usecond();
Impl::InsertForce5D(mat, Btilde, Atilde, mu); Impl::InsertForce5D(mat,Btilde,Atilde,mu);
} }
DerivComputeTime += usecond();
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat, void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionGrid()); conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
@ -358,9 +270,9 @@ void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -376,9 +288,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -393,61 +305,32 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) const FermionField &in, FermionField &out,int dag)
{ {
DhopCalls++;
// assert((dag==DaggerNo) ||(dag==DaggerYes)); // assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag); Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0]; int LLs = in._grid->_rdimensions[0];
DhopCommTime-=usecond();
st.HaloExchange(in,compressor); st.HaloExchange(in,compressor);
DhopCommTime+=usecond();
DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion // Dhop takes the 4d grid from U, and makes a 5d index for fermion
if (dag == DaggerYes) { if ( dag == DaggerYes ) {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) { for(int ss=0;ss<U._grid->oSites();ss++){
int sU = ss; int sU=ss;
int sF = LLs * sU; int sF=LLs*sU;
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
out);
} }
#ifdef AVX512
} else if (stat.is_init() ) {
int nthreads;
stat.start();
#pragma omp parallel
{
#pragma omp master
nthreads = omp_get_num_threads();
int mythread = omp_get_thread_num();
stat.enter(mythread);
#pragma omp for nowait
for(int ss=0;ss<U._grid->oSites();ss++)
{
int sU=ss;
int sF=LLs*sU;
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
}
stat.exit(mythread);
}
stat.accum(nthreads);
#endif
} else { } else {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for (int ss = 0; ss < U._grid->oSites(); ss++) { for(int ss=0;ss<U._grid->oSites();ss++){
int sU = ss; int sU=ss;
int sF = LLs * sU; int sF=LLs*sU;
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
out);
} }
} }
DhopComputeTime+=usecond();
} }
@ -493,6 +376,8 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
FermOpTemplateInstantiate(WilsonFermion5D); FermOpTemplateInstantiate(WilsonFermion5D);
GparityFermOpTemplateInstantiate(WilsonFermion5D); GparityFermOpTemplateInstantiate(WilsonFermion5D);
template class WilsonFermion5D<DomainWallRedBlack5dImplF>;
template class WilsonFermion5D<DomainWallRedBlack5dImplD>;
}} }}

View File

@ -31,8 +31,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_WILSON_FERMION_5D_H #ifndef GRID_QCD_WILSON_FERMION_5D_H
#define GRID_QCD_WILSON_FERMION_5D_H #define GRID_QCD_WILSON_FERMION_5D_H
#include <Grid/Stat.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
@ -62,18 +60,6 @@ namespace Grid {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels; typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Implement the abstract base // Implement the abstract base
@ -139,14 +125,12 @@ namespace Grid {
double _M5,const ImplParams &p= ImplParams()); double _M5,const ImplParams &p= ImplParams());
// Constructors // Constructors
/*
WilsonFermion5D(int simd, WilsonFermion5D(int simd,
GaugeField &_Umu, GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
double _M5,const ImplParams &p= ImplParams()); double _M5,const ImplParams &p= ImplParams());
*/
// DoubleStore // DoubleStore
void ImportGauge(const GaugeField &_Umu); void ImportGauge(const GaugeField &_Umu);

File diff suppressed because it is too large Load Diff

View File

@ -1,35 +1,34 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.h Source file: ./lib/qcd/action/fermion/WilsonKernels.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */ #ifndef GRID_QCD_DHOP_H
#ifndef GRID_QCD_DHOP_H #define GRID_QCD_DHOP_H
#define GRID_QCD_DHOP_H
namespace Grid { namespace Grid {
@ -49,158 +48,51 @@ namespace Grid {
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base; typedef FermionOperator<Impl> Base;
public: public:
template <bool EnableBool = true> void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
DiracOptDhopSite( int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
out);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
Ns, in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
}
}
void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out);
void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in,FermionField &out);
void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p= ImplParams());
};
}
}
#endif #endif

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -26,55 +26,46 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#if defined(AVX512) #if defined(AVX512)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine // If we are AVX512 specialise the single precision routine
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h> #include <simd/Intel512wilson.h>
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
static Vector<vComplexF> signs; static Vector<vComplexF> signs;
int setupSigns(void ){ int setupSigns(void ){
Vector<vComplexF> bother(2); Vector<vComplexF> bother(2);
signs = bother; signs = bother;
vrsign(signs[0]); vrsign(signs[0]);
visign(signs[1]); visign(signs[1]);
return 1; return 1;
} }
static int signInit = setupSigns(); static int signInit = setupSigns();
#define label(A) ilabel(A) #define label(A) ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n" #define ilabel(A) ".globl\n" #A ":\n"
@ -82,19 +73,10 @@ namespace Grid {
#define MAYBEPERM(A,perm) if (perm) { A ; } #define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A #define FX(A) WILSONASM_ ##A
template<>
#undef KERNEL_DAG void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
template<> std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP #undef VMOVIDUP
@ -107,43 +89,32 @@ namespace Grid {
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
template<>
#undef KERNEL_DAG void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
template<> std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif #endif
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
#define INSTANTIATE_ASM(A)\ template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
INSTANTIATE_ASM(WilsonImplF); template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
INSTANTIATE_ASM(WilsonImplD); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
INSTANTIATE_ASM(ZWilsonImplF); int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
INSTANTIATE_ASM(ZWilsonImplD); template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
INSTANTIATE_ASM(GparityWilsonImplF); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
INSTANTIATE_ASM(GparityWilsonImplD); int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
INSTANTIATE_ASM(DomainWallVec5dImplF); }}
INSTANTIATE_ASM(DomainWallVec5dImplD);
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
}
}

View File

@ -30,11 +30,7 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_PROJMEM(base);
#else
XM_PROJMEM(base); XM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -45,22 +41,15 @@
MULT_2SPIN_DIR_PFXP(Xp,basep); MULT_2SPIN_DIR_PFXP(Xp,basep);
} }
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_RECON;
#else
XM_RECON; XM_RECON;
#endif
//////////////////////////////// ////////////////////////////////
// Yp // Yp
//////////////////////////////// ////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_PROJMEM(base);
#else
YM_PROJMEM(base); YM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -71,11 +60,7 @@
MULT_2SPIN_DIR_PFYP(Yp,basep); MULT_2SPIN_DIR_PFYP(Yp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_RECON_ACCUM;
#else
YM_RECON_ACCUM; YM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zp // Zp
@ -83,11 +68,7 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_PROJMEM(base);
#else
ZM_PROJMEM(base); ZM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -98,11 +79,7 @@
MULT_2SPIN_DIR_PFZP(Zp,basep); MULT_2SPIN_DIR_PFZP(Zp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_RECON_ACCUM;
#else
ZM_RECON_ACCUM; ZM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tp // Tp
@ -110,11 +87,7 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_PROJMEM(base);
#else
TM_PROJMEM(base); TM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -125,26 +98,16 @@
MULT_2SPIN_DIR_PFTP(Tp,basep); MULT_2SPIN_DIR_PFTP(Tp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_RECON_ACCUM;
#else
TM_RECON_ACCUM; TM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Xm // Xm
//////////////////////////////// ////////////////////////////////
#ifndef STREAM_STORE
basep= (uint64_t) &out._odata[ss]; basep= (uint64_t) &out._odata[ss];
#endif
// basep= st.GetPFInfo(nent,plocal); nent++; // basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_PROJMEM(base);
#else
XP_PROJMEM(base); XP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -155,11 +118,7 @@
MULT_2SPIN_DIR_PFXM(Xm,basep); MULT_2SPIN_DIR_PFXM(Xm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_RECON_ACCUM;
#else
XP_RECON_ACCUM; XP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Ym // Ym
@ -167,11 +126,7 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_PROJMEM(base);
#else
YP_PROJMEM(base); YP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -182,11 +137,7 @@
MULT_2SPIN_DIR_PFYM(Ym,basep); MULT_2SPIN_DIR_PFYM(Ym,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_RECON_ACCUM;
#else
YP_RECON_ACCUM; YP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zm // Zm
@ -194,11 +145,7 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_PROJMEM(base);
#else
ZP_PROJMEM(base); ZP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -209,11 +156,7 @@
MULT_2SPIN_DIR_PFZM(Zm,basep); MULT_2SPIN_DIR_PFZM(Zm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_RECON_ACCUM;
#else
ZP_RECON_ACCUM; ZP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tm // Tm
@ -221,28 +164,18 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_PROJMEM(base);
#else
TP_PROJMEM(base); TP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
} }
base= (uint64_t) &out._odata[ss]; base= (uint64_t) &out._odata[ss];
#ifndef STREAM_STORE
PREFETCH_CHIMU(base); PREFETCH_CHIMU(base);
#endif
{ {
MULT_2SPIN_DIR_PFTM(Tm,basep); MULT_2SPIN_DIR_PFTM(Tm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_RECON_ACCUM;
#else
TP_RECON_ACCUM; TP_RECON_ACCUM;
#endif
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
SAVE_RESULT(base,basep); SAVE_RESULT(base,basep);

View File

@ -311,8 +311,8 @@ namespace Grid {
namespace QCD { namespace QCD {
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {
@ -554,8 +554,8 @@ namespace QCD {
} }
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {
@ -839,23 +839,46 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though // // Need Nc=3 though //
#define INSTANTIATE_THEM(A) \ template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ int ss,int sU,const FermionField &in, FermionField &out);
int ss,int sU,const FermionField &in, FermionField &out);\ template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD); template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
INSTANTIATE_THEM(ZWilsonImplF); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
INSTANTIATE_THEM(ZWilsonImplD); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(GparityWilsonImplF); template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
INSTANTIATE_THEM(GparityWilsonImplD); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
INSTANTIATE_THEM(DomainWallVec5dImplF); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(DomainWallVec5dImplD); template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
INSTANTIATE_THEM(ZDomainWallVec5dImplF); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
INSTANTIATE_THEM(ZDomainWallVec5dImplD); int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallRedBlack5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallRedBlack5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
}} }}

View File

@ -28,7 +28,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_WILSON_TM_FERMION_H #ifndef GRID_QCD_WILSON_TM_FERMION_H
#define GRID_QCD_WILSON_TM_FERMION_H #define GRID_QCD_WILSON_TM_FERMION_H
#include <Grid/Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {

View File

@ -1,79 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusFermion.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
}
}
#endif

View File

@ -1,194 +1,181 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/GaugeImpl.h Source file: ./lib/qcd/action/gauge/GaugeImpl.h
Copyright (C) 2015 Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */ #ifndef GRID_QCD_GAUGE_IMPL_H
#ifndef GRID_QCD_GAUGE_IMPL_H #define GRID_QCD_GAUGE_IMPL_H
#define GRID_QCD_GAUGE_IMPL_H
namespace Grid { namespace Grid {
namespace QCD {
//////////////////////////////////////////////////////////////////////// namespace QCD {
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
template <class Gimpl> class WilsonLoops;
#define INHERIT_GIMPL_TYPES(GImpl) \ ////////////////////////////////////////////////////////////////////////
typedef typename GImpl::Simd Simd; \ // Implementation dependent gauge types
typedef typename GImpl::GaugeLinkField GaugeLinkField; \ ////////////////////////////////////////////////////////////////////////
typedef typename GImpl::GaugeField GaugeField; \
typedef typename GImpl::SiteGaugeField SiteGaugeField; \
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
// template<class Gimpl> class WilsonLoops;
template <class S, int Nrepresentation = Nc> class GaugeImplTypes {
public:
typedef S Simd;
template <typename vtype> #define INHERIT_GIMPL_TYPES(GImpl) \
using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>; typedef typename GImpl::Simd Simd;\
template <typename vtype> typedef typename GImpl::GaugeLinkField GaugeLinkField;\
using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>; typedef typename GImpl::GaugeField GaugeField;\
typedef typename GImpl::SiteGaugeField SiteGaugeField;\
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
typedef iImplGaugeLink<Simd> SiteGaugeLink;
typedef iImplGaugeField<Simd> SiteGaugeField;
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised //
// gauge field, lorentz... all template<class S,int Nrepresentation=Nc>
// ugly class GaugeImplTypes {
typedef Lattice<SiteGaugeField> GaugeField; public:
// Move this elsewhere? FIXME typedef S Simd;
static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W,
int mu) { // U[mu] += W template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
PARALLEL_FOR_LOOP template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >;
for (auto ss = 0; ss < U._grid->oSites(); ss++) {
U._odata[ss]._internal[mu] = typedef iImplGaugeLink <Simd> SiteGaugeLink;
U._odata[ss]._internal[mu] + W._odata[ss]._internal; typedef iImplGaugeField <Simd> SiteGaugeField;
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised gauge field, lorentz... all ugly
typedef Lattice<SiteGaugeField> GaugeField;
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template<class GimplTypes>
class PeriodicGaugeImpl : public GimplTypes {
public:
INHERIT_GIMPL_TYPES(GimplTypes);
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Support needed for the assembly of loops including all boundary condition effects such as conjugate bcs
////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class covariant> static inline
Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
return PeriodicBC::CovShiftForward(Link,mu,field);
}
template<class covariant> static inline
Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
return PeriodicBC::CovShiftBackward(Link,mu,field);
}
static inline
GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
return Cshift(adj(Link),mu,-1);
}
static inline
GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
}
static inline
GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
return Cshift(Link,mu,1);
}
static inline bool isPeriodicGaugeField(void) {
return true;
}
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template<class GimplTypes>
class ConjugateGaugeImpl : public GimplTypes {
public:
INHERIT_GIMPL_TYPES(GimplTypes);
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Support needed for the assembly of loops including all boundary condition effects such as Gparity.
////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class covariant> static
Lattice<covariant> CovShiftForward (const GaugeLinkField &Link, int mu, const Lattice<covariant> &field) {
return ConjugateBC::CovShiftForward(Link,mu,field);
} }
template<class covariant> static
Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,const Lattice<covariant> &field) {
return ConjugateBC::CovShiftBackward(Link,mu,field);
}
static inline
GaugeLinkField CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link._grid;
int Lmu = grid->GlobalDimensions()[mu]-1;
Lattice<iScalar<vInteger> > coor(grid); LatticeCoordinate(coor,mu);
GaugeLinkField tmp (grid);
tmp=adj(Link);
tmp = where(coor==Lmu,conjugate(tmp),tmp);
return Cshift(tmp,mu,-1);// moves towards positive mu
}
static inline
GaugeLinkField CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
}
static inline
GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link._grid;
int Lmu = grid->GlobalDimensions()[mu]-1;
Lattice<iScalar<vInteger> > coor(grid); LatticeCoordinate(coor,mu);
GaugeLinkField tmp (grid);
tmp=Cshift(Link,mu,1);
tmp=where(coor==Lmu,conjugate(tmp),tmp);
return tmp;
}
static inline bool isPeriodicGaugeField(void) {
return false;
}
};
typedef GaugeImplTypes<vComplex,Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF,Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD,Nc> GimplTypesD;
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
} }
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template <class GimplTypes> class PeriodicGaugeImpl : public GimplTypes {
public:
INHERIT_GIMPL_TYPES(GimplTypes);
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Support needed for the assembly of loops including all boundary condition
// effects such as conjugate bcs
////////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class covariant>
static inline Lattice<covariant>
CovShiftForward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return PeriodicBC::CovShiftForward(Link, mu, field);
}
template <class covariant>
static inline Lattice<covariant>
CovShiftBackward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return PeriodicBC::CovShiftBackward(Link, mu, field);
}
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
return Cshift(adj(Link), mu, -1);
}
static inline GaugeLinkField
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
}
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
return Cshift(Link, mu, 1);
}
static inline bool isPeriodicGaugeField(void) { return true; }
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc
template <class GimplTypes> class ConjugateGaugeImpl : public GimplTypes {
public:
INHERIT_GIMPL_TYPES(GimplTypes);
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Support needed for the assembly of loops including all boundary condition
// effects such as Gparity.
////////////////////////////////////////////////////////////////////////////////////////////////////////////
template <class covariant>
static Lattice<covariant> CovShiftForward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return ConjugateBC::CovShiftForward(Link, mu, field);
}
template <class covariant>
static Lattice<covariant> CovShiftBackward(const GaugeLinkField &Link, int mu,
const Lattice<covariant> &field) {
return ConjugateBC::CovShiftBackward(Link, mu, field);
}
static inline GaugeLinkField
CovShiftIdentityBackward(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link._grid;
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
GaugeLinkField tmp(grid);
tmp = adj(Link);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return Cshift(tmp, mu, -1); // moves towards positive mu
}
static inline GaugeLinkField
CovShiftIdentityForward(const GaugeLinkField &Link, int mu) {
return Link;
}
static inline GaugeLinkField ShiftStaple(const GaugeLinkField &Link, int mu) {
GridBase *grid = Link._grid;
int Lmu = grid->GlobalDimensions()[mu] - 1;
Lattice<iScalar<vInteger>> coor(grid);
LatticeCoordinate(coor, mu);
GaugeLinkField tmp(grid);
tmp = Cshift(Link, mu, 1);
tmp = where(coor == Lmu, conjugate(tmp), tmp);
return tmp;
}
static inline bool isPeriodicGaugeField(void) { return false; }
};
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
typedef PeriodicGaugeImpl<GimplAdjointTypesR> PeriodicGimplAdjR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplAdjointTypesF> PeriodicGimplAdjF; // Float
typedef PeriodicGaugeImpl<GimplAdjointTypesD> PeriodicGimplAdjD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
}
} }
#endif #endif

View File

@ -1,214 +1,212 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h Source file: ./lib/qcd/action/pseudofermion/OneFlavourEvenOddRational.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H #ifndef QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
#define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H #define QCD_PSEUDOFERMION_ONE_FLAVOUR_EVEN_ODD_RATIONAL_H
namespace Grid { namespace Grid{
namespace QCD { namespace QCD{
/////////////////////////////////////// ///////////////////////////////////////
// One flavour rational // One flavour rational
/////////////////////////////////////// ///////////////////////////////////////
// S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi // S_f = chi^dag * N(Mpc^dag*Mpc)/D(Mpc^dag*Mpc) * chi
//
// Here, M is some operator
// N and D makeup the rat. poly
//
template <class Impl>
class OneFlavourEvenOddRationalPseudoFermionAction
: public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
typedef OneFlavourRationalParams Params;
Params param;
MultiShiftFunction PowerHalf;
MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegQuarter;
private:
FermionOperator<Impl> &FermOp; // the basic operator
// NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us
// historically
// and hasenbusch works better
FermionField PhiEven; // the pseudo fermion field for this trajectory
FermionField PhiOdd; // the pseudo fermion field for this trajectory
public:
OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator<Impl> &Op,
Params &p)
: FermOp(Op),
PhiEven(Op.FermionRedBlackGrid()),
PhiOdd(Op.FermionRedBlackGrid()),
param(p) {
AlgRemez remez(param.lo, param.hi, param.precision);
// MdagM^(+- 1/2)
std::cout << GridLogMessage << "Generating degree " << param.degree
<< " for x^(1/2)" << std::endl;
remez.generateApprox(param.degree, 1, 2);
PowerHalf.Init(remez, param.tolerance, false);
PowerNegHalf.Init(remez, param.tolerance, true);
// MdagM^(+- 1/4)
std::cout << GridLogMessage << "Generating degree " << param.degree
<< " for x^(1/4)" << std::endl;
remez.generateApprox(param.degree, 1, 4);
PowerQuarter.Init(remez, param.tolerance, false);
PowerNegQuarter.Init(remez, param.tolerance, true);
};
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
// Phi = MpcdagMpc^{1/4} eta
// //
// P(eta) = e^{- eta^dag eta} // Here, M is some operator
// N and D makeup the rat. poly
// //
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
RealD scale = std::sqrt(0.5); template<class Impl>
class OneFlavourEvenOddRationalPseudoFermionAction : public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
FermionField eta(FermOp.FermionGrid()); typedef OneFlavourRationalParams Params;
FermionField etaOdd(FermOp.FermionRedBlackGrid()); Params param;
FermionField etaEven(FermOp.FermionRedBlackGrid());
gaussian(pRNG, eta); MultiShiftFunction PowerHalf ;
eta = eta * scale; MultiShiftFunction PowerNegHalf;
MultiShiftFunction PowerQuarter;
MultiShiftFunction PowerNegQuarter;
pickCheckerboard(Even, etaEven, eta); private:
pickCheckerboard(Odd, etaOdd, eta);
FermOp.ImportGauge(U); FermionOperator<Impl> & FermOp;// the basic operator
// mutishift CG // NOT using "Nroots"; IroIro is -- perhaps later, but this wasn't good for us historically
SchurDifferentiableOperator<Impl> Mpc(FermOp); // and hasenbusch works better
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter, PowerQuarter);
msCG(Mpc, etaOdd, PhiOdd);
////////////////////////////////////////////////////// FermionField PhiEven; // the pseudo fermion field for this trajectory
// FIXME : Clover term not yet.. FermionField PhiOdd; // the pseudo fermion field for this trajectory
//////////////////////////////////////////////////////
assert(FermOp.ConstEE() == 1);
PhiEven = zero;
};
////////////////////////////////////////////////////// public:
// S = phi^dag (Mdag M)^-1/2 phi
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
FermOp.ImportGauge(U);
FermionField Y(FermOp.FermionRedBlackGrid()); OneFlavourEvenOddRationalPseudoFermionAction(FermionOperator<Impl> &Op,
Params & p ) : FermOp(Op),
PhiEven(Op.FermionRedBlackGrid()),
PhiOdd (Op.FermionRedBlackGrid()),
param(p)
{
AlgRemez remez(param.lo,param.hi,param.precision);
SchurDifferentiableOperator<Impl> Mpc(FermOp); // MdagM^(+- 1/2)
std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/2)"<<std::endl;
remez.generateApprox(param.degree,1,2);
PowerHalf.Init(remez,param.tolerance,false);
PowerNegHalf.Init(remez,param.tolerance,true);
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter, // MdagM^(+- 1/4)
PowerNegQuarter); std::cout<<GridLogMessage << "Generating degree "<<param.degree<<" for x^(1/4)"<<std::endl;
remez.generateApprox(param.degree,1,4);
PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true);
};
msCG(Mpc, PhiOdd, Y); virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
RealD action = norm2(Y); // P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 " // = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}
"solve or -1/2 solve faster??? " // Phi = MpcdagMpc^{1/4} eta
<< action << std::endl; //
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
return action; RealD scale = std::sqrt(0.5);
};
////////////////////////////////////////////////////// FermionField eta (FermOp.FermionGrid());
// Need FermionField etaOdd (FermOp.FermionRedBlackGrid());
// dS_f/dU = chi^dag d[N/D] chi FermionField etaEven(FermOp.FermionRedBlackGrid());
//
// N/D is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(M^dagM + bk)
//
// d[N/D] is then
//
// \sum_k -ak [M^dagM+bk]^{-1} [ dM^dag M + M^dag dM ] [M^dag M +
// bk]^{-1}
//
// Need
// Mf Phi_k = [MdagM+bk]^{-1} Phi
// Mf Phi = \sum_k ak [MdagM+bk]^{-1} Phi
//
// With these building blocks
//
// dS/dU = \sum_k -ak Mf Phi_k^dag [ dM^dag M + M^dag dM ] Mf
// Phi_k
// S = innerprodReal(Phi,Mf Phi);
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
const int Npole = PowerNegHalf.poles.size();
std::vector<FermionField> MPhi_k(Npole, FermOp.FermionRedBlackGrid()); gaussian(pRNG,eta); eta=eta*scale;
FermionField X(FermOp.FermionRedBlackGrid()); pickCheckerboard(Even,etaEven,eta);
FermionField Y(FermOp.FermionRedBlackGrid()); pickCheckerboard(Odd,etaOdd,eta);
GaugeField tmp(FermOp.GaugeGrid()); FermOp.ImportGauge(U);
FermOp.ImportGauge(U); // mutishift CG
SchurDifferentiableOperator<Impl> Mpc(FermOp);
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerQuarter);
msCG(Mpc,etaOdd,PhiOdd);
SchurDifferentiableOperator<Impl> Mpc(FermOp); //////////////////////////////////////////////////////
// FIXME : Clover term not yet..
//////////////////////////////////////////////////////
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter, PowerNegHalf); assert(FermOp.ConstEE() == 1);
PhiEven = zero;
msCG(Mpc, PhiOdd, MPhi_k); };
dSdU = zero; //////////////////////////////////////////////////////
for (int k = 0; k < Npole; k++) { // S = phi^dag (Mdag M)^-1/2 phi
RealD ak = PowerNegHalf.residues[k]; //////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
X = MPhi_k[k]; FermOp.ImportGauge(U);
Mpc.Mpc(X, Y); FermionField Y(FermOp.FermionRedBlackGrid());
Mpc.MpcDeriv(tmp, Y, X);
dSdU = dSdU + ak * tmp;
Mpc.MpcDagDeriv(tmp, X, Y);
dSdU = dSdU + ak * tmp;
}
// dSdU = Ta(dSdU); SchurDifferentiableOperator<Impl> Mpc(FermOp);
};
}; ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerNegQuarter);
}
msCG(Mpc,PhiOdd,Y);
RealD action = norm2(Y);
std::cout << GridLogMessage << "Pseudofermion action FIXME -- is -1/4 solve or -1/2 solve faster??? "<<action<<std::endl;
return action;
};
//////////////////////////////////////////////////////
// Need
// dS_f/dU = chi^dag d[N/D] chi
//
// N/D is expressed as partial fraction expansion:
//
// a0 + \sum_k ak/(M^dagM + bk)
//
// d[N/D] is then
//
// \sum_k -ak [M^dagM+bk]^{-1} [ dM^dag M + M^dag dM ] [M^dag M + bk]^{-1}
//
// Need
// Mf Phi_k = [MdagM+bk]^{-1} Phi
// Mf Phi = \sum_k ak [MdagM+bk]^{-1} Phi
//
// With these building blocks
//
// dS/dU = \sum_k -ak Mf Phi_k^dag [ dM^dag M + M^dag dM ] Mf Phi_k
// S = innerprodReal(Phi,Mf Phi);
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
const int Npole = PowerNegHalf.poles.size();
std::vector<FermionField> MPhi_k (Npole,FermOp.FermionRedBlackGrid());
FermionField X(FermOp.FermionRedBlackGrid());
FermionField Y(FermOp.FermionRedBlackGrid());
GaugeField tmp(FermOp.GaugeGrid());
FermOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> Mpc(FermOp);
ConjugateGradientMultiShift<FermionField> msCG(param.MaxIter,PowerNegHalf);
msCG(Mpc,PhiOdd,MPhi_k);
dSdU = zero;
for(int k=0;k<Npole;k++){
RealD ak = PowerNegHalf.residues[k];
X = MPhi_k[k];
Mpc.Mpc(X,Y);
Mpc.MpcDeriv (tmp , Y, X ); dSdU=dSdU+ak*tmp;
Mpc.MpcDagDeriv(tmp , X, Y ); dSdU=dSdU+ak*tmp;
}
dSdU = Ta(dSdU);
};
};
}
} }
#endif #endif

View File

@ -256,7 +256,7 @@ namespace Grid{
} }
//dSdU = Ta(dSdU); dSdU = Ta(dSdU);
}; };
}; };

View File

@ -186,7 +186,7 @@ namespace Grid{
} }
//dSdU = Ta(dSdU); dSdU = Ta(dSdU);
}; };
}; };

View File

@ -242,7 +242,7 @@ namespace Grid{
} }
//dSdU = Ta(dSdU); dSdU = Ta(dSdU);
}; };
}; };

View File

@ -1,151 +1,149 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_H
namespace Grid { namespace Grid{
namespace QCD { namespace QCD{
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Two flavour pseudofermion action for any dop // Two flavour pseudofermion action for any dop
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template <class Impl> template<class Impl>
class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> { class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
private: private:
FermionOperator<Impl> &FermOp; // the basic operator
OperatorFunction<FermionField> &DerivativeSolver; FermionOperator<Impl> & FermOp;// the basic operator
OperatorFunction<FermionField> &ActionSolver; OperatorFunction<FermionField> &DerivativeSolver;
FermionField Phi; // the pseudo fermion field for this trajectory OperatorFunction<FermionField> &ActionSolver;
public: FermionField Phi; // the pseudo fermion field for this trajectory
/////////////////////////////////////////////////
// Pass in required objects.
/////////////////////////////////////////////////
TwoFlavourPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> &DS,
OperatorFunction<FermionField> &AS)
: FermOp(Op),
DerivativeSolver(DS),
ActionSolver(AS),
Phi(Op.FermionGrid()){};
////////////////////////////////////////////////////////////////////////////////////// public:
// Push the gauge field in to the dops. Assume any BC's and smearing already /////////////////////////////////////////////////
// applied // Pass in required objects.
////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { TwoFlavourPseudoFermionAction(FermionOperator<Impl> &Op,
// P(phi) = e^{- phi^dag (MdagM)^-1 phi} OperatorFunction<FermionField> & DS,
// Phi = Mdag eta OperatorFunction<FermionField> & AS
// P(eta) = e^{- eta^dag eta} ) : FermOp(Op), DerivativeSolver(DS), ActionSolver(AS), Phi(Op.FermionGrid()) {
// };
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
// and must multiply by 0.707....
//
// Chroma has this scale factor: two_flavor_monomial_w.h
// IroIro: does not use this scale. It is absorbed by a change of vars
// in the Phi integral, and thus is only an irrelevant prefactor for
// the partition function.
//
RealD scale = std::sqrt(0.5);
FermionField eta(FermOp.FermionGrid());
gaussian(pRNG, eta); //////////////////////////////////////////////////////////////////////////////////////
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
//////////////////////////////////////////////////////////////////////////////////////
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
FermOp.ImportGauge(U); // P(phi) = e^{- phi^dag (MdagM)^-1 phi}
FermOp.Mdag(eta, Phi); // Phi = Mdag eta
// P(eta) = e^{- eta^dag eta}
//
// e^{x^2/2 sig^2} => sig^2 = 0.5.
//
// So eta should be of width sig = 1/sqrt(2).
// and must multiply by 0.707....
//
// Chroma has this scale factor: two_flavor_monomial_w.h
// IroIro: does not use this scale. It is absorbed by a change of vars
// in the Phi integral, and thus is only an irrelevant prefactor for the partition function.
//
RealD scale = std::sqrt(0.5);
FermionField eta(FermOp.FermionGrid());
Phi = Phi * scale; gaussian(pRNG,eta);
};
////////////////////////////////////////////////////// FermOp.ImportGauge(U);
// S = phi^dag (Mdag M)^-1 phi FermOp.Mdag(eta,Phi);
//////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) {
FermOp.ImportGauge(U);
FermionField X(FermOp.FermionGrid()); Phi=Phi*scale;
FermionField Y(FermOp.FermionGrid());
MdagMLinearOperator<FermionOperator<Impl>, FermionField> MdagMOp(FermOp); };
X = zero;
ActionSolver(MdagMOp, Phi, X);
MdagMOp.Op(X, Y);
RealD action = norm2(Y); //////////////////////////////////////////////////////
std::cout << GridLogMessage << "Pseudofermion action " << action // S = phi^dag (Mdag M)^-1 phi
<< std::endl; //////////////////////////////////////////////////////
return action; virtual RealD S(const GaugeField &U) {
};
////////////////////////////////////////////////////// FermOp.ImportGauge(U);
// dS/du = - phi^dag (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 phi
// = - phi^dag M^-1 dM (MdagM)^-1 phi - phi^dag (MdagM)^-1 dMdag dM
// (Mdag)^-1 phi
//
// = - Ydag dM X - Xdag dMdag Y
//
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
FermOp.ImportGauge(U);
FermionField X(FermOp.FermionGrid()); FermionField X(FermOp.FermionGrid());
FermionField Y(FermOp.FermionGrid()); FermionField Y(FermOp.FermionGrid());
GaugeField tmp(FermOp.GaugeGrid());
MdagMLinearOperator<FermionOperator<Impl>, FermionField> MdagMOp(FermOp); MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp);
X=zero;
ActionSolver(MdagMOp,Phi,X);
MdagMOp.Op(X,Y);
X = zero; RealD action = norm2(Y);
DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi std::cout << GridLogMessage << "Pseudofermion action "<<action<<std::endl;
MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi return action;
};
// Our conventions really make this UdSdU; We do not differentiate wrt Udag //////////////////////////////////////////////////////
// here. // dS/du = - phi^dag (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 phi
// So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. // = - phi^dag M^-1 dM (MdagM)^-1 phi - phi^dag (MdagM)^-1 dMdag dM (Mdag)^-1 phi
//
// = - Ydag dM X - Xdag dMdag Y
//
//////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
FermOp.MDeriv(tmp, Y, X, DaggerNo); FermOp.ImportGauge(U);
dSdU = tmp;
FermOp.MDeriv(tmp, X, Y, DaggerYes);
dSdU = dSdU + tmp;
// not taking here the traceless antihermitian component FermionField X(FermOp.FermionGrid());
}; FermionField Y(FermOp.FermionGrid());
}; GaugeField tmp(FermOp.GaugeGrid());
}
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp);
X=zero;
DerivativeSolver(MdagMOp,Phi,X);
MdagMOp.Op(X,Y);
// Our conventions really make this UdSdU; We do not differentiate wrt Udag here.
// So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt.
FermOp.MDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp;
FermOp.MDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp;
dSdU = Ta(dSdU);
};
};
}
} }
#endif #endif

View File

@ -1,66 +1,70 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H
namespace Grid { namespace Grid{
namespace QCD { namespace QCD{
////////////////////////////////////////////////////////////////////////
// Two flavour pseudofermion action for any EO prec dop
////////////////////////////////////////////////////////////////////////
template <class Impl>
class TwoFlavourEvenOddPseudoFermionAction
: public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
private:
FermionOperator<Impl> &FermOp; // the basic operator
OperatorFunction<FermionField> &DerivativeSolver; ////////////////////////////////////////////////////////////////////////
OperatorFunction<FermionField> &ActionSolver; // Two flavour pseudofermion action for any EO prec dop
////////////////////////////////////////////////////////////////////////
template<class Impl>
class TwoFlavourEvenOddPseudoFermionAction : public Action<typename Impl::GaugeField> {
FermionField PhiOdd; // the pseudo fermion field for this trajectory public:
FermionField PhiEven; // the pseudo fermion field for this trajectory
public: INHERIT_IMPL_TYPES(Impl);
/////////////////////////////////////////////////
// Pass in required objects. private:
/////////////////////////////////////////////////
TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op, FermionOperator<Impl> & FermOp;// the basic operator
OperatorFunction<FermionField> &DS,
OperatorFunction<FermionField> &AS) OperatorFunction<FermionField> &DerivativeSolver;
: FermOp(Op), OperatorFunction<FermionField> &ActionSolver;
DerivativeSolver(DS),
ActionSolver(AS), FermionField PhiOdd; // the pseudo fermion field for this trajectory
FermionField PhiEven; // the pseudo fermion field for this trajectory
public:
/////////////////////////////////////////////////
// Pass in required objects.
/////////////////////////////////////////////////
TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS
) :
FermOp(Op),
DerivativeSolver(DS),
ActionSolver(AS),
PhiEven(Op.FermionRedBlackGrid()), PhiEven(Op.FermionRedBlackGrid()),
PhiOdd(Op.FermionRedBlackGrid()) PhiOdd(Op.FermionRedBlackGrid())
{}; {};
@ -169,7 +173,7 @@ class TwoFlavourEvenOddPseudoFermionAction
FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp;
*/ */
//dSdU = Ta(dSdU); dSdU = Ta(dSdU);
}; };

View File

@ -131,11 +131,9 @@ namespace Grid{
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
X=zero; X=zero;
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
//Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
// Multiply by Ydag
RealD action = real(innerProduct(Y,X));
//RealD action = norm2(Y); RealD action = norm2(Y);
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept) // The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
// Only really clover term that creates this. Leave the EE portion as a future to do to make most // Only really clover term that creates this. Leave the EE portion as a future to do to make most
@ -190,8 +188,7 @@ namespace Grid{
assert(NumOp.ConstEE() == 1); assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1); assert(DenOp.ConstEE() == 1);
//dSdU = -Ta(dSdU); dSdU = -Ta(dSdU);
dSdU = -dSdU;
}; };
}; };

View File

@ -155,8 +155,7 @@ namespace Grid{
DenOp.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU-force; DenOp.MDeriv(force,Y,X,DaggerNo); dSdU=dSdU-force;
DenOp.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU-force; DenOp.MDeriv(force,X,Y,DaggerYes); dSdU=dSdU-force;
dSdU *= -1.0; dSdU = - Ta(dSdU);
//dSdU = - Ta(dSdU);
}; };
}; };

View File

@ -1,34 +1,33 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/HMC.h Source file: ./lib/qcd/hmc/HMC.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
//-------------------------------------------------------------------- //--------------------------------------------------------------------
/*! @file HMC.h /*! @file HMC.h
* @brief Classes for Hybrid Monte Carlo update * @brief Classes for Hybrid Monte Carlo update
@ -42,195 +41,172 @@ directory
#include <string> #include <string>
namespace Grid {
namespace QCD {
struct HMCparameters { namespace Grid{
Integer StartTrajectory; namespace QCD{
Integer Trajectories; /* @brief Number of sweeps in this run */
bool MetropolisTest;
Integer NoMetropolisUntil;
HMCparameters() {
////////////////////////////// Default values
MetropolisTest = true;
NoMetropolisUntil = 10;
StartTrajectory = 0;
Trajectories = 200;
/////////////////////////////////
}
void print() const { struct HMCparameters{
std::cout << GridLogMessage << "[HMC parameter] Trajectories : " << Trajectories << "\n";
std::cout << GridLogMessage << "[HMC parameter] Start trajectory : " << StartTrajectory << "\n";
std::cout << GridLogMessage << "[HMC parameter] Metropolis test (on/off): " << MetropolisTest << "\n";
std::cout << GridLogMessage << "[HMC parameter] Thermalization trajs : " << NoMetropolisUntil << "\n";
}
}; Integer StartTrajectory;
Integer Trajectories; /* @brief Number of sweeps in this run */
bool MetropolisTest;
Integer NoMetropolisUntil;
template <class GaugeField> HMCparameters(){
class HmcObservable { ////////////////////////////// Default values
public: MetropolisTest = true;
virtual void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, NoMetropolisUntil = 10;
GridParallelRNG &pRNG) = 0; StartTrajectory = 0;
}; Trajectories = 200;
/////////////////////////////////
}
};
template <class Gimpl> template<class GaugeField>
class PlaquetteLogger : public HmcObservable<typename Gimpl::GaugeField> { class HmcObservable {
private: public:
std::string Stem; virtual void TrajectoryComplete (int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG & pRNG )=0;
};
public: template<class Gimpl>
INHERIT_GIMPL_TYPES(Gimpl); class PlaquetteLogger : public HmcObservable<typename Gimpl::GaugeField> {
PlaquetteLogger(std::string cf) { Stem = cf; }; private:
std::string Stem;
public:
INHERIT_GIMPL_TYPES(Gimpl);
PlaquetteLogger(std::string cf) {
Stem = cf;
};
void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, void TrajectoryComplete(int traj, GaugeField &U, GridSerialRNG &sRNG, GridParallelRNG & pRNG )
GridParallelRNG &pRNG) { {
std::string file; std::string file; { std::ostringstream os; os << Stem <<"."<< traj; file = os.str(); }
{ std::ofstream of(file);
std::ostringstream os;
os << Stem << "." << traj;
file = os.str();
}
std::ofstream of(file);
RealD peri_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(U); RealD peri_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(U);
RealD peri_rect = WilsonLoops<PeriodicGimplR>::avgRectangle(U); RealD peri_rect = WilsonLoops<PeriodicGimplR>::avgRectangle(U);
RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(U); RealD impl_plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
RealD impl_rect = WilsonLoops<Gimpl>::avgRectangle(U); RealD impl_rect = WilsonLoops<Gimpl>::avgRectangle(U);
of << traj << " " << impl_plaq << " " << impl_rect << " " << peri_plaq of << traj<<" "<< impl_plaq << " " << impl_rect << " "<< peri_plaq<<" "<<peri_rect<<std::endl;
<< " " << peri_rect << std::endl; std::cout<< GridLogMessage<< "traj"<<" "<< "plaq " << " " << " rect " << " "<< "peri_plaq" <<" "<<"peri_rect"<<std::endl;
std::cout << GridLogMessage << "traj" std::cout<< GridLogMessage<< traj<<" "<< impl_plaq << " " << impl_rect << " "<< peri_plaq<<" "<<peri_rect<<std::endl;
<< " " }
<< "plaq " };
<< " "
<< " rect "
<< " "
<< "peri_plaq"
<< " "
<< "peri_rect" << std::endl;
std::cout << GridLogMessage << traj << " " << impl_plaq << " " << impl_rect
<< " " << peri_plaq << " " << peri_rect << std::endl;
}
};
// template <class GaugeField, class Integrator, class Smearer, class // template <class GaugeField, class Integrator, class Smearer, class Boundary>
// Boundary> template <class GaugeField, class IntegratorType>
template <class GaugeField, class IntegratorType> class HybridMonteCarlo {
class HybridMonteCarlo { private:
private:
const HMCparameters Params;
GridSerialRNG &sRNG; // Fixme: need a RNG management strategy. const HMCparameters Params;
GridParallelRNG &pRNG; // Fixme: need a RNG management strategy.
GaugeField &Ucur;
IntegratorType &TheIntegrator; GridSerialRNG &sRNG; // Fixme: need a RNG management strategy.
std::vector<HmcObservable<GaugeField> *> Observables; GridParallelRNG &pRNG; // Fixme: need a RNG management strategy.
GaugeField & Ucur;
///////////////////////////////////////////////////////// IntegratorType &TheIntegrator;
// Metropolis step std::vector<HmcObservable<GaugeField> *> Observables;
/////////////////////////////////////////////////////////
bool metropolis_test(const RealD DeltaH) {
RealD rn_test;
RealD prob = std::exp(-DeltaH); /////////////////////////////////////////////////////////
// Metropolis step
/////////////////////////////////////////////////////////
bool metropolis_test(const RealD DeltaH){
random(sRNG, rn_test); RealD rn_test;
std::cout << GridLogMessage RealD prob = std::exp(-DeltaH);
<< "--------------------------------------------------\n";
std::cout << GridLogMessage << "exp(-dH) = " << prob
<< " Random = " << rn_test << "\n";
std::cout << GridLogMessage
<< "Acc. Probability = " << ((prob < 1.0) ? prob : 1.0) << "\n";
if ((prob > 1.0) || (rn_test <= prob)) { // accepted random(sRNG,rn_test);
std::cout << GridLogMessage << "Metropolis_test -- ACCEPTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return true;
} else { // rejected
std::cout << GridLogMessage << "Metropolis_test -- REJECTED\n";
std::cout << GridLogMessage
<< "--------------------------------------------------\n";
return false;
}
}
///////////////////////////////////////////////////////// std::cout<<GridLogMessage<< "--------------------------------------------\n";
// Evolution std::cout<<GridLogMessage<< "dH = "<<DeltaH << " Random = "<< rn_test <<"\n";
///////////////////////////////////////////////////////// std::cout<<GridLogMessage<< "Acc. Probability = " << ((prob<1.0)? prob: 1.0)<< " ";
RealD evolve_step(GaugeField &U) {
TheIntegrator.refresh(U, pRNG); // set U and initialize P and phi's
RealD H0 = TheIntegrator.S(U); // initial state action if((prob >1.0) || (rn_test <= prob)){ // accepted
std::cout<<GridLogMessage <<"-- ACCEPTED\n";
return true;
} else { // rejected
std::cout<<GridLogMessage <<"-- REJECTED\n";
return false;
}
std::streamsize current_precision = std::cout.precision();
std::cout.precision(17);
std::cout << GridLogMessage << "Total H before trajectory = " << H0 << "\n";
std::cout.precision(current_precision);
TheIntegrator.integrate(U);
RealD H1 = TheIntegrator.S(U); // updated state action
std::cout.precision(17);
std::cout << GridLogMessage << "Total H after trajectory = " << H1
<< " dH = " << H1 - H0 << "\n";
std::cout.precision(current_precision);
return (H1 - H0);
}
public:
/////////////////////////////////////////
// Constructor
/////////////////////////////////////////
HybridMonteCarlo(HMCparameters Pams, IntegratorType &_Int,
GridSerialRNG &_sRNG, GridParallelRNG &_pRNG, GaugeField &_U)
: Params(Pams), TheIntegrator(_Int), sRNG(_sRNG), pRNG(_pRNG), Ucur(_U) {}
~HybridMonteCarlo(){};
void AddObservable(HmcObservable<GaugeField> *obs) {
Observables.push_back(obs);
}
void evolve(void) {
Real DeltaH;
GaugeField Ucopy(Ucur._grid);
Params.print();
// Actual updates (evolve a copy Ucopy then copy back eventually)
for (int traj = Params.StartTrajectory;
traj < Params.Trajectories + Params.StartTrajectory; ++traj) {
std::cout << GridLogMessage << "-- # Trajectory = " << traj << "\n";
Ucopy = Ucur;
DeltaH = evolve_step(Ucopy);
bool accept = true;
if (traj >= Params.NoMetropolisUntil) {
accept = metropolis_test(DeltaH);
} }
if (accept) { /////////////////////////////////////////////////////////
Ucur = Ucopy; // Evolution
/////////////////////////////////////////////////////////
RealD evolve_step(GaugeField& U){
TheIntegrator.refresh(U,pRNG); // set U and initialize P and phi's
RealD H0 = TheIntegrator.S(U); // initial state action
std::cout<<GridLogMessage<<"Total H before = "<< H0 << "\n";
TheIntegrator.integrate(U);
RealD H1 = TheIntegrator.S(U); // updated state action
std::cout<<GridLogMessage<<"Total H after = "<< H1 << "\n";
return (H1-H0);
} }
for (int obs = 0; obs < Observables.size(); obs++) { public:
Observables[obs]->TrajectoryComplete(traj + 1, Ucur, sRNG, pRNG);
} /////////////////////////////////////////
} // Constructor
} /////////////////////////////////////////
}; HybridMonteCarlo(HMCparameters Pms, IntegratorType &_Int, GridSerialRNG &_sRNG, GridParallelRNG &_pRNG, GaugeField &_U ) :
Params(Pms),
TheIntegrator(_Int),
sRNG(_sRNG),
pRNG(_pRNG),
Ucur(_U)
{
}
~HybridMonteCarlo(){};
void AddObservable(HmcObservable<GaugeField> *obs) {
Observables.push_back(obs);
}
void evolve(void){
Real DeltaH;
GaugeField Ucopy(Ucur._grid);
// Actual updates (evolve a copy Ucopy then copy back eventually)
for(int traj=Params.StartTrajectory; traj < Params.Trajectories+Params.StartTrajectory; ++traj){
std::cout<<GridLogMessage << "-- # Trajectory = "<< traj << "\n";
Ucopy = Ucur;
DeltaH = evolve_step(Ucopy);
bool accept = true;
if ( traj > Params.NoMetropolisUntil) {
accept = metropolis_test(DeltaH);
}
if ( accept ) {
Ucur = Ucopy;
}
for(int obs = 0;obs<Observables.size();obs++){
Observables[obs]->TrajectoryComplete (traj+1,Ucur,sRNG,pRNG);
}
}
}
};
}// QCD
}// Grid
} // QCD
} // Grid
#endif #endif

Some files were not shown because too many files have changed in this diff Show More