1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Merge branch 'feature/qed-fvol' of https://github.com/paboyle/Grid into feature/qed-fvol

# Conflicts:
#	extras/Hadrons/Modules.hpp
#	extras/Hadrons/modules.inc
This commit is contained in:
James Harrison 2017-06-07 16:59:47 +01:00
commit c2b2b71c5d
265 changed files with 30461 additions and 7193 deletions

8
.gitignore vendored
View File

@ -92,6 +92,7 @@ build*/*
##################### #####################
*.xcodeproj/* *.xcodeproj/*
build.sh build.sh
.vscode
# Eigen source # # Eigen source #
################ ################
@ -106,6 +107,10 @@ lib/fftw/*
m4/lt* m4/lt*
m4/libtool.m4 m4/libtool.m4
# github pages #
################
gh-pages/
# Buck files # # Buck files #
############## ##############
.buck* .buck*
@ -116,4 +121,5 @@ make-bin-BUCK.sh
# generated sources # # generated sources #
##################### #####################
lib/qcd/spin/gamma-gen/*.h lib/qcd/spin/gamma-gen/*.h
lib/qcd/spin/gamma-gen/*.cc lib/qcd/spin/gamma-gen/*.cc

View File

@ -7,9 +7,11 @@ cache:
matrix: matrix:
include: include:
- os: osx - os: osx
osx_image: xcode7.2 osx_image: xcode8.3
compiler: clang compiler: clang
- compiler: gcc - compiler: gcc
dist: trusty
sudo: required
addons: addons:
apt: apt:
sources: sources:
@ -24,6 +26,8 @@ matrix:
- binutils-dev - binutils-dev
env: VERSION=-4.9 env: VERSION=-4.9
- compiler: gcc - compiler: gcc
dist: trusty
sudo: required
addons: addons:
apt: apt:
sources: sources:
@ -38,6 +42,7 @@ matrix:
- binutils-dev - binutils-dev
env: VERSION=-5 env: VERSION=-5
- compiler: clang - compiler: clang
dist: trusty
addons: addons:
apt: apt:
sources: sources:
@ -52,6 +57,7 @@ matrix:
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang - compiler: clang
dist: trusty
addons: addons:
apt: apt:
sources: sources:
@ -73,13 +79,15 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install: install:
- export CC=$CC$VERSION - export CC=$CC$VERSION
- export CXX=$CXX$VERSION - export CXX=$CXX$VERSION
- echo $PATH - echo $PATH
- which autoconf
- autoconf --version
- which automake
- automake --version
- which $CC - which $CC
- $CC --version - $CC --version
- which $CXX - which $CXX
@ -92,15 +100,15 @@ script:
- cd build - cd build
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- echo make clean - echo make clean
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check
- echo make clean - echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
- make -j4 - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -3,10 +3,15 @@ SUBDIRS = lib benchmarks tests extras
include $(top_srcdir)/doxygen.inc include $(top_srcdir)/doxygen.inc
tests: all bin_SCRIPTS=grid-config
$(MAKE) -C tests tests
.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
tests-local: all
bench-local: all
check-local: all
AM_CXXFLAGS += -I$(top_builddir)/include AM_CXXFLAGS += -I$(top_builddir)/include
ACLOCAL_AMFLAGS = -I m4 ACLOCAL_AMFLAGS = -I m4

View File

@ -22,6 +22,26 @@ Last update Nov 2016.
_Please do not send pull requests to the `master` branch which is reserved for releases._ _Please do not send pull requests to the `master` branch which is reserved for releases._
### Compilers
Intel ICPC v16.0.3 and later
Clang v3.5 and later (need 3.8 and later for OpenMP)
GCC v4.9.x (recommended)
GCC v6.3 and later
### Important:
Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
GCC v5.x
GCC v6.1, v6.2
### Bug report ### Bug report
_To help us track and solve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._ _To help us track and solve issues with Grid more efficiently, please report problems using the GitHub issue system rather than sending emails to Grid developers._
@ -32,7 +52,7 @@ When you file an issue, please go though the following checklist:
2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler. 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output of the `--version` option of your compiler.
3. Give the exact `configure` command used. 3. Give the exact `configure` command used.
4. Attach `config.log`. 4. Attach `config.log`.
5. Attach `config.summary`. 5. Attach `grid.config.summary`.
6. Attach the output of `make V=1`. 6. Attach the output of `make V=1`.
7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
@ -95,10 +115,10 @@ install Grid. Other options are detailed in the next section, you can also use `
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build. customise the build.
Finally, you can build and install Grid: Finally, you can build, check, and install Grid:
``` bash ``` bash
make; make install make; make check; make install
``` ```
To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute: To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
@ -121,7 +141,7 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
- `--enable-precision={single|double}`: set the default precision (default: `double`). - `--enable-precision={single|double}`: set the default precision (default: `double`).
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible comms targets is detailed in a section below. - `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible comms targets is detailed in a section below.
- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `). - `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers. - `--disable-timers`: disable system dependent high-resolution timers.
- `--enable-chroma`: enable Chroma regression tests. - `--enable-chroma`: enable Chroma regression tests.
- `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`) - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@ -159,7 +179,6 @@ Alternatively, some CPU codenames can be directly used:
| `<code>` | Description | | `<code>` | Description |
| ----------- | -------------------------------------- | | ----------- | -------------------------------------- |
| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
| `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) | | `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
| `BGQ` | Blue Gene/Q | | `BGQ` | Blue Gene/Q |

61
TODO
View File

@ -1,6 +1,26 @@
TODO: TODO:
--------------- ---------------
Peter's work list:
2)- Precision conversion and sort out localConvert <--
3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
4)- Binary I/O speed up & x-strips
-- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
-- Physical propagator interface
-- Conserved currents
-- GaugeFix into central location
-- Multigrid Wilson and DWF, compare to other Multigrid implementations
-- HDCR resume
Recent DONE
-- Cut down the exterior overhead <-- DONE
-- Interior legs from SHM comms <-- DONE
-- Half-precision comms <-- DONE
-- Merge high precision reduction into develop
-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
-- slice* linalg routines for multiRHS, BlockCG
-----
* Forces; the UdSdU term in gauge force term is half of what I think it should * Forces; the UdSdU term in gauge force term is half of what I think it should
be. This is a consequence of taking ONLY the first term in: be. This is a consequence of taking ONLY the first term in:
@ -21,16 +41,8 @@ TODO:
This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two. This means we must double the force in the Test_xxx_force routines, and is the origin of the factor of two.
This 2x is applied by hand in the fermion routines and in the Test_rect_force routine. This 2x is applied by hand in the fermion routines and in the Test_rect_force routine.
Policies:
* Link smearing/boundary conds; Policy class based implementation ; framework more in place
* Support different boundary conditions (finite temp, chem. potential ... ) * Support different boundary conditions (finite temp, chem. potential ... )
* Support different fermion representations?
- contained entirely within the integrator presently
- Sign of force term. - Sign of force term.
- Reversibility test. - Reversibility test.
@ -41,11 +53,6 @@ Policies:
- Audit oIndex usage for cb behaviour - Audit oIndex usage for cb behaviour
- Rectangle gauge actions.
Iwasaki,
Symanzik,
... etc...
- Prepare multigrid for HMC. - Alternate setup schemes. - Prepare multigrid for HMC. - Alternate setup schemes.
- Support for ILDG --- ugly, not done - Support for ILDG --- ugly, not done
@ -55,9 +62,11 @@ Policies:
- FFTnD ? - FFTnD ?
- Gparity; hand opt use template specialisation elegance to enable the optimised paths ? - Gparity; hand opt use template specialisation elegance to enable the optimised paths ?
- Gparity force term; Gparity (R)HMC. - Gparity force term; Gparity (R)HMC.
- Random number state save restore
- Mobius implementation clean up to remove #if 0 stale code sequences - Mobius implementation clean up to remove #if 0 stale code sequences
- CG -- profile carefully, kernel fusion, whole CG performance measurements. - CG -- profile carefully, kernel fusion, whole CG performance measurements.
================================================================ ================================================================
@ -90,6 +99,7 @@ Insert/Extract
Not sure of status of this -- reverify. Things are working nicely now though. Not sure of status of this -- reverify. Things are working nicely now though.
* Make the Tensor types and Complex etc... play more nicely. * Make the Tensor types and Complex etc... play more nicely.
- TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > > - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I
want to introduce a syntax that does not require this. want to introduce a syntax that does not require this.
@ -112,6 +122,8 @@ Not sure of status of this -- reverify. Things are working nicely now though.
RECENT RECENT
--------------- ---------------
- Support different fermion representations? -- DONE
- contained entirely within the integrator presently
- Clean up HMC -- DONE - Clean up HMC -- DONE
- LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE - LorentzScalar<GaugeField> gets Gauge link type (cleaner). -- DONE
- Simplified the integrators a bit. -- DONE - Simplified the integrators a bit. -- DONE
@ -123,6 +135,26 @@ RECENT
- Parallel io improvements -- DONE - Parallel io improvements -- DONE
- Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE - Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test. -- DONE
DONE:
- MultiArray -- MultiRHS done
- ConjugateGradientMultiShift -- DONE
- MCR -- DONE
- Remez -- Mike or Boost? -- DONE
- Proto (ET) -- DONE
- uBlas -- DONE ; Eigen
- Potentially Useful Boost libraries -- DONE ; Eigen
- Aligned allocator; memory pool -- DONE
- Multiprecision -- DONE
- Serialization -- DONE
- Regex -- Not needed
- Tokenize -- Why?
- Random number state save restore -- DONE
- Rectangle gauge actions. -- DONE
Iwasaki,
Symanzik,
... etc...
Done: Cayley, Partial , ContFrac force terms. Done: Cayley, Partial , ContFrac force terms.
DONE DONE
@ -207,6 +239,7 @@ Done
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane) FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
====================================================================================================== ======================================================================================================
* Link smearing/boundary conds; Policy class based implementation ; framework more in place -- DONE
* Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE * Command line args for geometry, simd, etc. layout. Is it necessary to have -- DONE
user pass these? Is this a QCD specific? user pass these? Is this a QCD specific?

View File

@ -1,6 +1,5 @@
Version : 0.6.0 Version : 0.7.0
- AVX512, AVX2, AVX, SSE good - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above - MPI and MPI3 comms optimisations for KNL and OPA finished
- MPI and MPI3 - Half precision comms
- HiRep, Smearing, Generic gauge group

View File

@ -31,6 +31,32 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
// Summary statistics over a set of timing samples.
//
// After statistics() has been called:
//   mean - arithmetic mean of the samples
//   err  - standard error of the mean, sqrt( sum (x-mean)^2 / (n*(n-1)) )
//   min  - smallest sample
//   max  - largest sample
// The fields are not initialised until statistics() is called.
struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  // Compute mean, err, min and max from the samples in v.
  //
  // Degenerate inputs are handled explicitly rather than producing NaN or
  // dereferencing an end iterator:
  //   - empty v      : all four fields are set to 0
  //   - single sample: err is 0 (no spread can be estimated from one sample)
  void statistics(const std::vector<double> &v){
    const auto n = v.size();
    if ( n == 0 ) {
      mean = err = min = max = 0.0;
      return;
    }

    const double avg = std::accumulate(v.begin(), v.end(), 0.0) / n;

    // Sum of squared deviations, accumulated in sample order; the average is
    // captured by value so the lambda does not depend on `this`.
    const double sq_sum = std::accumulate(v.begin(), v.end(), 0.0,
        [avg](double acc, double x) { const double d = x - avg; return acc + d*d; });

    mean = avg;
    // n*(n-1) is zero for a single sample, so guard the division.
    err  = (n > 1) ? std::sqrt(sq_sum / (n*(n - 1))) : 0.0;

    const auto extremes = std::minmax_element(v.begin(), v.end());
    min = *extremes.first;
    max = *extremes.second;
  }
};
// Print the column-header line for a benchmark results table: lattice size L,
// fifth-dimension size Ls, message size in bytes, and the uni- and
// bi-directional bandwidth columns, each labelled "(err/min/max)".
// The whole line is emitted as one streamed expression beginning with
// GridLogMessage.
// NOTE(review): GridLogMessage is defined elsewhere in the project; presumably
// it writes the log prefix -- confirm before splitting this expression.
// NOTE(review): there is no separator between "bytes" and "MB/s uni", so the
// two labels run together in the output -- confirm whether that is intended.
void header(){
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
@ -40,15 +66,19 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
int Nloop=10; int Nloop=500;
int nmu=0; int nmu=0;
int maxlat=24;
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
std::vector<double> t_time(Nloop);
time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; header();
int maxlat=24;
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -65,8 +95,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
@ -102,18 +132,24 @@ int main (int argc, char ** argv)
} }
Grid.SendToRecvFromComplete(requests); Grid.SendToRecvFromComplete(requests);
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond();
timestat.statistics(t_time);
double dbytes = bytes; double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm; double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
} }
} }
@ -121,8 +157,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; header();
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -138,8 +173,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
double start=usecond();
ncomm=0; ncomm=0;
for(int mu=0;mu<4;mu++){ for(int mu=0;mu<4;mu++){
@ -178,27 +213,34 @@ int main (int argc, char ** argv)
} }
} }
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond(); timestat.statistics(t_time);
double dbytes = bytes; double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm; double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
double time = stop-start; std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
} }
} }
Nloop=10;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; header();
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -221,8 +263,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
@ -258,28 +300,34 @@ int main (int argc, char ** argv)
} }
Grid.StencilSendToRecvFromComplete(requests); Grid.StencilSendToRecvFromComplete(requests);
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond();
timestat.statistics(t_time);
double dbytes = bytes; double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm; double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
} }
} }
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; header();
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -302,8 +350,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
@ -341,19 +389,27 @@ int main (int argc, char ** argv)
} }
} }
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond();
timestat.statistics(t_time);
double dbytes = bytes; double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm; double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
} }
} }

View File

@ -1,28 +1,22 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
@ -151,9 +145,7 @@ int main (int argc, char ** argv)
RealD M5 =1.8; RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors; RealD NP = UGrid->_Nprocessors;
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage << "Creating action operator " << std::endl;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@ -163,16 +155,22 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =1000; int ncall =1000;
if (1) { if (1) {
FGrid->Barrier(); FGrid->Barrier();
Dw.ZeroCounters(); Dw.ZeroCounters();
Dw.Dhop(src,result,0); Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START; __SSC_START;
@ -190,6 +188,7 @@ int main (int argc, char ** argv)
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
@ -206,6 +205,34 @@ int main (int argc, char ** argv)
Dw.Report(); Dw.Report();
} }
DomainWallFermionRL DwH(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
if (1) {
FGrid->Barrier();
DwH.ZeroCounters();
DwH.Dhop(src,result,0);
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwH.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
assert (norm2(err)< 1.0e-3 );
DwH.Report();
}
if (1) if (1)
{ {
@ -214,6 +241,10 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -245,6 +276,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; // std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
sDw.Report(); sDw.Report();
RealD sum=0; RealD sum=0;
@ -277,6 +309,10 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
@ -316,6 +352,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
sDw.Report(); sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@ -394,14 +431,15 @@ int main (int argc, char ** argv)
// S-direction is INNERMOST and takes no part in the parity. // S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
static int Comms; // these are a temporary hack
std::cout << GridLogMessage<< "*********************************************************" <<std::endl; std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl; std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -422,6 +460,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report(); Dw.Report();
} }
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
@ -453,3 +492,4 @@ int main (int argc, char ** argv)
Grid_finalize(); Grid_finalize();
} }

View File

@ -35,8 +35,9 @@ using namespace Grid::QCD;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
#define LMAX (32)
int Nloop=1000; int Nloop=200;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi(); std::vector<int> mpi_layout = GridDefaultMpi();
@ -50,7 +51,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){ for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -82,7 +83,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){ for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -113,7 +114,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){ for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@ -144,7 +145,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl; std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
for(int lat=2;lat<=32;lat+=2){ for(int lat=2;lat<=LMAX;lat+=2){
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

View File

@ -1,11 +1,7 @@
include Make.inc include Make.inc
simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o bench-local: all
./Benchmark_su3
EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a ./Benchmark_memory_bandwidth
./Benchmark_wilson
libsimple_su3_test_a_SOURCES = simple_su3_test.cc ./Benchmark_dwf --dslash-unroll
libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
libsimple_simd_test_a_SOURCES = simple_simd_test.cc

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
echo "-- deploying Eigen source..." echo "-- deploying Eigen source..."
wget ${EIGEN_URL} --no-check-certificate wget ${EIGEN_URL} --no-check-certificate

View File

@ -1,16 +1,19 @@
AC_PREREQ([2.63]) AC_PREREQ([2.63])
AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid]) AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD AC_CANONICAL_BUILD
AC_CANONICAL_HOST AC_CANONICAL_HOST
AC_CANONICAL_TARGET AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects) AM_INIT_AUTOMAKE([subdir-objects 1.13])
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
################ Get git info
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
############### Checks for programs ############### Checks for programs
CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB AC_PROG_RANLIB
@ -24,12 +27,15 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code]) [version of g++ that will compile the code])
CXXFLAGS="-O3 $CXXFLAGS"
############### Checks for typedefs, structures, and compiler characteristics ############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
AC_TYPE_UINT64_T AC_TYPE_UINT64_T
############### OpenMP ############### OpenMP
AC_OPENMP AC_OPENMP
ac_openmp=no ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then if test "${OPENMP_CXXFLAGS}X" != "X"; then
@ -60,16 +66,23 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
############### FFTW3 ############### FFTW3
AC_ARG_WITH([fftw], AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix], [AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])], [try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
############### lapack ############### LIME
AC_ARG_WITH([lime],
[AS_HELP_STRING([--with-lime=prefix],
[try this for a non-standard install prefix of the LIME library])],
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
############### lapack
AC_ARG_ENABLE([lapack], AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in case ${ac_LAPACK} in
@ -83,6 +96,18 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);; AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
esac esac
############### FP16 conversions
AC_ARG_ENABLE([sfw-fp16],
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
case ${ac_SFW_FP16} in
yes)
AC_DEFINE([SFW_FP16],[1],[software conversion to fp16]);;
no);;
*)
AC_MSG_ERROR(["SFW FP16 option not supported ${ac_SFW_FP16}"]);;
esac
############### MKL ############### MKL
AC_ARG_ENABLE([mkl], AC_ARG_ENABLE([mkl],
[AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])], [AC_HELP_STRING([--enable-mkl=yes|no|prefix], [enable Intel MKL for LAPACK & FFTW])],
@ -108,7 +133,7 @@ AC_ARG_WITH([hdf5],
############### first-touch ############### first-touch
# The value passed to --enable-numa is stored by autoconf in the lowercase
# shell variable enable_numa. Shell variable names are case-sensitive, so
# reading enable_NUMA would always yield an empty string.
AC_ARG_ENABLE([numa],
    [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
    [ac_NUMA=${enable_numa}],[ac_NUMA=no])
case ${ac_NUMA} in case ${ac_NUMA} in
@ -134,8 +159,8 @@ if test "${ac_MKL}x" != "nox"; then
fi fi
AC_SEARCH_LIBS([__gmpf_init], [gmp], AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr], [AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1], [AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])] [Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@ -144,7 +169,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")]) [AC_MSG_ERROR("LAPACK enabled but library not found")])
fi fi
AC_SEARCH_LIBS([fftw_execute], [fftw3], AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@ -152,6 +177,14 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true]) [have_fftw=true])
# Optional C-LIME library used for ILDG file support. A missing library only
# produces a warning; HAVE_LIME and have_lime are set when it is found.
AC_SEARCH_LIBS([limeCreateReader], [lime],
               [AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
               [have_lime=true],
               [AC_MSG_WARN([C-LIME library was not found in your system.
In order to use ILDG file format please install or provide the correct path to your installation
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/])])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true] [have_hdf5=true]
@ -176,19 +209,26 @@ case ${ax_cv_cxx_compiler_vendor} in
case ${ac_SIMD} in case ${ac_SIMD} in
SSE4) SSE4)
AC_DEFINE([SSE4],[1],[SSE4 intrinsics]) AC_DEFINE([SSE4],[1],[SSE4 intrinsics])
SIMD_FLAGS='-msse4.2';; case ${ac_SFW_FP16} in
yes)
SIMD_FLAGS='-msse4.2';;
no)
SIMD_FLAGS='-msse4.2 -mf16c';;
*)
AC_MSG_ERROR(["SFW_FP16 must be either yes or no value ${ac_SFW_FP16} "]);;
esac;;
AVX) AVX)
AC_DEFINE([AVX1],[1],[AVX intrinsics]) AC_DEFINE([AVX1],[1],[AVX intrinsics])
SIMD_FLAGS='-mavx';; SIMD_FLAGS='-mavx -mf16c';;
AVXFMA4) AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';; SIMD_FLAGS='-mavx -mfma4 -mf16c';;
AVXFMA) AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3]) AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';; SIMD_FLAGS='-mavx -mfma -mf16c';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';; SIMD_FLAGS='-mavx2 -mfma -mf16c';;
AVX512) AVX512)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
@ -297,7 +337,7 @@ case ${ac_COMMS} in
comms_type='shmem' comms_type='shmem'
;; ;;
*) *)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;; ;;
esac esac
case ${ac_COMMS} in case ${ac_COMMS} in
@ -334,7 +374,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;; ;;
esac esac
@ -351,7 +391,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;; ;;
esac esac
@ -363,7 +403,7 @@ case ${ac_CHROMA} in
yes|no) yes|no)
;; ;;
*) *)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;; ;;
esac esac
@ -384,12 +424,65 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Output ############### Output
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
GRID_LIBS=$LIBS
GRID_SHORT_SHA=`git rev-parse --short HEAD`
GRID_SHA=`git rev-parse HEAD`
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS]) AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS]) AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS]) AC_SUBST([AM_LDFLAGS])
AC_SUBST([GRID_CXXFLAGS])
AC_SUBST([GRID_LDFLAGS])
AC_SUBST([GRID_LIBS])
AC_SUBST([GRID_SHA])
AC_SUBST([GRID_BRANCH])
git_commit=`cd $srcdir && ./scripts/configure.commit`
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- GIT VERSION -------------------------------------
$git_commit
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
Software FP16 conversion : ${ac_SFW_FP16}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > grid.configure.summary
GRID_SUMMARY="`cat grid.configure.summary`"
AM_SUBST_NOTMAKE([GRID_SUMMARY])
AC_SUBST([GRID_SUMMARY])
AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile) AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile) AC_CONFIG_FILES(tests/Makefile)
@ -400,44 +493,15 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile) AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile) AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile) AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(extras/Makefile) AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile) AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_CONFIG_FILES(extras/qed-fvol/Makefile)
AC_OUTPUT AC_OUTPUT
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ echo ""
Summary of configuration for $PACKAGE v$VERSION cat grid.configure.summary
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ echo ""
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > config.summary
echo ""
cat config.summary
echo ""

View File

@ -162,7 +162,8 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)" sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \ #define DEFINE_MEMPEAK \
auto memPeak = [this](const std::vector<unsigned int> &program)\ GeneticScheduler<unsigned int>::ObjFunc memPeak = \
[this](const std::vector<unsigned int> &program)\
{\ {\
unsigned int memPeak;\ unsigned int memPeak;\
bool msg;\ bool msg;\

View File

@ -41,9 +41,10 @@ using namespace Hadrons;
// constructor ///////////////////////////////////////////////////////////////// // constructor /////////////////////////////////////////////////////////////////
Environment::Environment(void) Environment::Environment(void)
{ {
nd_ = GridDefaultLatt().size(); dim_ = GridDefaultLatt();
nd_ = dim_.size();
grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( grid4d_.reset(SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), dim_, GridDefaultSimd(nd_, vComplex::Nsimd()),
GridDefaultMpi())); GridDefaultMpi()));
gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get()));
auto loc = getGrid()->LocalDimensions(); auto loc = getGrid()->LocalDimensions();
@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const
return nd_; return nd_;
} }
// Returns, by value, the dimension vector held in dim_ (filled from
// GridDefaultLatt() by the constructor).
std::vector<int> Environment::getDim(void) const
{
    std::vector<int> latticeDims = dim_;

    return latticeDims;
}
// Returns the mu-th entry of dim_. The index is used as-is; no range check
// is performed here.
int Environment::getDim(const unsigned int mu) const
{
    const int extent = dim_[mu];

    return extent;
}
// random number generator ///////////////////////////////////////////////////// // random number generator /////////////////////////////////////////////////////
void Environment::setSeed(const std::vector<int> &seed) void Environment::setSeed(const std::vector<int> &seed)
{ {
@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const
return getModuleType(getModuleAddress(name)); return getModuleType(getModuleAddress(name));
} }
// Returns the innermost namespace qualifier of the module type string, i.e.
// the text between the last two "::" separators ("A::B::C" gives "B").
// If the type has a single "::" the leading qualifier is returned, and if it
// has none the result is empty. This avoids the unsigned wrap-around that
// position arithmetic on std::string::npos would otherwise cause.
std::string Environment::getModuleNamespace(const unsigned int address) const
{
    const std::string type = getModuleType(address);
    const auto        last = type.rfind("::");

    if (last == std::string::npos)
    {
        // unqualified type name: there is no namespace to report
        return std::string();
    }

    std::string::size_type start = 0;

    // a previous separator needs at least two characters before `last`
    if (last >= 2)
    {
        const auto prev = type.rfind("::", last - 2);

        if (prev != std::string::npos)
        {
            start = prev + 2;
        }
    }

    return type.substr(start, last - start);
}
// Name-based overload: looks up the module address for `name` and forwards
// to the address-based getModuleNamespace.
std::string Environment::getModuleNamespace(const std::string name) const
{
    const auto address = getModuleAddress(name);

    return getModuleNamespace(address);
}
bool Environment::hasModule(const unsigned int address) const bool Environment::hasModule(const unsigned int address) const
{ {
return (address < module_.size()); return (address < module_.size());
@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const
{ {
if (hasRegisteredObject(address)) if (hasRegisteredObject(address))
{ {
return typeName(object_[address].type); if (object_[address].type)
{
return typeName(object_[address].type);
}
else
{
return "<no type>";
}
} }
else if (hasObject(address)) else if (hasObject(address))
{ {
@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const
return getObjectSize(getObjectAddress(name)); return getObjectSize(getObjectAddress(name));
} }
// Return the `module` field recorded for the object stored at `address`.
// An address for which hasObject() is false is reported through
// HADRON_ERROR.
unsigned int Environment::getObjectModule(const unsigned int address) const
{
    // Guard clause: reject unknown addresses first. The braces are kept on
    // purpose so the whole macro expansion stays inside the condition.
    if (!hasObject(address))
    {
        HADRON_ERROR("no object with address " + std::to_string(address));
    }
    else
    {
        const auto &entry = object_[address];

        return entry.module;
    }
}
// Name-based overload: resolve the object name to its address with
// getObjectAddress() and forward to the address-based getObjectModule().
unsigned int Environment::getObjectModule(const std::string name) const
{
return getObjectModule(getObjectAddress(name));
}
unsigned int Environment::getObjectLs(const unsigned int address) const unsigned int Environment::getObjectLs(const unsigned int address) const
{ {
if (hasRegisteredObject(address)) if (hasRegisteredObject(address))

View File

@ -106,6 +106,8 @@ public:
void createGrid(const unsigned int Ls); void createGrid(const unsigned int Ls);
GridCartesian * getGrid(const unsigned int Ls = 1) const; GridCartesian * getGrid(const unsigned int Ls = 1) const;
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const;
std::vector<int> getDim(void) const;
int getDim(const unsigned int mu) const;
unsigned int getNd(void) const; unsigned int getNd(void) const;
// random number generator // random number generator
void setSeed(const std::vector<int> &seed); void setSeed(const std::vector<int> &seed);
@ -131,6 +133,8 @@ public:
std::string getModuleName(const unsigned int address) const; std::string getModuleName(const unsigned int address) const;
std::string getModuleType(const unsigned int address) const; std::string getModuleType(const unsigned int address) const;
std::string getModuleType(const std::string name) const; std::string getModuleType(const std::string name) const;
std::string getModuleNamespace(const unsigned int address) const;
std::string getModuleNamespace(const std::string name) const;
bool hasModule(const unsigned int address) const; bool hasModule(const unsigned int address) const;
bool hasModule(const std::string name) const; bool hasModule(const std::string name) const;
Graph<unsigned int> makeModuleGraph(void) const; Graph<unsigned int> makeModuleGraph(void) const;
@ -171,6 +175,8 @@ public:
std::string getObjectType(const std::string name) const; std::string getObjectType(const std::string name) const;
Size getObjectSize(const unsigned int address) const; Size getObjectSize(const unsigned int address) const;
Size getObjectSize(const std::string name) const; Size getObjectSize(const std::string name) const;
unsigned int getObjectModule(const unsigned int address) const;
unsigned int getObjectModule(const std::string name) const;
unsigned int getObjectLs(const unsigned int address) const; unsigned int getObjectLs(const unsigned int address) const;
unsigned int getObjectLs(const std::string name) const; unsigned int getObjectLs(const std::string name) const;
bool hasObject(const unsigned int address) const; bool hasObject(const unsigned int address) const;
@ -181,6 +187,10 @@ public:
bool hasCreatedObject(const std::string name) const; bool hasCreatedObject(const std::string name) const;
bool isObject5d(const unsigned int address) const; bool isObject5d(const unsigned int address) const;
bool isObject5d(const std::string name) const; bool isObject5d(const std::string name) const;
template <typename T>
bool isObjectOfType(const unsigned int address) const;
template <typename T>
bool isObjectOfType(const std::string name) const;
Environment::Size getTotalSize(void) const; Environment::Size getTotalSize(void) const;
void addOwnership(const unsigned int owner, void addOwnership(const unsigned int owner,
const unsigned int property); const unsigned int property);
@ -197,6 +207,7 @@ private:
bool dryRun_{false}; bool dryRun_{false};
unsigned int traj_, locVol_; unsigned int traj_, locVol_;
// grids // grids
std::vector<int> dim_;
GridPt grid4d_; GridPt grid4d_;
std::map<unsigned int, GridPt> grid5d_; std::map<unsigned int, GridPt> grid5d_;
GridRbPt gridRb4d_; GridRbPt gridRb4d_;
@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const
else else
{ {
HADRON_ERROR("object with address " + std::to_string(address) + HADRON_ERROR("object with address " + std::to_string(address) +
" does not have type '" + typeid(T).name() + " does not have type '" + typeName(&typeid(T)) +
"' (has type '" + getObjectType(address) + "')"); "' (has type '" + getObjectType(address) + "')");
} }
} }
@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name)
return createLattice<T>(getObjectAddress(name)); return createLattice<T>(getObjectAddress(name));
} }
template <typename T>
bool Environment::isObjectOfType(const unsigned int address) const
{
if (hasRegisteredObject(address))
{
if (auto h = dynamic_cast<Holder<T> *>(object_[address].data.get()))
{
return true;
}
else
{
return false;
}
}
else if (hasObject(address))
{
HADRON_ERROR("object with address " + std::to_string(address) +
" exists but is not registered");
}
else
{
HADRON_ERROR("no object with address " + std::to_string(address));
}
}
// Name-based overload: resolve the object name to its address with
// getObjectAddress() and forward to the address-based isObjectOfType<T>().
template <typename T>
bool Environment::isObjectOfType(const std::string name) const
{
return isObjectOfType<T>(getObjectAddress(name));
}
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Environment_hpp_ #endif // Hadrons_Environment_hpp_

View File

@ -55,7 +55,7 @@ using Grid::operator<<;
#define FIMPL WilsonImplR #define FIMPL WilsonImplR
#endif #endif
#ifndef SIMPL #ifndef SIMPL
#define SIMPL ScalarImplR #define SIMPL ScalarImplCR
#endif #endif
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
@ -65,20 +65,25 @@ BEGIN_HADRONS_NAMESPACE
typedef FermionOperator<FImpl> FMat##suffix; \ typedef FermionOperator<FImpl> FMat##suffix; \
typedef typename FImpl::FermionField FermionField##suffix; \ typedef typename FImpl::FermionField FermionField##suffix; \
typedef typename FImpl::PropagatorField PropagatorField##suffix; \ typedef typename FImpl::PropagatorField PropagatorField##suffix; \
typedef typename FImpl::SitePropagator SitePropagator##suffix; typedef typename FImpl::SitePropagator SitePropagator##suffix; \
typedef std::vector<typename FImpl::SitePropagator::scalar_object> \
SlicedPropagator##suffix;
#define GAUGE_TYPE_ALIASES(FImpl, suffix)\ #define GAUGE_TYPE_ALIASES(FImpl, suffix)\
typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;
#define SCALAR_TYPE_ALIASES(SImpl, suffix)\ #define SCALAR_TYPE_ALIASES(SImpl, suffix)\
typedef typename SImpl::ScalarField ScalarField##suffix;\ typedef typename SImpl::Field ScalarField##suffix;\
typedef typename SImpl::PropagatorField PropagatorField##suffix; typedef typename SImpl::Field PropagatorField##suffix;
#define SOLVER_TYPE_ALIASES(FImpl, suffix)\ #define SOLVER_TYPE_ALIASES(FImpl, suffix)\
typedef std::function<void(FermionField##suffix &,\ typedef std::function<void(FermionField##suffix &,\
const FermionField##suffix &)> SolverFn##suffix; const FermionField##suffix &)> SolverFn##suffix;
#define TYPE_ALIASES(FImpl, suffix)\ #define SINK_TYPE_ALIASES(suffix)\
typedef std::function<SlicedPropagator##suffix(const PropagatorField##suffix &)> SinkFn##suffix;
#define FGS_TYPE_ALIASES(FImpl, suffix)\
FERM_TYPE_ALIASES(FImpl, suffix)\ FERM_TYPE_ALIASES(FImpl, suffix)\
GAUGE_TYPE_ALIASES(FImpl, suffix)\ GAUGE_TYPE_ALIASES(FImpl, suffix)\
SOLVER_TYPE_ALIASES(FImpl, suffix) SOLVER_TYPE_ALIASES(FImpl, suffix)

View File

@ -1,10 +1,13 @@
#include <Grid/Hadrons/Modules/MAction/DWF.hpp> #include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp> #include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp> #include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp> #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp> #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp> #include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp> #include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/StochEm.hpp> #include <Grid/Hadrons/Modules/MGauge/StochEm.hpp>
@ -14,6 +17,7 @@
#include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp> #include <Grid/Hadrons/Modules/MScalar/FreeProp.hpp>
#include <Grid/Hadrons/Modules/MScalar/Scalar.hpp> #include <Grid/Hadrons/Modules/MScalar/Scalar.hpp>
#include <Grid/Hadrons/Modules/MScalar/ScalarVP.hpp> #include <Grid/Hadrons/Modules/MScalar/ScalarVP.hpp>
#include <Grid/Hadrons/Modules/MSink/Point.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp> #include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_DWF_hpp_ #ifndef Hadrons_MAction_DWF_hpp_
#define Hadrons_DWF_hpp_ #define Hadrons_MAction_DWF_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -48,14 +48,15 @@ public:
std::string, gauge, std::string, gauge,
unsigned int, Ls, unsigned int, Ls,
double , mass, double , mass,
double , M5); double , M5,
std::string , boundary);
}; };
template <typename FImpl> template <typename FImpl>
class TDWF: public Module<DWFPar> class TDWF: public Module<DWFPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FGS_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TDWF(const std::string name); TDWF(const std::string name);
@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void)
<< par().mass << ", M5= " << par().M5 << " and Ls= " << par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'" << par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl; << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls); env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge); auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid(); auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid(); auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls); auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls); auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5); par().mass, par().M5,
implParams);
env().setObject(getName(), fMatPt); env().setObject(getName(), fMatPt);
} }
@ -131,4 +137,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_DWF_hpp_ #endif // Hadrons_MAction_DWF_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Wilson_hpp_ #ifndef Hadrons_MAction_Wilson_hpp_
#define Hadrons_Wilson_hpp_ #define Hadrons_MAction_Wilson_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -46,14 +46,15 @@ class WilsonPar: Serializable
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge, std::string, gauge,
double , mass); double , mass,
std::string, boundary);
}; };
template <typename FImpl> template <typename FImpl>
class TWilson: public Module<WilsonPar> class TWilson: public Module<WilsonPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FGS_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TWilson(const std::string name); TWilson(const std::string name);
@ -112,10 +113,15 @@ void TWilson<FImpl>::execute()
{ {
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl; << " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge); auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid(); auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid(); auto &gridRb = *env().getRbGrid();
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
implParams);
env().setObject(getName(), fMatPt); env().setObject(getName(), fMatPt);
} }

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Baryon_hpp_ #ifndef Hadrons_MContraction_Baryon_hpp_
#define Hadrons_Baryon_hpp_ #define Hadrons_MContraction_Baryon_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -55,9 +55,9 @@ template <typename FImpl1, typename FImpl2, typename FImpl3>
class TBaryon: public Module<BaryonPar> class TBaryon: public Module<BaryonPar>
{ {
public: public:
TYPE_ALIASES(FImpl1, 1); FERM_TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2); FERM_TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3); FERM_TYPE_ALIASES(FImpl3, 3);
class Result: Serializable class Result: Serializable
{ {
public: public:
@ -121,11 +121,11 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
// FIXME: do contractions // FIXME: do contractions
write(writer, "meson", result); // write(writer, "meson", result);
} }
END_MODULE_NAMESPACE END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Baryon_hpp_ #endif // Hadrons_MContraction_Baryon_hpp_

View File

@ -0,0 +1,144 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_DiscLoop_hpp_
#define Hadrons_MContraction_DiscLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* DiscLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
// Parameters of the DiscLoop module, as used by TDiscLoop:
//   q_loop : name of the propagator object fetched from the environment,
//   gamma  : gamma matrix multiplied onto the propagator inside the trace,
//   output : argument handed to the CorrWriter that stores the result.
class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string, q_loop,
Gamma::Algebra, gamma,
std::string, output);
};
// Module computing a disconnected loop contraction from one propagator
// (see execute()). FImpl selects the fermion implementation from which the
// field type aliases are derived.
template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
// NOTE(review): the aliases and Result below sit before the first access
// specifier, so they are private to the class.
FERM_TYPE_ALIASES(FImpl,);
// Serialised output record: the inserted gamma matrix and one complex
// number per entry of the sliced trace.
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
// Register the FIMPL instantiation under the name DiscLoop in MContraction.
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
/******************************************************************************
* TDiscLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forward the module name to the Module<DiscLoopPar> base; nothing else is
// initialised here.
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
// Input dependencies: the single propagator named by the q_loop parameter.
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
    return {par().q_loop};
}
// Output products: a single entry carrying the module's own name.
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
    return {getName()};
}
// setup ///////////////////////////////////////////////////////////////////////
// Intentionally empty: this module performs no work in setup().
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
// Compute trace(gamma * q_loop), reduce it with sliceSum along direction Tp
// and write the resulting values, together with the gamma matrix used, under
// the tag "disc" through a CorrWriter opened on the `output` parameter.
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
    LOG(Message) << "Computing disconnected loop contraction '" << getName()
                 << "' using '" << par().q_loop << "' with " << par().gamma
                 << " insertion." << std::endl;

    CorrWriter            writer(par().output);
    auto                  &loopProp =
        *env().template getObject<PropagatorField>(par().q_loop);
    LatticeComplex        traced(env().getGrid());
    Gamma                 insertion(par().gamma);
    std::vector<TComplex> sliced;
    Result                result;

    traced = trace(insertion*loopProp);
    sliceSum(traced, sliced, Tp);

    // copy the sliced values into the serialisable record, one entry each
    result.gamma = par().gamma;
    result.corr.resize(sliced.size());
    unsigned int t = 0;
    for (auto &slice : sliced)
    {
        result.corr[t] = TensorRemove(slice);
        ++t;
    }
    write(writer, "disc", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_DiscLoop_hpp_

View File

@ -0,0 +1,170 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_Gamma3pt_hpp_
#define Hadrons_MContraction_Gamma3pt_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
* 3pt contraction with gamma matrix insertion.
*
* Schematic:
*
* q2 q3
* /----<------*------<----¬
* / gamma \
* / \
* i * * f
* \ /
* \ /
* \----------->----------/
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*/
/******************************************************************************
* Gamma3pt *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
// Parameters of the Gamma3pt module, as used by TGamma3pt:
//   q1, q2, q3 : names of the three propagator objects fetched from the
//                environment,
//   gamma      : gamma matrix inserted in the trace,
//   output     : argument handed to the CorrWriter that stores the result.
class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
std::string, output);
};
// Module computing a three-propagator contraction with a gamma matrix
// insertion (see execute()). FImpl1/2/3 select the fermion implementations
// from which the type aliases suffixed 1, 2 and 3 are derived.
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
// NOTE(review): the aliases and Result below sit before the first access
// specifier, so they are private to the class.
FERM_TYPE_ALIASES(FImpl1, 1);
FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(FImpl3, 3);
// Serialised output record: the inserted gamma matrix and one complex
// number per entry of the sliced trace.
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TGamma3pt(const std::string name);
// destructor
virtual ~TGamma3pt(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
// Register the all-FIMPL instantiation under the name Gamma3pt in
// MContraction; ARG() wraps the template-id because it contains commas.
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TGamma3pt implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Forward the module name to the Module<Gamma3ptPar> base; nothing else is
// initialised here.
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
// Input dependencies: the three propagators named by q1, q2 and q3, in that
// order.
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
    return {par().q1, par().q2, par().q3};
}
// Output products: a single entry carrying the module's own name.
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
    return {getName()};
}
// setup ///////////////////////////////////////////////////////////////////////
// Intentionally empty: this module performs no work in setup().
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
// Compute trace(g5*q1*adj(q2)*(g5*gamma)*q3), reduce it with sliceSum along
// direction Tp and write the resulting values, together with the gamma
// matrix used, under the tag "gamma3pt" through a CorrWriter opened on the
// `output` parameter.
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
    LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
                 << " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
                 << par().q3 << "', with " << par().gamma << " insertion."
                 << std::endl;

    CorrWriter            writer(par().output);
    auto                  &prop1 =
        *env().template getObject<PropagatorField1>(par().q1);
    auto                  &prop2 =
        *env().template getObject<PropagatorField2>(par().q2);
    auto                  &prop3 =
        *env().template getObject<PropagatorField3>(par().q3);
    LatticeComplex        traced(env().getGrid());
    Gamma                 g5(Gamma::Algebra::Gamma5);
    Gamma                 insertion(par().gamma);
    std::vector<TComplex> sliced;
    Result                result;

    traced = trace(g5*prop1*adj(prop2)*(g5*insertion)*prop3);
    sliceSum(traced, sliced, Tp);

    // copy the sliced values into the serialisable record, one entry each
    result.gamma = par().gamma;
    result.corr.resize(sliced.size());
    unsigned int t = 0;
    for (auto &slice : sliced)
    {
        result.corr[t] = TensorRemove(slice);
        ++t;
    }
    write(writer, "gamma3pt", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_Gamma3pt_hpp_

View File

@ -29,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Meson_hpp_ #ifndef Hadrons_MContraction_Meson_hpp_
#define Hadrons_Meson_hpp_ #define Hadrons_MContraction_Meson_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -69,7 +69,7 @@ public:
std::string, q1, std::string, q1,
std::string, q2, std::string, q2,
std::string, gammas, std::string, gammas,
std::string, mom, std::string, sink,
std::string, output); std::string, output);
}; };
@ -77,8 +77,10 @@ template <typename FImpl1, typename FImpl2>
class TMeson: public Module<MesonPar> class TMeson: public Module<MesonPar>
{ {
public: public:
TYPE_ALIASES(FImpl1, 1); FERM_TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2); FERM_TYPE_ALIASES(FImpl2, 2);
FERM_TYPE_ALIASES(ScalarImplCR, Scalar);
SINK_TYPE_ALIASES(Scalar);
class Result: Serializable class Result: Serializable
{ {
public: public:
@ -115,7 +117,7 @@ TMeson<FImpl1, FImpl2>::TMeson(const std::string name)
template <typename FImpl1, typename FImpl2> template <typename FImpl1, typename FImpl2>
std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void) std::vector<std::string> TMeson<FImpl1, FImpl2>::getInput(void)
{ {
std::vector<std::string> input = {par().q1, par().q2}; std::vector<std::string> input = {par().q1, par().q2, par().sink};
return input; return input;
} }
@ -131,12 +133,11 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
template <typename FImpl1, typename FImpl2> template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
{ {
gammaList.clear();
// Determine gamma matrices to insert at source/sink. // Determine gamma matrices to insert at source/sink.
if (par().gammas.compare("all") == 0) if (par().gammas.compare("all") == 0)
{ {
// Do all contractions. // Do all contractions.
unsigned int n_gam = Ns * Ns;
gammaList.resize(n_gam*n_gam);
for (unsigned int i = 1; i < Gamma::nGamma; i += 2) for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
{ {
for (unsigned int j = 1; j < Gamma::nGamma; j += 2) for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
@ -155,6 +156,9 @@ void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
#define mesonConnected(q1, q2, gSnk, gSrc) \
(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2)
template <typename FImpl1, typename FImpl2> template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::execute(void) void TMeson<FImpl1, FImpl2>::execute(void)
{ {
@ -162,43 +166,72 @@ void TMeson<FImpl1, FImpl2>::execute(void)
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'" << " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl; << std::endl;
CorrWriter writer(par().output); CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
std::vector<GammaPair> gammaList;
std::vector<TComplex> buf; std::vector<TComplex> buf;
std::vector<Result> result; std::vector<Result> result;
std::vector<Real> p; Gamma g5(Gamma::Algebra::Gamma5);
std::vector<GammaPair> gammaList;
p = strToVec<Real>(par().mom); int nt = env().getDim(Tp);
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Complex i(0.0,1.0);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
parseGammaString(gammaList); parseGammaString(gammaList);
result.resize(gammaList.size()); result.resize(gammaList.size());
for (unsigned int i = 0; i < result.size(); ++i) for (unsigned int i = 0; i < result.size(); ++i)
{ {
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph;
sliceSum(c, buf, Tp);
result[i].gamma_snk = gammaList[i].first; result[i].gamma_snk = gammaList[i].first;
result[i].gamma_src = gammaList[i].second; result[i].gamma_src = gammaList[i].second;
result[i].corr.resize(buf.size()); result[i].corr.resize(nt);
for (unsigned int t = 0; t < buf.size(); ++t) }
if (env().template isObjectOfType<SlicedPropagator1>(par().q1) and
env().template isObjectOfType<SlicedPropagator2>(par().q2))
{
SlicedPropagator1 &q1 = *env().template getObject<SlicedPropagator1>(par().q1);
SlicedPropagator2 &q2 = *env().template getObject<SlicedPropagator2>(par().q2);
LOG(Message) << "(propagator already sinked)" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
{ {
result[i].corr[t] = TensorRemove(buf[t]); Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc)));
}
}
}
else
{
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid());
LOG(Message) << "(using sink '" << par().sink << "')" << std::endl;
for (unsigned int i = 0; i < result.size(); ++i)
{
Gamma gSnk(gammaList[i].first);
Gamma gSrc(gammaList[i].second);
std::string ns;
ns = env().getModuleNamespace(env().getObjectModule(par().sink));
if (ns == "MSource")
{
PropagatorField1 &sink =
*env().template getObject<PropagatorField1>(par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink);
sliceSum(c, buf, Tp);
}
else if (ns == "MSink")
{
SinkFnScalar &sink = *env().template getObject<SinkFnScalar>(par().sink);
c = trace(mesonConnected(q1, q2, gSnk, gSrc));
buf = sink(c);
}
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(buf[t]);
}
} }
} }
write(writer, "meson", result); write(writer, "meson", result);
@ -208,4 +241,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Meson_hpp_ #endif // Hadrons_MContraction_Meson_hpp_

View File

@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_WeakHamiltonian_hpp_ #ifndef Hadrons_MContraction_WeakHamiltonian_hpp_
#define Hadrons_WeakHamiltonian_hpp_ #define Hadrons_MContraction_WeakHamiltonian_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -79,8 +79,36 @@ public:
std::string, output); std::string, output);
}; };
/*
 * MAKE_WEAK_MODULE(modname)
 *
 * Declares the class T<modname>, derived from Module<WeakHamiltonianPar>, and
 * registers it under the name <modname> in the MContraction namespace.
 * The generated class carries:
 *  - the fermion type aliases for FIMPL,
 *  - a nested serializable Result holding a name and a vector of Complex,
 *  - declarations (not definitions) of the constructor, getInput, getOutput,
 *    setup and execute, which the corresponding source file must define,
 *  - a VA_label member initialised to {"V", "A"}.
 * Comments inside the macro body use the block form because every line is
 * joined by a backslash continuation.
 */
#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
FERM_TYPE_ALIASES(FIMPL,)\
class Result: Serializable\
{\
public:\
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
std::string, name,\
std::vector<Complex>, corr);\
};\
public:\
/* constructor */ \
T##modname(const std::string name);\
/* destructor */ \
virtual ~T##modname(void) = default;\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);
END_MODULE_NAMESPACE END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonian_hpp_ #endif // Hadrons_MContraction_WeakHamiltonian_hpp_

View File

@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_WeakHamiltonianEye_hpp_ #ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_
#define Hadrons_WeakHamiltonianEye_hpp_ #define Hadrons_MContraction_WeakHamiltonianEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
@ -49,35 +49,10 @@ enum
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma) #define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma) #define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
class TWeakHamiltonianEye: public Module<WeakHamiltonianPar> MAKE_WEAK_MODULE(WeakHamiltonianEye)
{
public:
TYPE_ALIASES(FIMPL,)
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::string, name,
std::vector<Complex>, corr);
};
public:
// constructor
TWeakHamiltonianEye(const std::string name);
// destructor
virtual ~TWeakHamiltonianEye(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(WeakHamiltonianEye, TWeakHamiltonianEye, MContraction);
END_MODULE_NAMESPACE END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianEye_hpp_ #endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_

View File

@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_WeakHamiltonianNonEye_hpp_ #ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
#define Hadrons_WeakHamiltonianNonEye_hpp_ #define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp> #include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
@ -48,35 +48,10 @@ enum
// Wing and Connected subdiagram contractions // Wing and Connected subdiagram contractions
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma) #define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
class TWeakHamiltonianNonEye: public Module<WeakHamiltonianPar> MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
{
public:
TYPE_ALIASES(FIMPL,)
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
std::string, name,
std::vector<Complex>, corr);
};
public:
// constructor
TWeakHamiltonianNonEye(const std::string name);
// destructor
virtual ~TWeakHamiltonianNonEye(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(WeakHamiltonianNonEye, TWeakHamiltonianNonEye, MContraction);
END_MODULE_NAMESPACE END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianNonEye_hpp_ #endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_

View File

@ -0,0 +1,135 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian + current contractions, disconnected topology for neutral
* mesons.
*
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
* Hamiltonian in the physical basis and an additional current J (see e.g.
* Fig 11 of arXiv:1507.03094).
*
* Schematic:
*
* q2 q4 q3
* /--<--¬ /---<--¬ /---<--¬
* / \ / \ / \
* i * * H_W | J * * f
* \ / \ / \ /
* \--->---/ \-------/ \------/
* q1
*
* options
* - q1: input propagator 1 (string)
* - q2: input propagator 2 (string)
* - q3: input propagator 3 (string), assumed to be sequential propagator
* - q4: input propagator 4 (string), assumed to be a loop
*
* type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
* type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
*/
/*******************************************************************************
* TWeakNeutral4ptDisc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Builds the module by handing its name to the Module<WeakHamiltonianPar>
// base class; no other state is initialised here.
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
    : Module<WeakHamiltonianPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// Names of the objects this module depends on: the four propagators q1..q4
// taken from the module parameters, in that order.
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
    return {par().q1, par().q2, par().q3, par().q4};
}
// Names of the objects this module produces: a single entry, the module's
// own name.
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
    return {getName()};
}
// setup ///////////////////////////////////////////////////////////////////////
// Setup hook: this module performs no work at setup time.
void TWeakNeutral4ptDisc::setup(void)
{
    // nothing to do
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)
{
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
// Perform type 1 contractions.
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
// Perform type 2 contractions.
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
write(writer, "HW_disc0", result);
}

View File

@ -0,0 +1,59 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakNeutral4ptDisc *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
// Indices of the neutral disconnected diagrams in the result vector.
// The enumerators take consecutive values starting at 0, so the last one
// equals the number of diagrams.
enum
{
    neut_disc_1_diag, // = 0
    neut_disc_2_diag, // = 1
    n_neut_disc_diag  // = 2, number of diagrams
};
// Neutral 4pt disconnected subdiagram contractions.
// Each macro argument is parenthesised so that a compound expression passed
// as an argument keeps its own precedence inside the product.
// MAKE_DISC_MESON expands to an expression that names `g5`, and the macros
// call `adj` / `trace`: all three must be visible where the macro is used.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) ((Q_1)*adj(Q_2)*g5*(gamma))
#define MAKE_DISC_LOOP(Q_LOOP, gamma) ((Q_LOOP)*(gamma))
#define MAKE_DISC_CURR(Q_c, gamma) (trace((Q_c)*(gamma)))
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Load_hpp_ #ifndef Hadrons_MGauge_Load_hpp_
#define Hadrons_Load_hpp_ #define Hadrons_MGauge_Load_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -70,4 +70,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Load_hpp_ #endif // Hadrons_MGauge_Load_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Random_hpp_ #ifndef Hadrons_MGauge_Random_hpp_
#define Hadrons_Random_hpp_ #define Hadrons_MGauge_Random_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Random_hpp_ #endif // Hadrons_MGauge_Random_hpp_

View File

@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_StochEm_hpp_ #ifndef Hadrons_MGauge_StochEm_hpp_
#define Hadrons_StochEm_hpp_ #define Hadrons_MGauge_StochEm_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -72,4 +72,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_StochEm_hpp_ #endif // Hadrons_MGauge_StochEm_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Unit_hpp_ #ifndef Hadrons_MGauge_Unit_hpp_
#define Hadrons_Unit_hpp_ #define Hadrons_MGauge_Unit_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -63,4 +63,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Unit_hpp_ #endif // Hadrons_MGauge_Unit_hpp_

View File

@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_NoiseLoop_hpp_ #ifndef Hadrons_MLoop_NoiseLoop_hpp_
#define Hadrons_NoiseLoop_hpp_ #define Hadrons_MLoop_NoiseLoop_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -65,7 +65,7 @@ template <typename FImpl>
class TNoiseLoop: public Module<NoiseLoopPar> class TNoiseLoop: public Module<NoiseLoopPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FERM_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TNoiseLoop(const std::string name); TNoiseLoop(const std::string name);
@ -129,4 +129,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_NoiseLoop_hpp_ #endif // Hadrons_MLoop_NoiseLoop_hpp_

View File

@ -82,7 +82,7 @@ void TChargedProp::execute(void)
LOG(Message) << "Caching momentum space free scalar propagator" LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl; << " (mass= " << par().mass << ")..." << std::endl;
freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_); freeMomProp_ = env().createLattice<ScalarField>(freeMomPropName_);
Scalar<SIMPL>::MomentumSpacePropagator(*freeMomProp_, par().mass); SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass);
} }
else else
{ {

View File

@ -1,5 +1,5 @@
#ifndef Hadrons_ChargedProp_hpp_ #ifndef Hadrons_MScalar_ChargedProp_hpp_
#define Hadrons_ChargedProp_hpp_ #define Hadrons_MScalar_ChargedProp_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -59,4 +59,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_ChargedProp_hpp_ #endif // Hadrons_MScalar_ChargedProp_hpp_

View File

@ -52,14 +52,14 @@ void TFreeProp::execute(void)
LOG(Message) << "Caching momentum space free scalar propagator" LOG(Message) << "Caching momentum space free scalar propagator"
<< " (mass= " << par().mass << ")..." << std::endl; << " (mass= " << par().mass << ")..." << std::endl;
freeMomProp = env().createLattice<ScalarField>(freeMomPropName_); freeMomProp = env().createLattice<ScalarField>(freeMomPropName_);
Scalar<SIMPL>::MomentumSpacePropagator(*freeMomProp, par().mass); SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass);
} }
else else
{ {
freeMomProp = env().getObject<ScalarField>(freeMomPropName_); freeMomProp = env().getObject<ScalarField>(freeMomPropName_);
} }
LOG(Message) << "Computing free scalar propagator..." << std::endl; LOG(Message) << "Computing free scalar propagator..." << std::endl;
Scalar<SIMPL>::FreePropagator(source, prop, *freeMomProp); SIMPL::FreePropagator(source, prop, *freeMomProp);
if (!par().output.empty()) if (!par().output.empty())
{ {

View File

@ -1,5 +1,5 @@
#ifndef Hadrons_FreeProp_hpp_ #ifndef Hadrons_MScalar_FreeProp_hpp_
#define Hadrons_FreeProp_hpp_ #define Hadrons_MScalar_FreeProp_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -47,4 +47,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_FreeProp_hpp_ #endif // Hadrons_MScalar_FreeProp_hpp_

View File

@ -0,0 +1,114 @@
#ifndef Hadrons_MSink_Point_hpp_
#define Hadrons_MSink_Point_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* Point *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSink)
// Parameters of the point sink module.
//  - mom: momentum given as a string; TPoint::execute parses it into a
//         vector of Real with strToVec<Real>.
class PointPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar,
std::string, mom);
};
// Point sink module, templated on the implementation type FImpl and
// parameterised by PointPar. Its execute() stores a sink function object
// under the module's name.
template <typename FImpl>
class TPoint: public Module<PointPar>
{
public:
// type aliases supplied by project macros (definitions not visible here)
FERM_TYPE_ALIASES(FImpl,);
SINK_TYPE_ALIASES();
public:
// constructor
TPoint(const std::string name);
// destructor
virtual ~TPoint(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
// Register two instantiations in the MSink namespace: "Point" for FIMPL and
// "ScalarPoint" for ScalarImplCR.
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSink);
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSink);
/******************************************************************************
* TPoint implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
// Builds the module by handing its name to the Module<PointPar> base class.
template <typename FImpl>
TPoint<FImpl>::TPoint(const std::string name)
    : Module<PointPar>(name)
{
}
// dependencies/products ///////////////////////////////////////////////////////
// The point sink depends on no other object: the input list is empty.
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getInput(void)
{
    return std::vector<std::string>();
}
// The module produces one object, stored under the module's own name.
template <typename FImpl>
std::vector<std::string> TPoint<FImpl>::getOutput(void)
{
    return {getName()};
}
// setup ///////////////////////////////////////////////////////////////////////
// Registers the module's output object with the environment, using the size
// reported by lattice4dSize<LatticeComplex>().
template <typename FImpl>
void TPoint<FImpl>::setup(void)
{
    const unsigned int objSize = env().template lattice4dSize<LatticeComplex>();

    env().registerObject(getName(), objSize);
}
// execution ///////////////////////////////////////////////////////////////////
// Builds the point sink function for the momentum given in the parameters and
// stores it in the environment under the module's name.
//
// The phase field is exp(2*pi*i * sum_mu p[mu]*x[mu]/L[mu]), where L[mu] is
// the full lattice extent in direction mu. The stored function multiplies a
// propagator field by this phase and slice-sums the product along Tp.
//
// The momentum string may hold fewer components than the number of
// dimensions; directions without a component contribute nothing to the
// phase. Components beyond the number of dimensions are ignored.
template <typename FImpl>
void TPoint<FImpl>::execute(void)
{
    std::vector<Real> p = strToVec<Real>(par().mom);
    LatticeComplex    ph(env().getGrid()), coor(env().getGrid());
    Complex           i(0.0,1.0);

    LOG(Message) << "Setting up point sink function for momentum ["
                 << par().mom << "]" << std::endl;
    ph = zero;
    // Bound the loop by the parsed momentum length as well as by Nd:
    // indexing p[mu] past its end would read out of bounds.
    for(unsigned int mu = 0; mu < env().getNd() && mu < p.size(); mu++)
    {
        LatticeCoordinate(coor, mu);
        ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor;
    }
    ph = exp((Real)(2*M_PI)*i*ph);
    // The phase is captured by value, so the function object remains usable
    // after the local ph goes out of scope.
    auto sink = [ph](const PropagatorField &field)
    {
        SlicedPropagator res;
        PropagatorField  tmp = ph*field;

        sliceSum(tmp, res, Tp);

        return res;
    };
    // NOTE(review): the raw pointer is handed to the environment, which
    // presumably takes ownership; confirm against Environment::setObject.
    env().setObject(getName(), new SinkFn(sink));
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_MSink_Point_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_RBPrecCG_hpp_ #ifndef Hadrons_MSolver_RBPrecCG_hpp_
#define Hadrons_RBPrecCG_hpp_ #define Hadrons_MSolver_RBPrecCG_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -53,7 +53,7 @@ template <typename FImpl>
class TRBPrecCG: public Module<RBPrecCGPar> class TRBPrecCG: public Module<RBPrecCGPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FGS_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TRBPrecCG(const std::string name); TRBPrecCG(const std::string name);
@ -129,4 +129,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_RBPrecCG_hpp_ #endif // Hadrons_MSolver_RBPrecCG_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Point_hpp_ #ifndef Hadrons_MSource_Point_hpp_
#define Hadrons_Point_hpp_ #define Hadrons_MSource_Point_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -78,8 +78,8 @@ public:
virtual void execute(void); virtual void execute(void);
}; };
MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource); MODULE_REGISTER_NS(Point, TPoint<FIMPL>, MSource);
MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplR>, MSource); MODULE_REGISTER_NS(ScalarPoint, TPoint<ScalarImplCR>, MSource);
/****************************************************************************** /******************************************************************************
* TPoint template implementation * * TPoint template implementation *
@ -133,4 +133,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Point_hpp_ #endif // Hadrons_MSource_Point_hpp_

View File

@ -28,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_SeqGamma_hpp_ #ifndef Hadrons_MSource_SeqGamma_hpp_
#define Hadrons_SeqGamma_hpp_ #define Hadrons_MSource_SeqGamma_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -72,7 +72,7 @@ template <typename FImpl>
class TSeqGamma: public Module<SeqGammaPar> class TSeqGamma: public Module<SeqGammaPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FGS_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TSeqGamma(const std::string name); TSeqGamma(const std::string name);
@ -161,4 +161,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_SeqGamma_hpp_ #endif // Hadrons_MSource_SeqGamma_hpp_

View File

@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_WallSource_hpp_ #ifndef Hadrons_MSource_WallSource_hpp_
#define Hadrons_WallSource_hpp_ #define Hadrons_MSource_WallSource_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -64,7 +64,7 @@ template <typename FImpl>
class TWall: public Module<WallPar> class TWall: public Module<WallPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FERM_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TWall(const std::string name); TWall(const std::string name);
@ -144,4 +144,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_WallSource_hpp_ #endif // Hadrons_MSource_WallSource_hpp_

View File

@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef Hadrons_Z2_hpp_ #ifndef Hadrons_MSource_Z2_hpp_
#define Hadrons_Z2_hpp_ #define Hadrons_MSource_Z2_hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -82,8 +82,8 @@ public:
virtual void execute(void); virtual void execute(void);
}; };
MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource); MODULE_REGISTER_NS(Z2, TZ2<FIMPL>, MSource);
MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplR>, MSource); MODULE_REGISTER_NS(ScalarZ2, TZ2<ScalarImplCR>, MSource);
/****************************************************************************** /******************************************************************************
* TZ2 template implementation * * TZ2 template implementation *
@ -149,4 +149,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Z2_hpp_ #endif // Hadrons_MSource_Z2_hpp_

View File

@ -51,7 +51,7 @@ template <typename FImpl>
class TQuark: public Module<QuarkPar> class TQuark: public Module<QuarkPar>
{ {
public: public:
TYPE_ALIASES(FImpl,); FGS_TYPE_ALIASES(FImpl,);
public: public:
// constructor // constructor
TQuark(const std::string name); TQuark(const std::string name);
@ -173,7 +173,7 @@ void TQuark<FImpl>::execute(void)
*env().template getObject<PropagatorField>(getName()); *env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1); axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0); ExtractSlice(tmp, sol, 0, 0);
FermToProp(p4d, tmp, s, c); FermToProp(p4d, tmp, s, c);
} }

View File

@ -1,5 +1,5 @@
#ifndef Hadrons____FILEBASENAME____hpp_ #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_ #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -41,4 +41,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_ #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_

View File

@ -1,5 +1,5 @@
#ifndef Hadrons____FILEBASENAME____hpp_ #ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#define Hadrons____FILEBASENAME____hpp_ #define Hadrons____NAMESPACE_______FILEBASENAME____hpp_
#include <Grid/Hadrons/Global.hpp> #include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp> #include <Grid/Hadrons/Module.hpp>
@ -82,4 +82,4 @@ END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons____FILEBASENAME____hpp_ #endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_

View File

@ -1,6 +1,7 @@
modules_cc =\ modules_cc =\
Modules/MContraction/WeakHamiltonianEye.cc \ Modules/MContraction/WeakHamiltonianEye.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \ Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MGauge/Load.cc \ Modules/MGauge/Load.cc \
Modules/MGauge/Random.cc \ Modules/MGauge/Random.cc \
Modules/MGauge/StochEm.cc \ Modules/MGauge/StochEm.cc \
@ -13,10 +14,13 @@ modules_hpp =\
Modules/MAction/DWF.hpp \ Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp \ Modules/MAction/Wilson.hpp \
Modules/MContraction/Baryon.hpp \ Modules/MContraction/Baryon.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/Meson.hpp \ Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \ Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MGauge/Load.hpp \ Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \ Modules/MGauge/Random.hpp \
Modules/MGauge/StochEm.hpp \ Modules/MGauge/StochEm.hpp \
@ -26,6 +30,7 @@ modules_hpp =\
Modules/MScalar/FreeProp.hpp \ Modules/MScalar/FreeProp.hpp \
Modules/MScalar/Scalar.hpp \ Modules/MScalar/Scalar.hpp \
Modules/MScalar/ScalarVP.hpp \ Modules/MScalar/ScalarVP.hpp \
Modules/MSink/Point.hpp \
Modules/MSolver/RBPrecCG.hpp \ Modules/MSolver/RBPrecCG.hpp \
Modules/MSource/Point.hpp \ Modules/MSource/Point.hpp \
Modules/MSource/SeqGamma.hpp \ Modules/MSource/SeqGamma.hpp \

View File

@ -20,4 +20,17 @@ The simple testcase in this directory is the submitted bug report that encapsula
problem. The test case works with icpc and with clang++, but fails consistently on g++ problem. The test case works with icpc and with clang++, but fails consistently on g++
current variants. current variants.
Peter Peter
************
Second GCC bug reported, see Issue 100.
https://wandbox.org/permlink/tzssJza6R9XnqANw
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
Travis now fails under gcc-5 for Test_simd, since more comprehensive testing was added to the
CI test suite. This exposes the limitations of Travis: its runtime limits and weak cores.
Travis uses 5.4.1 for g++-5.

86
grid-config.in Executable file
View File

@ -0,0 +1,86 @@
#! /bin/sh
# Installation directories. The @...@ placeholders are presumably substituted
# at configure time (this file is a .in template); confirm against the build.
# Only `prefix` is read by the option handling in this script.
prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@
# Print the option summary on standard output, then exit.
#   $1  exit status to use; defaults to 1 when omitted, so that a bare
#       `usage` on an error path cannot report success by accident
#       (a plain `exit` would reuse the status of `cat`, i.e. 0).
usage()
{
  cat <<EOF
Usage: grid-config [OPTION]
Known values for OPTION are:
--prefix show Grid installation prefix
--cxxflags print pre-processor and compiler flags
--ldflags print library linking flags
--libs print library linking information
--summary print full build summary
--help display this help and exit
--version output version information
--git print git revision
EOF
  exit "${1:-1}"
}
# Without any option there is nothing to report: print the help and fail.
if test $# -eq 0; then
  usage 1
fi

# Process the options in the order given; several may be combined in one
# call. --version, --git and --help end the script immediately.
while test $# -gt 0; do
  case "$1" in
    --prefix)
      echo "$prefix"
      ;;
    --version)
      echo @VERSION@
      exit 0
      ;;
    --git)
      echo "@GRID_BRANCH@ @GRID_SHA@"
      exit 0
      ;;
    --help)
      usage 0
      ;;
    --cxxflags)
      echo @GRID_CXXFLAGS@
      ;;
    --ldflags)
      echo @GRID_LDFLAGS@
      ;;
    --libs)
      echo @GRID_LIBS@
      ;;
    --summary)
      echo ""
      echo "@GRID_SUMMARY@"
      echo ""
      ;;
    *)
      # Unknown option: print the help and exit with a failure status.
      # The status is passed explicitly because usage exits by itself.
      usage 1
      ;;
  esac
  shift
done

exit 0

37
lib/DisableWarnings.h Normal file
View File

@ -0,0 +1,37 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
// Disables an Intel-compiler-specific warning (in json.hpp).
// "#pragma warning disable" is Intel compiler syntax, so it is only emitted
// when building with that compiler; other compilers would report it as an
// unknown pragma.
#if defined(__INTEL_COMPILER)
#pragma warning disable 488
#endif
#endif

View File

@ -38,28 +38,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_BASE_H #ifndef GRID_BASE_H
#define GRID_BASE_H #define GRID_BASE_H
/////////////////// #include <Grid/GridStd.h>
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/perfmon/Timer.h> #include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h> #include <Grid/perfmon/PerfCount.h>

27
lib/GridStd.h Normal file
View File

@ -0,0 +1,27 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */

9
lib/Grid_Eigen_Dense.h Normal file
View File

@ -0,0 +1,9 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif

View File

@ -46,7 +46,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h> #include <Grid/algorithms/iterative/ConjugateGradientMixedPrec.h>
// Lanczos support // Lanczos support
#include <Grid/algorithms/iterative/MatrixUtils.h> //#include <Grid/algorithms/iterative/MatrixUtils.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/CoarsenedMatrix.h> #include <Grid/algorithms/CoarsenedMatrix.h>
#include <Grid/algorithms/FFT.h> #include <Grid/algorithms/FFT.h>

View File

@ -235,7 +235,7 @@ namespace Grid {
Field tmp(in._grid); Field tmp(in._grid);
_Mat.MeooeDag(in,tmp); _Mat.MeooeDag(in,tmp);
_Mat.MooeeInvDag(tmp,out); _Mat.MooeeInvDag(tmp,out);
_Mat.MeooeDag(out,tmp); _Mat.MeooeDag(out,tmp);
_Mat.MooeeDag(in,out); _Mat.MooeeDag(in,out);

View File

@ -197,8 +197,9 @@ namespace Grid {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid; GridBase *grid=in._grid;
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl; // std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
int vol=grid->gSites(); int vol=grid->gSites();

View File

@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H #define INCLUDED_ALG_REMEZ_H
#include <stddef.h> #include <stddef.h>
#include <Config.h> #include <Grid/GridStd.h>
#ifdef HAVE_LIBGMP #ifdef HAVE_LIBGMP
#include "bigfloat.h" #include "bigfloat.h"

View File

@ -0,0 +1,366 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/BlockConjugateGradient.h
Copyright (C) 2017
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BLOCK_CONJUGATE_GRADIENT_H
#define GRID_BLOCK_CONJUGATE_GRADIENT_H
namespace Grid {
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient solver.
// The slices of Src along dimension 0 are treated as Nblock right-hand sides
// that are solved together; the blocks are coupled through Nblock x Nblock
// matrices of slice inner products (m_rr, m_pAp).
template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> {
 public:

  typedef typename Field::scalar_type scomplex;

  const int blockDim = 0;
  int Nblock = 0;                    // set on every call from the extent of dimension 0 of Src's grid

  bool ErrorOnNoConverge;            // throw an assert when the CG fails to converge.
                                     // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationsToComplete = 0;  // Number of iterations the CG took to finish. Filled in upon completion

  // tol            : target relative residual (compared squared against |r|^2/|src|^2 per block)
  // maxit          : iteration limit
  // err_on_no_conv : assert(0) when the limit is reached without convergence
  // The initializer list follows the member declaration order, which is the
  // order in which the members are actually initialised.
  BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv),
      Tolerance(tol),
      MaxIterations(maxit) {}

  // Solve Linop.HermOp * Psi = Src, starting from the guess held in Psi.
  // On convergence Psi holds the solution and IterationsToComplete the
  // iteration count.
  void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
  {
    int Orthog = 0; // First dimension is block dim
    Nblock = Src._grid->_fdimensions[Orthog];

    std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;

    Psi.checkerboard = Src.checkerboard;
    conformable(Psi, Src);

    Field P(Src);
    Field AP(Src);
    Field R(Src);

    Eigen::MatrixXcd m_pAp    = Eigen::MatrixXcd::Identity(Nblock,Nblock);
    Eigen::MatrixXcd m_pAp_inv= Eigen::MatrixXcd::Identity(Nblock,Nblock);
    Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock);
    Eigen::MatrixXcd m_rr_inv = Eigen::MatrixXcd::Zero(Nblock,Nblock);

    Eigen::MatrixXcd m_alpha  = Eigen::MatrixXcd::Zero(Nblock,Nblock);
    Eigen::MatrixXcd m_beta   = Eigen::MatrixXcd::Zero(Nblock,Nblock);

    // Initial residual computation & set up
    std::vector<RealD> residuals(Nblock);
    std::vector<RealD> ssq(Nblock);

    sliceNorm(ssq,Src,Orthog);
    RealD sssum=0;
    for(int b=0;b<Nblock;b++) sssum+=ssq[b];

    // ssq already holds the per-slice norm of Src, so the NaN check reads it
    // directly instead of computing the same slice norm a second time.
    for(int b=0;b<Nblock;b++){ assert(std::isnan(ssq[b])==0); }

    sliceNorm(residuals,Psi,Orthog);
    for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }

    // Apply the operator to the initial guess; R = Src - A Psi below is the
    // initial residual and also the first search direction.
    Linop.HermOp(Psi, AP);

    /************************************************************************
     * Block conjugate gradient (Stephen Pickles, thesis 1995, pp 71, O Leary 1980)
     ************************************************************************
     * O'Leary : R = B - A X
     * O'Leary : P = M R ; preconditioner M = 1
     * O'Leary : alpha = PAP^{-1} RMR
     * O'Leary : beta  = RMR^{-1}_old RMR_new
     * O'Leary : X=X+Palpha
     * O'Leary : R_new=R_old-AP alpha
     * O'Leary : P=MR_new+P beta
     */

    R = Src - AP;
    P = R;
    sliceInnerProductMatrix(m_rr,R,R,Orthog);

    GridStopWatch sliceInnerTimer;
    GridStopWatch sliceMaddTimer;
    GridStopWatch MatrixTimer;
    GridStopWatch SolverTimer;
    SolverTimer.Start();

    int k;
    for (k = 1; k <= MaxIterations; k++) {

      RealD rrsum=0;
      for(int b=0;b<Nblock;b++) rrsum+=real(m_rr(b,b));

      std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
                <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;

      MatrixTimer.Start();
      Linop.HermOp(P, AP);
      MatrixTimer.Stop();

      // Alpha
      sliceInnerTimer.Start();
      sliceInnerProductMatrix(m_pAp,P,AP,Orthog);
      sliceInnerTimer.Stop();
      m_pAp_inv = m_pAp.inverse();
      m_alpha   = m_pAp_inv * m_rr ;

      // Psi, R update
      sliceMaddTimer.Start();
      sliceMaddMatrix(Psi,m_alpha, P,Psi,Orthog);     // add alpha *  P to psi
      sliceMaddMatrix(R  ,m_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
      sliceMaddTimer.Stop();

      // Beta: the inverse of the old m_rr must be taken before m_rr is
      // overwritten with the new residual inner products.
      m_rr_inv = m_rr.inverse();
      sliceInnerTimer.Start();
      sliceInnerProductMatrix(m_rr,R,R,Orthog);
      sliceInnerTimer.Stop();
      m_beta = m_rr_inv *m_rr;

      // Search update: AP is reused as scratch for the new direction.
      sliceMaddTimer.Start();
      sliceMaddMatrix(AP,m_beta,P,R,Orthog);
      sliceMaddTimer.Stop();
      P= AP;

      /*********************
       * convergence monitor
       *********************
       */
      // Largest squared relative residual over all blocks.
      RealD max_resid=0;
      for(int b=0;b<Nblock;b++){
        RealD rr = real(m_rr(b,b))/ssq[b];
        if ( rr > max_resid ) max_resid = rr;
      }

      if ( max_resid < Tolerance*Tolerance ) {

        SolverTimer.Stop();

        std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
        for(int b=0;b<Nblock;b++){
          std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
        }
        std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;

        // Recompute the residual from scratch for the report.
        Linop.HermOp(Psi, AP);
        AP = AP-Src;
        std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;

        std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;

        IterationsToComplete = k;
        return;
      }

    }
    std::cout << GridLogMessage << "BlockConjugateGradient did NOT converge" << std::endl;

    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
  }
};
//////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
// Multiple right-hand-side conjugate gradient solver.
// The slices of Src along dimension 0 are treated as Nblock independent
// systems: each block has its own scalar alpha and beta, and all blocks share
// each application of the operator.
template <class Field>
class MultiRHSConjugateGradient : public OperatorFunction<Field> {
 public:

  typedef typename Field::scalar_type scomplex;

  const int blockDim = 0;
  int Nblock = 0;                    // set on every call from the extent of dimension 0 of Src's grid

  bool ErrorOnNoConverge;            // throw an assert when the CG fails to converge.
                                     // Defaults true.
  RealD Tolerance;
  Integer MaxIterations;
  Integer IterationsToComplete = 0;  // Number of iterations the CG took to finish. Filled in upon completion

  // tol            : target relative residual (compared squared against |r|^2/|src|^2 per block)
  // maxit          : iteration limit
  // err_on_no_conv : assert(0) when the limit is reached without convergence
  // The initializer list follows the member declaration order, which is the
  // order in which the members are actually initialised.
  MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
    : ErrorOnNoConverge(err_on_no_conv),
      Tolerance(tol),
      MaxIterations(maxit) {}

  // Solve Linop.HermOp * Psi = Src, starting from the guess held in Psi.
  // On convergence Psi holds the solution and IterationsToComplete the
  // iteration count.
  void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
  {
    int Orthog = 0; // First dimension is block dim
    Nblock = Src._grid->_fdimensions[Orthog];

    std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;

    Psi.checkerboard = Src.checkerboard;
    conformable(Psi, Src);

    Field P(Src);
    Field AP(Src);
    Field R(Src);

    std::vector<ComplexD> v_pAp(Nblock);
    std::vector<RealD> v_rr (Nblock);
    std::vector<RealD> v_rr_inv(Nblock);
    std::vector<RealD> v_alpha(Nblock);
    std::vector<RealD> v_beta(Nblock);

    // Initial residual computation & set up
    std::vector<RealD> residuals(Nblock);
    std::vector<RealD> ssq(Nblock);

    sliceNorm(ssq,Src,Orthog);
    RealD sssum=0;
    for(int b=0;b<Nblock;b++) sssum+=ssq[b];

    // ssq already holds the per-slice norm of Src, so the NaN check reads it
    // directly instead of computing the same slice norm a second time.
    for(int b=0;b<Nblock;b++){ assert(std::isnan(ssq[b])==0); }

    sliceNorm(residuals,Psi,Orthog);
    for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }

    // Apply the operator to the initial guess; R = Src - A Psi below is the
    // initial residual and also the first search direction.
    Linop.HermOp(Psi, AP);

    R = Src - AP;
    P = R;
    sliceNorm(v_rr,R,Orthog);

    GridStopWatch sliceInnerTimer;
    GridStopWatch sliceMaddTimer;
    GridStopWatch sliceNormTimer;
    GridStopWatch MatrixTimer;
    GridStopWatch SolverTimer;

    SolverTimer.Start();
    int k;
    for (k = 1; k <= MaxIterations; k++) {

      RealD rrsum=0;
      for(int b=0;b<Nblock;b++) rrsum+=real(v_rr[b]);

      std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
                <<" / "<<std::sqrt(rrsum/sssum) <<std::endl;

      MatrixTimer.Start();
      Linop.HermOp(P, AP);
      MatrixTimer.Stop();

      // Alpha: one scalar per block, using the real part of <P|AP>.
      sliceInnerTimer.Start();
      sliceInnerProductVector(v_pAp,P,AP,Orthog);
      sliceInnerTimer.Stop();
      for(int b=0;b<Nblock;b++){
        v_alpha[b] = v_rr[b]/real(v_pAp[b]);
      }

      // Psi, R update
      sliceMaddTimer.Start();
      sliceMaddVector(Psi,v_alpha, P,Psi,Orthog);     // add alpha *  P to psi
      sliceMaddVector(R  ,v_alpha,AP,  R,Orthog,-1.0);// sub alpha * AP to resid
      sliceMaddTimer.Stop();

      // Beta: the reciprocal of the old v_rr must be taken before v_rr is
      // overwritten with the new residual norms.
      for(int b=0;b<Nblock;b++){
        v_rr_inv[b] = 1.0/v_rr[b];
      }
      sliceNormTimer.Start();
      sliceNorm(v_rr,R,Orthog);
      sliceNormTimer.Stop();
      for(int b=0;b<Nblock;b++){
        v_beta[b] = v_rr_inv[b] *v_rr[b];
      }

      // Search update
      sliceMaddTimer.Start();
      sliceMaddVector(P,v_beta,P,R,Orthog);
      sliceMaddTimer.Stop();

      /*********************
       * convergence monitor
       *********************
       */
      // Largest squared relative residual over all blocks.
      RealD max_resid=0;
      for(int b=0;b<Nblock;b++){
        RealD rr = v_rr[b]/ssq[b];
        if ( rr > max_resid ) max_resid = rr;
      }

      if ( max_resid < Tolerance*Tolerance ) {

        SolverTimer.Stop();

        std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
        for(int b=0;b<Nblock;b++){
          std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
        }
        std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;

        // Recompute the residual from scratch for the report.
        Linop.HermOp(Psi, AP);
        AP = AP-Src;
        std::cout <<GridLogMessage << "\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;

        std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
        std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tNorm " << sliceNormTimer.Elapsed() <<std::endl;
        std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;

        IterationsToComplete = k;
        return;
      }

    }
    std::cout << GridLogMessage << "MultiRHSConjugateGradient did NOT converge" << std::endl;

    if (ErrorOnNoConverge) assert(0);
    IterationsToComplete = k;
  }
};
}
#endif

View File

@ -78,18 +78,12 @@ class ConjugateGradient : public OperatorFunction<Field> {
cp = a; cp = a;
ssq = norm2(src); ssq = norm2(src);
std::cout << GridLogIterative << std::setprecision(4) std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: guess " << guess << std::endl;
<< "ConjugateGradient: guess " << guess << std::endl; std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4) std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mp " << d << std::endl;
<< "ConjugateGradient: src " << ssq << std::endl; std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4) std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: cp,r " << cp << std::endl;
<< "ConjugateGradient: mp " << d << std::endl; std::cout << GridLogIterative << std::setprecision(4) << "ConjugateGradient: p " << a << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
RealD rsq = Tolerance * Tolerance * ssq; RealD rsq = Tolerance * Tolerance * ssq;
@ -99,8 +93,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
} }
std::cout << GridLogIterative << std::setprecision(4) std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq << "ConjugateGradient: k=0 residual " << cp << " target " << rsq << std::endl;
<< std::endl;
GridStopWatch LinalgTimer; GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer; GridStopWatch MatrixTimer;
@ -130,8 +123,11 @@ class ConjugateGradient : public OperatorFunction<Field> {
p = p * b + r; p = p * b + r;
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl; << " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition // Stopping condition
if (cp <= rsq) { if (cp <= rsq) {
@ -139,32 +135,33 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, qq); Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src; p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src)); RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p)); RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm; RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k << std::endl;
<< "ConjugateGradient: Converged on iteration " << k << std::endl; std::cout << GridLogMessage << "\tComputed residual " << sqrt(cp / ssq)<<std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq) std::cout << GridLogMessage << "\tTrue residual " << true_residual<<std::endl;
<< " true residual " << true_residual << " target " std::cout << GridLogMessage << "\tTarget " << Tolerance << std::endl;
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations " std::cout << GridLogMessage << "Time breakdown "<<std::endl;
<< SolverTimer.Elapsed() << " Matrix " std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
<< MatrixTimer.Elapsed() << " Linalg " std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
<< LinalgTimer.Elapsed(); std::cout << GridLogMessage << "\tLinalg " << LinalgTimer.Elapsed() <<std::endl;
std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k; IterationsToComplete = k;
return; return;
} }
} }
std::cout << GridLogMessage << "ConjugateGradient did NOT converge" std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl; << std::endl;
if (ErrorOnNoConverge) assert(0); if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k; IterationsToComplete = k;
} }
}; };
} }

View File

@ -30,6 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define GRID_IRL_H #define GRID_IRL_H
#include <string.h> //memset #include <string.h> //memset
#ifdef USE_LAPACK #ifdef USE_LAPACK
void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e, void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *vl, double *vu, int *il, int *iu, double *abstol, double *vl, double *vu, int *il, int *iu, double *abstol,
@ -37,8 +38,9 @@ void LAPACK_dstegr(char *jobz, char *range, int *n, double *d, double *e,
double *work, int *lwork, int *iwork, int *liwork, double *work, int *lwork, int *iwork, int *liwork,
int *info); int *info);
#endif #endif
#include "DenseMatrix.h"
#include "EigenSort.h" #include <Grid/algorithms/densematrix/DenseMatrix.h>
#include <Grid/algorithms/iterative/EigenSort.h>
namespace Grid { namespace Grid {
@ -1088,8 +1090,6 @@ static void Lock(DenseMatrix<T> &H, // Hess mtx
int dfg, int dfg,
bool herm) bool herm)
{ {
//ForceTridiagonal(H); //ForceTridiagonal(H);
int M = H.dim; int M = H.dim;
@ -1121,7 +1121,6 @@ static void Lock(DenseMatrix<T> &H, // Hess mtx
AH = Hermitian(QQ)*AH; AH = Hermitian(QQ)*AH;
AH = AH*QQ; AH = AH*QQ;
for(int i=con;i<M;i++){ for(int i=con;i<M;i++){
for(int j=con;j<M;j++){ for(int j=con;j<M;j++){

View File

@ -1,453 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Matrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef MATRIX_H
#define MATRIX_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <vector>
#include <iostream>
#include <iomanip>
#include <complex>
#include <typeinfo>
#include <Grid/Grid.h>
/** Sign function: p/|p| (+1 or -1 for real p, the unit phase for complex p); returns 0 for p == 0 **/
template <class T> T sign(T p){
  // std::abs is qualified so that floating-point and complex arguments can
  // never be routed to the integer ::abs overload.
  const auto mag = std::abs(p);
  if ( mag == 0 ) return T(0);   // avoid dividing zero by zero
  return T( p/mag );
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////// Hijack STL containers for our wicked means /////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// Vector<T> is the STL vector; Matrix<T> is a vector of vectors indexed mat[i][j].
// (An alias template cannot refer to itself, so Vector names std::vector explicitly.)
template<class T> using Vector = std::vector<T>;
template<class T> using Matrix = Vector<Vector<T> >;
// Give vec exactly N elements.
template<class T> void Resize(Vector<T > & vec, int N)
{
  vec.resize(N);
}
// Give mat N outer entries, each resized to M elements.
template<class T> void Resize(Matrix<T > & mat, int N, int M) {
  mat.resize(N);
  for(auto & row : mat) row.resize(M);
}
// Report the number of elements of vec through N.
template<class T> void Size(Vector<T> & vec, int &N)
{
  N = static_cast<int>(vec.size());
}
// Report the outer size of mat through N and the size of its first entry
// through M. An empty matrix reports M = 0 instead of reading mat[0].
template<class T> void Size(Matrix<T> & mat, int &N,int &M)
{
  N = mat.size();
  M = mat.empty() ? 0 : mat[0].size();
}
// Report the dimension of mat through N, asserting that mat is square.
template<class T> void SizeSquare(Matrix<T> & mat, int &N)
{
  int cols;
  Size(mat,N,cols);
  assert(N==cols);
}
// Report the dimensions of mat1 through N1 and M1, asserting that mat2 has
// the same dimensions.
template<class T> void SizeSame(Matrix<T> & mat1,Matrix<T> &mat2, int &N1,int &M1)
{
  Size(mat1,N1,M1);

  int rows2, cols2;
  Size(mat2,rows2,cols2);

  assert(N1==rows2);
  assert(M1==cols2);
}
//*****************************************
//* (Complex) Vector operations *
//*****************************************
/** Element-wise conjugate of a Vector **/
template <class T> Vector<T> conj(Vector<T> p){
  Vector<T> out(p.size());
  for(std::size_t k=0; k<p.size(); ++k){
    out[k] = conj(p[k]);
  }
  return out;
}
/** Norm of a Vector: abs(sqrt(sum_i p_i conj(p_i))) **/
template <class T> T norm(Vector<T> p){
  T acc = 0;
  for(std::size_t k=0; k<p.size(); ++k){
    acc = acc + p[k]*conj(p[k]);
  }
  return abs(sqrt(acc));
}
/** Norm squared of a Vector: abs(sum_i p_i conj(p_i)) **/
template <class T> T norm2(Vector<T> p){
  T acc = 0;
  for(std::size_t k=0; k<p.size(); ++k){
    acc = acc + p[k]*conj(p[k]);
  }
  return abs((acc));
}
/** Sum of all elements of a Vector **/
template <class T> T trace(Vector<T> p){
  T total = 0;
  for(std::size_t k=0; k<p.size(); ++k){
    total = total + p[k];
  }
  return total;
}
/** Set every element of a Vector to the constant c **/
template <class T> void Fill(Vector<T> &p, T c){
  for(std::size_t k=0; k<p.size(); ++k){
    p[k] = c;
  }
}
/** Divide a Vector by its norm; a Vector whose norm is not > 0 is left untouched **/
template <class T> void normalize(Vector<T> &p){
  T m = norm(p);
  if( !(abs(m) > 0.0) ) return;
  for(std::size_t k=0; k<p.size(); ++k){
    p[k] /= m;
  }
}
/** Vector times scalar: returns a copy of p with every element multiplied by s **/
template <class T, class U> Vector<T> times(Vector<T> p, U s){
  for(std::size_t k=0; k<p.size(); ++k){
    p[k] *= s;
  }
  return p;
}
/** Scalar times Vector: returns a copy of p with every element multiplied by s **/
template <class T, class U> Vector<T> times(U s, Vector<T> p){
  for(std::size_t k=0; k<p.size(); ++k){
    p[k] *= s;
  }
  return p;
}
/** Inner product conj(a) . b, summed over the length of a **/
template <class T> T inner(Vector<T> a, Vector<T> b){
  T acc = 0.;
  for(std::size_t k=0; k<a.size(); ++k){
    acc = acc + conj(a[k])*b[k];
  }
  return acc;
}
/** Element-wise sum a + b, over the length of a **/
template <class T> Vector<T> add(Vector<T> a, Vector<T> b){
  Vector<T> total(a.size());
  for(std::size_t k=0; k<a.size(); ++k){
    total[k] = a[k] + b[k];
  }
  return total;
}
/** Element-wise difference a - b, over the length of a **/
template <class T> Vector<T> sub(Vector<T> a, Vector<T> b){
  Vector<T> diff(a.size());
  for(std::size_t k=0; k<a.size(); ++k){
    diff[k] = a[k] - b[k];
  }
  return diff;
}
/**
*********************************
* Matrices *
*********************************
**/
// Set every entry of mat to val.
template<class T> void Fill(Matrix<T> & mat, T&val) {
  int rows,cols;
  Size(mat,rows,cols);
  for(int r=0;r<rows;r++){
    for(int c=0;c<cols;c++){
      mat[r][c] = val;
    }
  }
}
/** Transpose of a matrix: returns C with C[i][j] = mat[j][i] **/
template<class T> Matrix<T> Transpose(Matrix<T> & mat){
  int N,M;
  Size(mat,N,M);
  Matrix<T> C; Resize(C,M,N);   // result has the two dimensions swapped
  for(int i=0;i<M;i++){
    for(int j=0;j<N;j++){
      C[i][j] = mat[j][i];
    }}
  return C;
}
/** Set a square Matrix to the unit matrix (1 on the diagonal, 0 elsewhere) **/
template<class T> void Unity(Matrix<T> &mat){
  int N; SizeSquare(mat,N);
  for(int i=0;i<N;i++){
    for(int j=0;j<N;j++){
      if ( i==j ) mat[i][j] = 1;
      else        mat[i][j] = 0;
    }
  }
}
/** Add c to every diagonal entry of the square matrix A, i.e. A += c * I **/
template<class T>
void PlusUnit(Matrix<T> & A,T c){
  int n; SizeSquare(A,n);
  for(int d=0;d<n;d++){
    A[d][d] = A[d][d] + c;
  }
}
/** Return the Hermitian conjugate of a square matrix: C[i][j] = conj(mat[j][i]) **/
template<class T> Matrix<T> HermitianConj(Matrix<T> &mat){
  int dim; SizeSquare(mat,dim);
  Matrix<T> C; Resize(C,dim,dim);
  for(int i=0;i<dim;i++){
    for(int j=0;j<dim;j++){
      C[i][j] = conj(mat[j][i]);
    }
  }
  return C;
}
/** Return the diagonal entries of a square matrix as a Vector **/
template<class T> Vector<T> diag(Matrix<T> &A)
{
  int dim; SizeSquare(A,dim);
  Vector<T> d; Resize(d,dim);
  for(int i=0;i<dim;i++){
    d[i] = A[i][i];
  }
  return d;
}
/** Left multiply a matrix by a Vector: C[j] = sum_i B[i] A[i][j] **/
template<class T> Vector<T> operator *(Vector<T> &B,Matrix<T> &A)
{
  int K,M,N;
  Size(B,K);
  Size(A,M,N);
  assert(K==M);   // length of B must match the outer size of A

  Vector<T> C; Resize(C,N);
  for(int j=0;j<N;j++){
    T sum = 0.0;
    for(int i=0;i<M;i++){
      sum += B[i] * A[i][j];
    }
    C[j] = sum;
  }
  return C;
}
/** Return the reciprocals of the diagonal entries of a square matrix as a Vector **/
template<class T> Vector<T> inv_diag(Matrix<T> & A){
  int dim; SizeSquare(A,dim);
  Vector<T> d; Resize(d,dim);
  for(int i=0;i<dim;i++){
    d[i] = 1.0/A[i][i];
  }
  return d;
}
/** Matrix addition: entry-wise A + B for matrices of equal dimensions **/
template<class T> inline Matrix<T> operator + (Matrix<T> &A,Matrix<T> &B)
{
  int N,M ; SizeSame(A,B,N,M);
  Matrix<T> C; Resize(C,N,M);
  for(int i=0;i<N;i++){
    for(int j=0;j<M;j++){
      C[i][j] = A[i][j] + B[i][j];
    }
  }
  return C;
}
/** Matrix subtraction: entry-wise A - B for matrices of equal dimensions **/
template<class T> inline Matrix<T> operator- (Matrix<T> & A,Matrix<T> &B){
  int N,M ; SizeSame(A,B,N,M);
  Matrix<T> C; Resize(C,N,M);
  for(int i=0;i<N;i++){
    for(int j=0;j<M;j++){
      C[i][j] = A[i][j] - B[i][j];
    }}
  return C;
}
/** Matrix times scalar: every entry of A multiplied by c **/
template<class T> inline Matrix<T> operator* (Matrix<T> & A,T c){
  int N,M; Size(A,N,M);
  Matrix<T> C; Resize(C,N,M);
  for(int i=0;i<N;i++){
    for(int j=0;j<M;j++){
      C[i][j] = A[i][j]*c;
    }}
  return C;
}
/** Matrix times matrix: C[i][j] = sum_k A[i][k] B[k][j] **/
template<class T> inline Matrix<T> operator* (Matrix<T> &A,Matrix<T> &B){
  int K,L,N,M;
  Size(A,K,L);
  Size(B,N,M); assert(L==N);   // inner dimensions must agree
  Matrix<T> C; Resize(C,K,M);

  for(int i=0;i<K;i++){
    for(int j=0;j<M;j++){
      T sum = 0.0;
      for(int k=0;k<N;k++) sum += A[i][k]*B[k][j];
      C[i][j] =sum;
    }
  }
  return C;
}
/** Matrix times Vector: C[i] = sum_j A[i][j] B[j] **/
template<class T> inline Vector<T> operator* (Matrix<T> &A,Vector<T> &B){
  int M,N,K;
  Size(A,N,M);
  Size(B,K); assert(K==M);   // length of B must match the inner size of A
  Vector<T> C; Resize(C,N);
  for(int i=0;i<N;i++){
    T sum = 0.0;
    for(int j=0;j<M;j++) sum += A[i][j]*B[j];
    C[i] = sum;
  }
  return C;
}
/** Some version of Matrix norm **/
/*
inline T Norm(){ // this is not a usual L2 norm
T norm = 0;
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
norm += abs(A[i][j]);
}}
return norm;
}
*/
/** Largest magnitude found on the diagonal of a square matrix **/
template<class T> T LargestDiag(Matrix<T> &A)
{
  int n ; SizeSquare(A,n);

  T best = abs(A[0][0]);
  for(int d=1;d<n;d++){
    T mag = abs(A[d][d]);
    if( abs(mag) > abs(best) ){
      best = mag;
    }
  }
  return best;
}
/** Scan the leading subdiagonal upwards from row dim-1-offset; zero the first
    entry smaller than 'small' and return its row, or 0 if there is none **/
template <class T,class U> int Chop_subdiag(Matrix<T> &A,T norm, int offset, U small)
{
  int dim; SizeSquare(A,dim);
  int row = dim - 1 - offset;
  while( row >= 1 ) {
    if((U)abs(A[row][row - 1]) < (U)small) {
      A[row][row-1]=(U)0.0;
      return row;
    }
    row--;
  }
  return 0;
}
/** As Chop_subdiag, but the matching superdiagonal entry is zeroed as well **/
template <class T,class U> int Chop_symm_subdiag(Matrix<T> & A,T norm, int offset, U small)
{
  int dim; SizeSquare(A,dim);
  int row = dim - 1 - offset;
  while( row >= 1 ) {
    if((U)abs(A[row][row - 1]) < (U)small) {
      A[row][row - 1] = (U)0.0;
      A[row - 1][row] = (U)0.0;
      return row;
    }
    row--;
  }
  return 0;
}
/** Copy S into the window [row_st,row_end) x [col_st,col_end) of the larger matrix A **/
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
  for(int r = row_st; r<row_end; r++){
    const int sr = r - row_st;
    for(int c = col_st; c<col_end; c++){
      A[r][c] = S[sr][c - col_st];
    }
  }
}
/** Extract the submatrix [row_st,row_end) x [col_st,col_end) of A as a new matrix **/
template <class T>
Matrix<T> GetSubMtx(Matrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
  // H must be passed to Resize so that it is sized before being indexed below.
  Matrix<T> H; Resize(H,row_end - row_st,col_end-col_st);

  for(int i = row_st; i<row_end; i++){
    for(int j = col_st; j<col_end; j++){
      H[i-row_st][j-col_st]=A[i][j];
    }}
  return H;
}
/**Assign a submatrix to a larger one NB remember Vector Vectors are transposes of the matrices they represent**/
// Copies S into the window [row_st,row_end) x [col_st,col_end) of A.
// NOTE(review): this definition has the same signature and body as the earlier
// AssignSubMtx in this file, so the two collide as a redefinition; one of them
// should be removed.
template<class T>
void AssignSubMtx(Matrix<T> & A,int row_st, int row_end, int col_st, int col_end, Matrix<T> &S)
{
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
A[i][j] = S[i - row_st][j - col_st];
}}
}
/** Compute sum_ij B[i] A[i][j] B[j]; no conjugate is applied to B **/
template<class T> T proj(Matrix<T> A, Vector<T> B){
  int n; SizeSquare(A,n);
  int nB; Size(B,nB);
  assert(nB==n);

  T result = 0;
  for(int r=0;r<n;r++){
    T rowdot = 0.0;
    for(int c=0;c<n;c++){
      rowdot += A[r][c]*B[c];
    }
    result += B[r]*rowdot; // No conj?
  }
  return result;
}
/*
*************************************************************
*
* Matrix Vector products
*
*************************************************************
*/
// Instead make a linop and call my CG;
/// q -> q Q, where the square matrix Q must have as many rows as q has entries
template <class T,class Fermion> void times(Vector<Fermion> &q, Matrix<T> &Q)
{
  int dimQ; SizeSquare(Q,dimQ);
  int nvec; Size(q,nvec);
  assert(dimQ==nvec);

  times(q,Q,nvec);
}
/// q -> q Q restricted to the leading N vectors: q[j] = sum_{k<N} q[k] Q[k][j] for j < N
template <class T> void times(multi1d<LatticeFermion> &q, Matrix<T> &Q, int N)
{
  int M; SizeSquare(Q,M);
  int K = q.size();
  // N == M and N == K are legal (the two-argument overload calls with N == M);
  // only a block larger than Q or q is an error.
  assert(N<=M);
  assert(N<=K);
  if ( N <= 0 ) return;   // nothing to do, and q[0] may not exist

  GridBase *grid = q[0]._grid;
  Vector<LatticeFermion> S(N,grid );
  for(int j=0;j<N;j++){
    S[j] = zero;
    for(int k=0;k<N;k++){
      S[j] = S[j] + q[k]* Q[k][j];
    }
  }
  // Only N results exist in S, so only the first N entries of q are replaced.
  for(int j=0;j<N;j++){
    q[j] = S[j];
  }
}
#endif

View File

@ -1,75 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/MatrixUtils.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_MATRIX_UTILS_H
#define GRID_MATRIX_UTILS_H
namespace Grid {
namespace MatrixUtils {
template<class T> inline void Size(Matrix<T>& A,int &N,int &M){
  // Reports the shape of A: N rows, M columns.
  // A must have at least one row and every row must have the same length.
  N=A.size();
  assert(N>0);

  M=A[0].size();

  int i=0;
  while(i<N){
    assert(A[i].size()==M);
    ++i;
  }
}
template<class T> inline void SizeSquare(Matrix<T>& A,int &N)
{
  // Reports the order N of A and asserts that A is square.
  int M;          // column count, only needed for the squareness check
  Size(A,N,M);
  assert(N==M);
}
template<class T> inline void Fill(Matrix<T>& A,T & val)
{
  // Assigns val to every element of A, row by row.
  int rows;
  int cols;
  Size(A,rows,cols);

  for(int r=0;r<rows;++r){
    for(int c=0;c<cols;++c){
      A[r][c]=val;
    }
  }
}
template<class T> inline void Diagonal(Matrix<T>& A,T & val)
{
  // Assigns val to every diagonal element of the square matrix A;
  // off-diagonal elements are not touched.
  int order;
  SizeSquare(A,order);

  int d=0;
  while(d<order){
    A[d][d]=val;
    ++d;
  }
}
template<class T> inline void Identity(Matrix<T>& A)
{
  // Overwrites the square matrix A with the identity: every element is set
  // to T(0.0) and then the diagonal is set to T(1.0).
  //
  // Fill() and Diagonal() take their value as `T &`. A literal cannot bind to
  // a non-const lvalue reference, and for T != double it would also make T
  // deduce inconsistently, so the constants are materialised as named T
  // objects first.
  T nought(0.0);
  T unit(1.0);
  Fill(A,nought);
  Diagonal(A,unit);
}
};
}
#endif

View File

@ -1,15 +0,0 @@
- ConjugateGradientMultiShift
- MCR
- Potentially Useful Boost libraries
- MultiArray
- Aligned allocator; memory pool
- Remez -- Mike or Boost?
- Multiprecision
- quaternions
- Tokenize
- Serialization
- Regex
- Proto (ET)
- uBlas

View File

@ -1,122 +0,0 @@
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <cassert>
#include <vector>
struct Bisection {
static void get_eig2(int row_num,std::vector<RealD> &ALPHA,std::vector<RealD> &BETA, std::vector<RealD> & eig)
{
int i,j;
std::vector<RealD> evec1(row_num+3);
std::vector<RealD> evec2(row_num+3);
RealD eps2;
ALPHA[1]=0.;
BETHA[1]=0.;
for(i=0;i<row_num-1;i++) {
ALPHA[i+1] = A[i*(row_num+1)].real();
BETHA[i+2] = A[i*(row_num+1)+1].real();
}
ALPHA[row_num] = A[(row_num-1)*(row_num+1)].real();
bisec(ALPHA,BETHA,row_num,1,row_num,1e-10,1e-10,evec1,eps2);
bisec(ALPHA,BETHA,row_num,1,row_num,1e-16,1e-16,evec2,eps2);
// Do we really need to sort here?
int begin=1;
int end = row_num;
int swapped=1;
while(swapped) {
swapped=0;
for(i=begin;i<end;i++){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
end--;
for(i=end-1;i>=begin;i--){
if(mag(evec2[i])>mag(evec2[i+1])) {
swap(evec2+i,evec2+i+1);
swapped=1;
}
}
begin++;
}
for(i=0;i<row_num;i++){
for(j=0;j<row_num;j++) {
if(i==j) H[i*row_num+j]=evec2[i+1];
else H[i*row_num+j]=0.;
}
}
}
static void bisec(std::vector<RealD> &c,
std::vector<RealD> &b,
int n,
int m1,
int m2,
RealD eps1,
RealD relfeh,
std::vector<RealD> &x,
RealD &eps2)
{
std::vector<RealD> wu(n+2);
RealD h,q,x1,xu,x0,xmin,xmax;
int i,a,k;
b[1]=0.0;
xmin=c[n]-fabs(b[n]);
xmax=c[n]+fabs(b[n]);
for(i=1;i<n;i++){
h=fabs(b[i])+fabs(b[i+1]);
if(c[i]+h>xmax) xmax= c[i]+h;
if(c[i]-h<xmin) xmin= c[i]-h;
}
xmax *=2.;
eps2=relfeh*((xmin+xmax)>0.0 ? xmax : -xmin);
if(eps1<=0.0) eps1=eps2;
eps2=0.5*eps1+7.0*(eps2);
x0=xmax;
for(i=m1;i<=m2;i++){
x[i]=xmax;
wu[i]=xmin;
}
for(k=m2;k>=m1;k--){
xu=xmin;
i=k;
do{
if(xu<wu[i]){
xu=wu[i];
i=m1-1;
}
i--;
}while(i>=m1);
if(x0>x[k]) x0=x[k];
while((x0-xu)>2*relfeh*(fabs(xu)+fabs(x0))+eps1){
x1=(xu+x0)/2;
a=0;
q=1.0;
for(i=1;i<=n;i++){
q=c[i]-x1-((q!=0.0)? b[i]*b[i]/q:fabs(b[i])/relfeh);
if(q<0) a++;
}
// printf("x1=%e a=%d\n",x1,a);
if(a<k){
if(a<m1){
xu=x1;
wu[m1]=x1;
}else {
xu=x1;
wu[a+1]=x1;
if(x[a]>x1) x[a]=x1;
}
}else x0=x1;
}
x[k]=(x0+xu)/2;
}
}
}

View File

@ -1 +0,0 @@

View File

@ -6,8 +6,9 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -99,7 +100,7 @@ public:
virtual int oIndex(std::vector<int> &coor) virtual int oIndex(std::vector<int> &coor)
{ {
int idx=0; int idx=0;
// Works with either global or local coordinates // Works with either global or local coordinates
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
return idx; return idx;
} }
@ -121,6 +122,12 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
} }
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
  // Combines an outer coordinate and an inner coordinate into lcoor:
  // per dimension, lcoor = ocoor + _rdimensions * icoor.
  lcoor.resize(_ndimension);
  int d = 0;
  while (d < _ndimension) {
    const int innerOffset = _rdimensions[d] * icoor[d];
    lcoor[d] = ocoor[d] + innerOffset;
    ++d;
  }
}
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// SIMD lane addressing // SIMD lane addressing
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
@ -128,6 +135,7 @@ public:
{ {
Lexicographic::CoorFromIndex(coor,lane,_simd_layout); Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
} }
inline int PermuteDim(int dimension){ inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1; return _simd_layout[dimension]>1;
} }
@ -145,15 +153,15 @@ public:
// Distance should be either 0,1,2.. // Distance should be either 0,1,2..
// //
if ( _simd_layout[dimension] > 2 ) { if ( _simd_layout[dimension] > 2 ) {
for(int d=0;d<_ndimension;d++){ for(int d=0;d<_ndimension;d++){
if ( d != dimension ) assert ( (_simd_layout[d]==1) ); if ( d != dimension ) assert ( (_simd_layout[d]==1) );
} }
permute_type = RotateBit; // How to specify distance; this is not just direction. permute_type = RotateBit; // How to specify distance; this is not just direction.
return permute_type; return permute_type;
} }
for(int d=_ndimension-1;d>dimension;d--){ for(int d=_ndimension-1;d>dimension;d--){
if (_simd_layout[d]>1 ) permute_type++; if (_simd_layout[d]>1 ) permute_type++;
} }
return permute_type; return permute_type;
} }
@ -173,6 +181,24 @@ public:
inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;}; inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;};
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
////////////////////////////////////////////////////////////////
// Utility to print the full decomposition details
////////////////////////////////////////////////////////////////
// Writes one line per quantity to std::cout, each prefixed with GridLogMessage:
// the dimension/stride members of this grid, the site counts, and the
// number of dimensions. Purely diagnostic; no state is modified here.
void show_decomposition(){
std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl;
std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl;
std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl;
std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl;
std::cout << GridLogMessage << "Outer strides : " << _ostride << std::endl;
std::cout << GridLogMessage << "Inner strides : " << _istride << std::endl;
std::cout << GridLogMessage << "iSites : " << _isites << std::endl;
std::cout << GridLogMessage << "oSites : " << _osites << std::endl;
// lSites()/gSites() are obtained through their accessors rather than read from members.
std::cout << GridLogMessage << "lSites : " << lSites() << std::endl;
std::cout << GridLogMessage << "gSites : " << gSites() << std::endl;
std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl;
}
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Global addressing // Global addressing
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
@ -184,12 +210,15 @@ public:
assert(lidx<lSites()); assert(lidx<lSites());
Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions); Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
} }
void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){ void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
gidx=0; gidx=0;
int mult=1; int mult=1;
for(int mu=0;mu<_ndimension;mu++) { for(int mu=0;mu<_ndimension;mu++) {
gidx+=mult*gcoor[mu]; gidx+=mult*gcoor[mu];
mult*=_gdimensions[mu]; mult*=_gdimensions[mu];
} }
} }
void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor) void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
@ -197,9 +226,9 @@ public:
pcoor.resize(_ndimension); pcoor.resize(_ndimension);
lcoor.resize(_ndimension); lcoor.resize(_ndimension);
for(int mu=0;mu<_ndimension;mu++){ for(int mu=0;mu<_ndimension;mu++){
int _fld = _fdimensions[mu]/_processors[mu]; int _fld = _fdimensions[mu]/_processors[mu];
pcoor[mu] = gcoor[mu]/_fld; pcoor[mu] = gcoor[mu]/_fld;
lcoor[mu] = gcoor[mu]%_fld; lcoor[mu] = gcoor[mu]%_fld;
} }
} }
void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor) void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
@ -211,9 +240,9 @@ public:
/* /*
std::vector<int> cblcoor(lcoor); std::vector<int> cblcoor(lcoor);
for(int d=0;d<cblcoor.size();d++){ for(int d=0;d<cblcoor.size();d++){
if( this->CheckerBoarded(d) ) { if( this->CheckerBoarded(d) ) {
cblcoor[d] = lcoor[d]/2; cblcoor[d] = lcoor[d]/2;
} }
} }
*/ */
i_idx= iIndex(lcoor); i_idx= iIndex(lcoor);
@ -239,7 +268,7 @@ public:
{ {
RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor); RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
if(CheckerBoarded(0)){ if(CheckerBoarded(0)){
fcoor[0] = fcoor[0]*2+cb; fcoor[0] = fcoor[0]*2+cb;
} }
} }
void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor) void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)

View File

@ -30,21 +30,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
// Pass-through "compressor": satisfies the compressor interface used by the
// gather routines without changing the data.
template<class vobj>
class SimpleCompressor {
public:
  // Takes an int and ignores it; nothing to configure.
  void Point(int) {}

  // Returns a copy of the argument, unchanged.
  vobj operator() (const vobj &arg) { return arg; }
};
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split with compression // Gather for when there is no need to SIMD split
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void template<class vobj> void
Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension,int plane,int cbmask, int off=0)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];
@ -62,7 +52,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
for(int b=0;b<e2;b++){ for(int b=0;b<e2;b++){
int o = n*stride; int o = n*stride;
int bo = n*e2; int bo = n*e2;
buffer[off+bo+b]=compress(rhs._odata[so+o+b]); buffer[off+bo+b]=rhs._odata[so+o+b];
} }
} }
} else { } else {
@ -78,17 +68,16 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimen
} }
} }
parallel_for(int i=0;i<table.size();i++){ parallel_for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]); buffer[off+table[i].first]=rhs._odata[so+table[i].second];
} }
} }
} }
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split with compression // Gather for when there *is* need to SIMD split
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
template<class cobj,class vobj,class compressor> void template<class vobj> void
Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_object *> pointers,int dimension,int plane,int cbmask,compressor &compress) Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];
@ -109,8 +98,8 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int o = n*n1; int o = n*n1;
int offset = b+n*e2; int offset = b+n*e2;
cobj temp =compress(rhs._odata[so+o+b]); vobj temp =rhs._odata[so+o+b];
extract<cobj>(temp,pointers,offset); extract<vobj>(temp,pointers,offset);
} }
} }
@ -127,32 +116,14 @@ Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename cobj::scalar_
int offset = b+n*e2; int offset = b+n*e2;
if ( ocb & cbmask ) { if ( ocb & cbmask ) {
cobj temp =compress(rhs._odata[so+o+b]); vobj temp =rhs._odata[so+o+b];
extract<cobj>(temp,pointers,offset); extract<vobj>(temp,pointers,offset);
} }
} }
} }
} }
} }
//////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
}
//////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split
//////////////////////////////////////////////////////
template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{
SimpleCompressor<vobj> dontcompress;
Gather_plane_extract<vobj,vobj,decltype(dontcompress)>(rhs,pointers,dimension,plane,cbmask,dontcompress);
}
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Scatter for when there is no need to SIMD split // Scatter for when there is no need to SIMD split
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
@ -200,7 +171,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Scatter for when there *is* need to SIMD split // Scatter for when there *is* need to SIMD split
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
template<class vobj,class cobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<cobj *> pointers,int dimension,int plane,int cbmask) template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<typename vobj::scalar_object *> pointers,int dimension,int plane,int cbmask)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];

View File

@ -154,13 +154,7 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
recv_from_rank, recv_from_rank,
bytes); bytes);
grid->Barrier(); grid->Barrier();
/*
for(int i=0;i<send_buf.size();i++){
assert(recv_buf.size()==buffer_size);
assert(send_buf.size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<i<<" "<< dimension<<"] snd "<<send_buf[i]<<" rcv " << recv_buf[i] << " 0x" << cbmask<<std::endl;
}
*/
Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask); Scatter_plane_simple (ret,recv_buf,dimension,x,cbmask);
} }
} }
@ -246,13 +240,6 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
(void *)&recv_buf_extract[i][0], (void *)&recv_buf_extract[i][0],
recv_from_rank, recv_from_rank,
bytes); bytes);
/*
for(int w=0;w<recv_buf_extract[i].size();w++){
assert(recv_buf_extract[i].size()==buffer_size);
assert(send_buf_extract[i].size()==buffer_size);
std::cout << "SendRecv_Cshift_comms ["<<w<<" "<< dimension<<"] recv "<<recv_buf_extract[i][w]<<" send " << send_buf_extract[nbr_lane][w] << cbmask<<std::endl;
}
*/
grid->Barrier(); grid->Barrier();
rpointers[i] = &recv_buf_extract[i][0]; rpointers[i] = &recv_buf_extract[i][0];
} else { } else {

12276
lib/json/json.hpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -235,64 +235,74 @@ public:
} }
}; };
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// Constructor requires "grid" passed. // Constructor requires "grid" passed.
// what about a default grid? // what about a default grid?
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
Lattice(GridBase *grid) : _odata(grid->oSites()) { Lattice(GridBase *grid) : _odata(grid->oSites()) {
_grid = grid; _grid = grid;
// _odata.reserve(_grid->oSites()); // _odata.reserve(_grid->oSites());
// _odata.resize(_grid->oSites()); // _odata.resize(_grid->oSites());
// std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl; // std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
assert((((uint64_t)&_odata[0])&0xF) ==0); assert((((uint64_t)&_odata[0])&0xF) ==0);
checkerboard=0; checkerboard=0;
} }
Lattice(const Lattice& r){ // copy constructor Lattice(const Lattice& r){ // copy constructor
_grid = r._grid; _grid = r._grid;
checkerboard = r.checkerboard; checkerboard = r.checkerboard;
_odata.resize(_grid->oSites());// essential _odata.resize(_grid->oSites());// essential
parallel_for(int ss=0;ss<_grid->oSites();ss++){ parallel_for(int ss=0;ss<_grid->oSites();ss++){
_odata[ss]=r._odata[ss]; _odata[ss]=r._odata[ss];
} }
} }
virtual ~Lattice(void) = default; virtual ~Lattice(void) = default;
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){ void reset(GridBase* grid) {
parallel_for(int ss=0;ss<_grid->oSites();ss++){ if (_grid != grid) {
this->_odata[ss]=r; _grid = grid;
} _odata.resize(grid->oSites());
return *this; checkerboard = 0;
}
template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
}
return *this;
} }
}
// *=,+=,-= operators inherit behvour from correspond */+/- operation template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) { parallel_for(int ss=0;ss<_grid->oSites();ss++){
*this = (*this)*r; this->_odata[ss]=r;
return *this;
} }
return *this;
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) { }
*this = (*this)-r;
return *this; template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
this->checkerboard = r.checkerboard;
conformable(*this,r);
parallel_for(int ss=0;ss<_grid->oSites();ss++){
this->_odata[ss]=r._odata[ss];
} }
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) { return *this;
*this = (*this)+r; }
return *this;
} // *=,+=,-= operators inherit behvour from correspond */+/- operation
}; // class Lattice template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
*this = (*this)*r;
return *this;
}
template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
*this = (*this)-r;
return *this;
}
template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
*this = (*this)+r;
return *this;
}
}; // class Lattice
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
std::vector<int> gcoor; std::vector<int> gcoor;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
@ -310,7 +320,7 @@ public:
} }
return stream; return stream;
} }
} }

View File

@ -1,157 +1,154 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/lattice/Lattice_reduction.h Source file: ./lib/lattice/Lattice_reduction.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_LATTICE_REDUCTION_H #ifndef GRID_LATTICE_REDUCTION_H
#define GRID_LATTICE_REDUCTION_H #define GRID_LATTICE_REDUCTION_H
#include <Grid/Grid_Eigen_Dense.h>
namespace Grid { namespace Grid {
#ifdef GRID_WARN_SUBOPTIMAL #ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert all these reduction loops are NOT threaded " #warning "Optimisation alert all these reduction loops are NOT threaded "
#endif #endif
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations // Deterministic Reduction operations
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
ComplexD nrm = innerProduct(arg,arg); ComplexD nrm = innerProduct(arg,arg);
return std::real(nrm); return std::real(nrm);
}
// Double inner product
template<class vobj>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_typeD vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProductD(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProductD(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
} }
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class vobj> template<class Op,class T1,class T2>
inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
{
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
scalar_type nrm;
GridBase *grid = left._grid;
std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero;
}
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff;
GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
for(int ss=myoff;ss<mywork+myoff; ss++){
vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
}
sumarray[thr]=TensorRemove(vnrm) ;
}
vector_type vvnrm; vvnrm=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vvnrm = vvnrm+sumarray[i];
}
nrm = Reduce(vvnrm);// sum across simd
right._grid->GlobalSum(nrm);
return nrm;
}
template<class Op,class T1>
inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
{
return sum(closure(expr));
}
template<class Op,class T1,class T2>
inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
{ {
return sum(closure(expr)); return sum(closure(expr));
} }
template<class Op,class T1,class T2,class T3> template<class Op,class T1,class T2,class T3>
inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
eval(0,std::get<1>(expr.second)), eval(0,std::get<1>(expr.second)),
eval(0,std::get<2>(expr.second)) eval(0,std::get<2>(expr.second))
))::scalar_object ))::scalar_object
{ {
return sum(closure(expr)); return sum(closure(expr));
} }
template<class vobj> template<class vobj>
inline typename vobj::scalar_object sum(const Lattice<vobj> &arg){ inline typename vobj::scalar_object sum(const Lattice<vobj> &arg)
{
GridBase *grid=arg._grid; GridBase *grid=arg._grid;
int Nsimd = grid->Nsimd(); int Nsimd = grid->Nsimd();
std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize()); std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
for(int i=0;i<grid->SumArraySize();i++){ for(int i=0;i<grid->SumArraySize();i++){
sumarray[i]=zero; sumarray[i]=zero;
} }
parallel_for(int thr=0;thr<grid->SumArraySize();thr++){ parallel_for(int thr=0;thr<grid->SumArraySize();thr++){
int nwork, mywork, myoff; int nwork, mywork, myoff;
GridThread::GetWork(grid->oSites(),thr,mywork,myoff); GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
vobj vvsum=zero; vobj vvsum=zero;
for(int ss=myoff;ss<mywork+myoff; ss++){ for(int ss=myoff;ss<mywork+myoff; ss++){
vvsum = vvsum + arg._odata[ss]; vvsum = vvsum + arg._odata[ss];
}
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
} }
sumarray[thr]=vvsum;
}
vobj vsum=zero; // sum across threads
for(int i=0;i<grid->SumArraySize();i++){
vsum = vsum+sumarray[i];
}
typedef typename vobj::scalar_object sobj;
sobj ssum=zero;
std::vector<sobj> buf(Nsimd);
extract(vsum,buf);
for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
arg._grid->GlobalSum(ssum);
return ssum;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// sliceSum, sliceInnerProduct, sliceAxpy, sliceNorm etc...
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
{ {
///////////////////////////////////////////////////////
// FIXME precision promoted summation
// may be important for correlation functions
// But easily avoided by using double precision fields
///////////////////////////////////////////////////////
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
GridBase *grid = Data._grid; GridBase *grid = Data._grid;
assert(grid!=NULL); assert(grid!=NULL);
// FIXME
// std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
const int Nd = grid->_ndimension; const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
@ -163,23 +160,31 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
int rd=grid->_rdimensions[orthogdim]; int rd=grid->_rdimensions[orthogdim];
std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
std::vector<sobj> extracted(Nsimd); // splitting the SIMD std::vector<sobj> extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file result.resize(fd); // And then global sum to return the same vector to every node
for(int r=0;r<rd;r++){ for(int r=0;r<rd;r++){
lvSum[r]=zero; lvSum[r]=zero;
} }
std::vector<int> coor(Nd); int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// sum over reduced dimension planes, breaking out orthog dir // sum over reduced dimension planes, breaking out orthog dir
// Parallel over orthog direction
parallel_for(int r=0;r<rd;r++){
for(int ss=0;ss<grid->oSites();ss++){ int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
Lexicographic::CoorFromIndex(coor,ss,grid->_rdimensions);
int r = coor[orthogdim]; for(int n=0;n<e1;n++){
lvSum[r]=lvSum[r]+Data._odata[ss]; for(int b=0;b<e2;b++){
} int ss= so+n*stride+b;
lvSum[r]=lvSum[r]+Data._odata[ss];
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir. // Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd); std::vector<int> icoor(Nd);
@ -214,10 +219,305 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
result[t]=gsum; result[t]=gsum;
} }
}
// For every global coordinate t along direction `orthogdim`, computes the sum
// over the sites of that slice of innerProduct(lhs,rhs) and stores it in
// result[t]. `result` is resized to the full extent of that direction, and
// every entry goes through grid->GlobalSum.
//
// result    : output, one ComplexD per global coordinate along orthogdim
// lhs, rhs  : fields on the same grid (checked with conformable)
// orthogdim : direction that is NOT summed over; must satisfy 0 <= orthogdim < Nd
template<class vobj>
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
{
typedef typename vobj::vector_type vector_type;
typedef typename vobj::scalar_type scalar_type;
GridBase *grid = lhs._grid;
assert(grid!=NULL);
conformable(grid,rhs._grid);
const int Nd = grid->_ndimension;
const int Nsimd = grid->Nsimd();
assert(orthogdim >= 0);
assert(orthogdim < Nd);
// Extents of the orthogonal direction taken from the grid's
// _fdimensions (fd), _ldimensions (ld) and _rdimensions (rd).
int fd=grid->_fdimensions[orthogdim];
int ld=grid->_ldimensions[orthogdim];
int rd=grid->_rdimensions[orthogdim];
std::vector<vector_type,alignedAllocator<vector_type> > lvSum(rd); // will locally sum vectors first
std::vector<scalar_type > lsSum(ld,scalar_type(0.0)); // sum across these down to scalars
std::vector<iScalar<scalar_type> > extracted(Nsimd); // splitting the SIMD
result.resize(fd); // And then global sum to return the same vector to every node for IO to file
for(int r=0;r<rd;r++){
lvSum[r]=zero;
}
// Slice geometry: a plane is visited as e1 blocks of e2 sites, `stride` apart.
int e1= grid->_slice_nblock[orthogdim];
int e2= grid->_slice_block [orthogdim];
int stride=grid->_slice_stride[orthogdim];
// Each iteration r accumulates only into its own lvSum[r].
parallel_for(int r=0;r<rd;r++){
int so=r*grid->_ostride[orthogdim]; // base offset for start of plane
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int ss= so+n*stride+b;
vector_type vv = TensorRemove(innerProduct(lhs._odata[ss],rhs._odata[ss]));
lvSum[r]=lvSum[r]+vv;
}
}
}
// Sum across simd lanes in the plane, breaking out orthog dir.
std::vector<int> icoor(Nd);
for(int rt=0;rt<rd;rt++){
// Wrap in iScalar so extract() can split the SIMD word into `extracted`.
iScalar<vector_type> temp;
temp._internal = lvSum[rt];
extract(temp,extracted);
for(int idx=0;idx<Nsimd;idx++){
grid->iCoorFromIindex(icoor,idx);
// Lane idx of reduced plane rt lands at local coordinate rt + icoor*rd.
int ldx =rt+icoor[orthogdim]*rd;
lsSum[ldx]=lsSum[ldx]+extracted[idx]._internal;
}
}
// sum over nodes.
scalar_type gsum;
for(int t=0;t<fd;t++){
int pt = t/ld; // processor plane
int lt = t%ld;
// Only the rank whose processor coordinate matches pt contributes its
// local value; every other rank contributes zero to the global sum.
if ( pt == grid->_processor_coor[orthogdim] ) {
gsum=lsSum[lt];
} else {
gsum=scalar_type(0.0);
}
grid->GlobalSum(gsum);
result[t]=gsum;
}
}
//////////////////////////////////////////////////////////////////////////////////////////
// sliceNorm
//
// Fills sn[t] with the real part of the slice-wise inner product of rhs with itself,
// one entry per global coordinate along dimension Orthog. No square root is taken.
// sn is resized to rhs._grid->GlobalDimensions()[Orthog].
//////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
static void sliceNorm (std::vector<RealD> &sn,const Lattice<vobj> &rhs,int Orthog)
{
  const int nSlices = rhs._grid->GlobalDimensions()[Orthog];

  // <rhs,rhs> restricted to each slice
  std::vector<ComplexD> selfProduct(nSlices);
  sliceInnerProductVector(selfProduct,rhs,rhs,Orthog);

  // keep only the real component
  sn.resize(nSlices);
  for(int t=0;t<nSlices;t++){
    sn[t] = real(selfProduct[t]);
  }
}
//////////////////////////////////////////////////////////////////////////////////////////
// sliceMaddVector
//
// On every site computes R = (scale * a[t]) * X + Y, where the coefficient is chosen
// per slice along dimension orthogdim.
//
//   R         : output field, written at every outer site of X's grid
//   a         : per-slice real coefficients, read at index r + icoor[orthogdim]*rd
//   X, Y      : input fields
//   orthogdim : dimension that labels the slices; asserted to lie in [0,Nd)
//   scale     : overall factor multiplied into every coefficient (default 1.0)
//
// NOTE(review): the index used into `a` is built from the reduced-plane index and the
// SIMD lane coordinate only; no processor offset is applied. Confirm this is what
// callers expect when orthogdim is split across ranks.
//////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
                            int orthogdim,RealD scale=1.0)
{
  typedef typename vobj::scalar_type    scalar_type;
  typedef typename vobj::vector_type    vector_type;
  typedef typename vobj::tensor_reduced tensor_reduced;

  GridBase *grid = X._grid;
  assert(grid!=NULL);

  // Same argument checks as sliceInnerProductVector
  const int Nd = grid->_ndimension;
  assert(orthogdim >= 0);
  assert(orthogdim < Nd);

  scalar_type zscale(scale);

  int Nsimd  =grid->Nsimd();
  int rd     =grid->_rdimensions[orthogdim];

  int e1     =grid->_slice_nblock[orthogdim];
  int e2     =grid->_slice_block [orthogdim];
  int stride =grid->_slice_stride[orthogdim];

  // Sized up front (as in sliceInnerProductVector) so that indexing icoor[orthogdim]
  // does not rely on iCoorFromIindex resizing its output argument.
  std::vector<int> icoor(Nd);

  for(int r=0;r<rd;r++){

    int so=r*grid->_ostride[orthogdim]; // base offset for start of plane

    // Build a SIMD vector holding one coefficient per lane
    vector_type av;
    scalar_type *as =(scalar_type *)&av; // lane-wise view of av

    for(int l=0;l<Nsimd;l++){
      grid->iCoorFromIindex(icoor,l);
      int ldx =r+icoor[orthogdim]*rd;
      assert(ldx < (int)a.size());       // coefficient vector must cover every slice touched
      as[l] = scalar_type(a[ldx])*zscale;
    }

    tensor_reduced at; at=av;

    // Apply the lane-dependent coefficient to every site of this reduced plane
    parallel_for_nest2(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int ss= so+n*stride+b;
        R._odata[ss] = at*X._odata[ss]+Y._odata[ss];
      }
    }
  }
}
/*
template<class vobj>
static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int Orthog,RealD scale=1.0)
{
// FIXME: Implementation is slow
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
// If we based this on Cshift it would work for spread out
// but it would be even slower
for(int i=0;i<Nblock;i++){
ExtractSlice(Rslice,Y,i,Orthog);
ExtractSlice(Xslice,X,i,Orthog);
Rslice = Rslice + Xslice*(scale*a[i]);
InsertSlice(Rslice,R,i,Orthog);
}
};
template<class vobj>
static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Look at localInnerProduct implementation,
// and do inside a site loop with block strided iterators
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced scalar;
typedef typename scalar::scalar_object scomplex;
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
vec.resize(Nblock);
std::vector<scomplex> sip(Nblock);
Lattice<scalar> IP(lhs._grid);
IP=localInnerProduct(lhs,rhs);
sliceSum(IP,sip,Orthog);
for(int ss=0;ss<Nblock;ss++){
vec[ss] = TensorRemove(sip[ss]);
}
}
*/
//////////////////////////////////////////////////////////////////////////////////////////
// FIXME: Implementation is slow
// If we based this on Cshift it would work for spread out
// but it would be even slower
//
// Repeated extract slice is inefficient
//
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
//////////////////////////////////////////////////////////////////////////////////////////
// Builds a new GridCartesian that has every dimension of BlockSolverGrid except Orthog.
// For each retained dimension the full extent (_fdimensions), SIMD layout (_simd_layout)
// and processor count (_processors) are copied from BlockSolverGrid.
//
// Returns a pointer obtained from `new`.
// NOTE(review): no matching delete is visible in the callers in this part of the file
// (sliceMaddMatrix, sliceInnerProductMatrix); confirm who owns the returned grid.
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
int NN = BlockSolverGrid->_ndimension;
int nsimd = BlockSolverGrid->Nsimd(); // read but not used below
std::vector<int> latt_phys(0);
std::vector<int> simd_phys(0);
std::vector<int> mpi_phys(0);
// Copy the geometry of every dimension other than Orthog, preserving order
for(int d=0;d<NN;d++){
if( d!=Orthog ) {
latt_phys.push_back(BlockSolverGrid->_fdimensions[d]);
simd_phys.push_back(BlockSolverGrid->_simd_layout[d]);
mpi_phys.push_back(BlockSolverGrid->_processors[d]);
}
}
return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
} }
//////////////////////////////////////////////////////////////////////////////////////////
// sliceMaddMatrix
//
// For every slice i along dimension Orthog:
//     R_i = Y_i + sum_j X_j * (scale * aa(j,i))
// where the subscript denotes the slice extracted at that coordinate.
//
// Implemented with repeated ExtractSlice / InsertSlice on a temporary grid built by
// makeSubSliceGrid, i.e. Nblock*Nblock slice extractions per call.
//
// NOTE(review): the grid returned by makeSubSliceGrid is not released in this function.
//////////////////////////////////////////////////////////////////////////////////////////
template<class vobj>
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
{
  const int nSlices = X._grid->GlobalDimensions()[Orthog];

  GridBase *fullGrid  = X._grid;
  GridBase *sliceGrid = makeSubSliceGrid(fullGrid,Orthog);

  Lattice<vobj> xSlice(sliceGrid); // current slice of X
  Lattice<vobj> accum (sliceGrid); // running result for output slice `col`

  for(int col=0;col<nSlices;col++){
    // start from the matching slice of Y ...
    ExtractSlice(accum,Y,col,Orthog);
    // ... and add every slice of X weighted by column `col` of aa
    for(int row=0;row<nSlices;row++){
      ExtractSlice(xSlice,X,row,Orthog);
      accum = accum + xSlice*(scale*aa(row,col));
    }
    InsertSlice(accum,R,col,Orthog);
  }
}
// Fills mat with the Nblock x Nblock matrix of inner products between slices:
//     mat(i,j) = innerProduct( slice i of lhs , slice j of rhs )
// where slices are taken along dimension Orthog and Nblock is the global extent of
// that dimension. mat is reset to an Nblock x Nblock zero matrix before being filled.
//
// NOTE(review): the grid returned by makeSubSliceGrid is not released in this function.
template<class vobj>
static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Not sure of best solution.. think about it
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
GridBase *FullGrid = lhs._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
int Nblock = FullGrid->GlobalDimensions()[Orthog];
Lattice<vobj> Lslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);
// Each lhs slice is extracted once per row; each rhs slice is re-extracted for every row
for(int i=0;i<Nblock;i++){
ExtractSlice(Lslice,lhs,i,Orthog);
for(int j=0;j<Nblock;j++){
ExtractSlice(Rslice,rhs,j,Orthog);
mat(i,j) = innerProduct(Lslice,Rslice);
}
}
// FORCE_DIAG is undefined immediately before it is tested, so the block below that
// zeroes the off-diagonal entries is compiled out.
#undef FORCE_DIAG
#ifdef FORCE_DIAG
for(int i=0;i<Nblock;i++){
for(int j=0;j<Nblock;j++){
if ( i != j ) mat(i,j)=0.0;
}
}
#endif
return;
} }
} /*END NAMESPACE GRID*/
#endif #endif

View File

@ -6,8 +6,8 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -75,6 +75,55 @@ namespace Grid {
return multiplicity; return multiplicity;
} }
// merge of April 11 2017
//<<<<<<< HEAD
// this function is necessary for the LS vectorised field
// Returns how many local sites of `fine` correspond to one local site of `coarse`
// (fine->lSites() / coarse->lSites()), after asserting that the two grids are
// compatible for filling `fine` from generators laid out on `coarse`:
//   - fine has at least as many dimensions as coarse;
//   - the extra leading dimensions of fine are not split across processors;
//   - the remaining dimensions use the same processor counts as coarse;
//   - both grids have the same Nsimd();
//   - the local site counts divide exactly.
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
  const int rngdims   = coarse->_ndimension;
  const int lowerdims = fine->_ndimension - rngdims; // extra leading dimensions of fine
  assert(lowerdims >= 0);

  // the extra dimensions must be entirely local to the node
  for(int d=0;d<lowerdims;d++){
    assert(fine->_processors[d]==1);
  }

  // the shared dimensions must be decomposed identically
  for(int d=0;d<rngdims;d++){
    assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
  }

  // the total number of SIMD lanes must agree
  assert(fine->Nsimd() == coarse->Nsimd());

  // the two grids must divide cleanly
  const auto fineSites   = fine->lSites();
  const auto coarseSites = coarse->lSites();
  assert( fineSites % coarseSites == 0 );

  return fineSites / coarseSites;
}
/*
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
typedef std::seed_seq::result_type result_type;
std::seed_seq src;
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
result_type operator () (void){
std::vector<result_type> list(1);
src.generate(list.begin(),list.end());
return list[0];
}
};
=======
>>>>>>> develop
*/
// real scalars are one component // real scalars are one component
template<class scalar,class distribution,class generator> template<class scalar,class distribution,class generator>
void fillScalar(scalar &s,distribution &dist,generator & gen) void fillScalar(scalar &s,distribution &dist,generator & gen)
@ -109,7 +158,7 @@ namespace Grid {
#ifdef RNG_SITMO #ifdef RNG_SITMO
typedef sitmo::prng_engine RngEngine; typedef sitmo::prng_engine RngEngine;
typedef uint64_t RngStateType; typedef uint64_t RngStateType;
static const int RngStateCount = 4; static const int RngStateCount = 13;
#endif #endif
std::vector<RngEngine> _generators; std::vector<RngEngine> _generators;
@ -164,7 +213,7 @@ namespace Grid {
ss<<eng; ss<<eng;
ss.seekg(0,ss.beg); ss.seekg(0,ss.beg);
for(int i=0;i<RngStateCount;i++){ for(int i=0;i<RngStateCount;i++){
ss>>saved[i]; ss>>saved[i];
} }
} }
void GetState(std::vector<RngStateType> & saved,int gen) { void GetState(std::vector<RngStateType> & saved,int gen) {
@ -174,7 +223,7 @@ namespace Grid {
assert(saved.size()==RngStateCount); assert(saved.size()==RngStateCount);
std::stringstream ss; std::stringstream ss;
for(int i=0;i<RngStateCount;i++){ for(int i=0;i<RngStateCount;i++){
ss<< saved[i]<<" "; ss<< saved[i]<<" ";
} }
ss.seekg(0,ss.beg); ss.seekg(0,ss.beg);
ss>>eng; ss>>eng;
@ -215,7 +264,7 @@ namespace Grid {
dist[0].reset(); dist[0].reset();
for(int idx=0;idx<words;idx++){ for(int idx=0;idx<words;idx++){
fillScalar(buf[idx],dist[0],_generators[0]); fillScalar(buf[idx],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@ -247,7 +296,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l; RealF *pointer=(RealF *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<2*vComplexF::Nsimd();i++){ for(int i=0;i<2*vComplexF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@ -255,7 +304,7 @@ namespace Grid {
RealD *pointer=(RealD *)&l; RealD *pointer=(RealD *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<2*vComplexD::Nsimd();i++){ for(int i=0;i<2*vComplexD::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@ -263,7 +312,7 @@ namespace Grid {
RealF *pointer=(RealF *)&l; RealF *pointer=(RealF *)&l;
dist[0].reset(); dist[0].reset();
for(int i=0;i<vRealF::Nsimd();i++){ for(int i=0;i<vRealF::Nsimd();i++){
fillScalar(pointer[i],dist[0],_generators[0]); fillScalar(pointer[i],dist[0],_generators[0]);
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
@ -275,7 +324,7 @@ namespace Grid {
} }
CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l)); CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
} }
void SeedFixedIntegers(const std::vector<int> &seeds){ void SeedFixedIntegers(const std::vector<int> &seeds){
CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size()); CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
std::seed_seq src(seeds.begin(),seeds.end()); std::seed_seq src(seeds.begin(),seeds.end());
@ -284,18 +333,20 @@ namespace Grid {
}; };
class GridParallelRNG : public GridRNGbase { class GridParallelRNG : public GridRNGbase {
double _time_counter;
public: public:
GridBase *_grid; GridBase *_grid;
int _vol; unsigned int _vol;
public:
int generator_idx(int os,int is){ int generator_idx(int os,int is) {
return is*_grid->oSites()+os; return is*_grid->oSites()+os;
} }
GridParallelRNG(GridBase *grid) : GridRNGbase() { GridParallelRNG(GridBase *grid) : GridRNGbase() {
_grid=grid; _grid = grid;
_vol =_grid->iSites()*_grid->oSites(); _vol =_grid->iSites()*_grid->oSites();
_generators.resize(_vol); _generators.resize(_vol);
_uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1}); _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
@ -309,33 +360,34 @@ namespace Grid {
typedef typename vobj::scalar_object scalar_object; typedef typename vobj::scalar_object scalar_object;
typedef typename vobj::scalar_type scalar_type; typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type; typedef typename vobj::vector_type vector_type;
int multiplicity = RNGfillable(_grid,l._grid);
int Nsimd =_grid->Nsimd(); double inner_time_counter = usecond();
int osites=_grid->oSites();
int words=sizeof(scalar_object)/sizeof(scalar_type); int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid
int Nsimd = _grid->Nsimd(); // guaranteed to be the same for l._grid too
int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
int words = sizeof(scalar_object) / sizeof(scalar_type);
parallel_for(int ss=0;ss<osites;ss++){ parallel_for(int ss=0;ss<osites;ss++){
std::vector<scalar_object> buf(Nsimd);
for (int m = 0; m < multiplicity; m++) { // Draw from same generator multiplicity times
std::vector<scalar_object> buf(Nsimd); int sm = multiplicity * ss + m; // Maps the generator site to the fine site
for(int m=0;m<multiplicity;m++) {// Draw from same generator multiplicity times
int sm=multiplicity*ss+m; // Maps the generator site to the fine site for (int si = 0; si < Nsimd; si++) {
for(int si=0;si<Nsimd;si++){ int gdx = generator_idx(ss, si); // index of generator state
int gdx = generator_idx(ss,si); // index of generator state scalar_type *pointer = (scalar_type *)&buf[si];
scalar_type *pointer = (scalar_type *)&buf[si]; dist[gdx].reset();
dist[gdx].reset(); for (int idx = 0; idx < words; idx++)
for(int idx=0;idx<words;idx++){ fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
fillScalar(pointer[idx],dist[gdx],_generators[gdx]); }
} // merge into SIMD lanes, FIXME suboptimal implementation
} merge(l._odata[sm], buf);
}
// merge into SIMD lanes
merge(l._odata[sm],buf);
}
} }
_time_counter += usecond()- inner_time_counter;
}; };
void SeedFixedIntegers(const std::vector<int> &seeds){ void SeedFixedIntegers(const std::vector<int> &seeds){
@ -412,6 +464,12 @@ namespace Grid {
} }
#endif #endif
} }
// Logs the accumulated value of _time_counter, divided by 1e3 and labelled as
// milliseconds, through GridLogMessage.
void Report(){
  const double fill_ms = _time_counter/1e3;
  std::cout << GridLogMessage
            << "Time spent in the fill() routine by GridParallelRNG: "
            << fill_ms << " ms" << std::endl;
}
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Support for rigorous test of RNG's // Support for rigorous test of RNG's
// Return uniform random uint32_t from requested site generator // Return uniform random uint32_t from requested site generator
@ -419,7 +477,6 @@ namespace Grid {
uint32_t GlobalU01(int gsite){ uint32_t GlobalU01(int gsite){
uint32_t the_number; uint32_t the_number;
// who // who
std::vector<int> gcoor; std::vector<int> gcoor;
int rank,o_idx,i_idx; int rank,o_idx,i_idx;

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -359,7 +359,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
template<class vobj> template<class vobj>
void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog) void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
@ -401,7 +401,7 @@ void InsertSlice(Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int
} }
template<class vobj> template<class vobj>
void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, int orthog) void ExtractSlice(Lattice<vobj> &lowDim,const Lattice<vobj> & higherDim,int slice, int orthog)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
@ -444,7 +444,7 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
template<class vobj> template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog) void InsertSliceLocal(const Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;

View File

@ -62,14 +62,20 @@ namespace Grid {
return ret; return ret;
} }
template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){
Lattice<obj> ret(rhs._grid); Lattice<obj> ret(rhs._grid);
ret.checkerboard = rhs.checkerboard; ret.checkerboard = rhs.checkerboard;
conformable(ret,rhs); conformable(ret,rhs);
parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
} }
return ret; return ret;
} }

View File

@ -30,6 +30,7 @@ directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
#include <Grid/util/CompilerCompatible.h>
#include <cxxabi.h> #include <cxxabi.h>
#include <memory> #include <memory>

View File

@ -110,8 +110,8 @@ public:
friend std::ostream& operator<< (std::ostream& stream, Logger& log){ friend std::ostream& operator<< (std::ostream& stream, Logger& log){
if ( log.active ) { if ( log.active ) {
stream << log.background()<< std::setw(10) << std::left << log.topName << log.background()<< " : "; stream << log.background()<< std::setw(8) << std::left << log.topName << log.background()<< " : ";
stream << log.colour() << std::setw(14) << std::left << log.name << log.background() << " : "; stream << log.colour() << std::setw(10) << std::left << log.name << log.background() << " : ";
if ( log.timestamp ) { if ( log.timestamp ) {
StopWatch.Stop(); StopWatch.Stop();
GridTime now = StopWatch.Elapsed(); GridTime now = StopWatch.Elapsed();

View File

@ -6,8 +6,8 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: Guido Cossu<guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define GRID_BINARY_IO_H #define GRID_BINARY_IO_H
#include "IldgIOtypes.h"
#ifdef HAVE_ENDIAN_H #ifdef HAVE_ENDIAN_H
#include <endian.h> #include <endian.h>
#endif #endif
@ -149,7 +151,48 @@ class BinaryIO {
csum=csum+buf[i]; csum=csum+buf[i];
} }
} }
// Simple classes for precision conversion
template <class fobj, class sobj>
struct BinarySimpleUnmunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(sobj &in, fobj &out, uint32_t &csum) {
// take word by word and transform accoding to the status
fobj_stype *out_buffer = (fobj_stype *)&out;
sobj_stype *in_buffer = (sobj_stype *)&in;
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
BinaryIO::Uint32Checksum((uint32_t *)&out, sizeof(out), csum);
}
};
template <class fobj, class sobj>
struct BinarySimpleMunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(fobj &in, sobj &out, uint32_t &csum) {
// take word by word and transform accoding to the status
fobj_stype *in_buffer = (fobj_stype *)&in;
sobj_stype *out_buffer = (sobj_stype *)&out;
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
BinaryIO::Uint32Checksum((uint32_t *)&in, sizeof(in), csum);
}
};
template<class vobj,class fobj,class munger> template<class vobj,class fobj,class munger>
static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format) static inline uint32_t readObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,const std::string &format)
{ {
@ -188,9 +231,9 @@ class BinaryIO {
fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0);
bytes += sizeof(file_object); bytes += sizeof(file_object);
if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
munge(file_object, munged, csum); munge(file_object, munged, csum);
} }
@ -209,7 +252,7 @@ class BinaryIO {
static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset, static inline uint32_t writeObjectSerial(Lattice<vobj> &Umu,std::string file,munger munge,int offset,
const std::string & format) const std::string & format)
{ {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
@ -246,7 +289,6 @@ class BinaryIO {
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
@ -270,24 +312,29 @@ class BinaryIO {
typedef typename GridSerialRNG::RngStateType RngStateType; typedef typename GridSerialRNG::RngStateType RngStateType;
const int RngStateCount = GridSerialRNG::RngStateCount; const int RngStateCount = GridSerialRNG::RngStateCount;
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int gsites = grid->_gsites; int gsites = grid->_gsites;
GridStopWatch timer; timer.Start();
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Serialise through node zero // Serialise through node zero
////////////////////////////////////////////////// //////////////////////////////////////////////////
std::cout<< GridLogMessage<< "Serial RNG write I/O "<< file<<std::endl;
std::ofstream fout; std::ofstream fout;
if ( grid->IsBoss() ) { if (grid->IsBoss()) {
fout.open(file,std::ios::binary|std::ios::out|std::ios::in); fout.open(file, std::ios::binary | std::ios::out);
if (!fout.is_open()) {
std::cout << GridLogMessage << "writeRNGSerial: Error opening file " << file << std::endl;
exit(0);// write better error handling
}
fout.seekp(offset); fout.seekp(offset);
} }
uint32_t csum=0; std::cout << GridLogMessage << "Serial RNG write I/O on file " << file << std::endl;
uint32_t csum = 0;
std::vector<RngStateType> saved(RngStateCount); std::vector<RngStateType> saved(RngStateCount);
int bytes = sizeof(RngStateType)*saved.size(); int bytes = sizeof(RngStateType) * saved.size();
std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl;
std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl;
std::vector<int> gcoor; std::vector<int> gcoor;
for(int gidx=0;gidx<gsites;gidx++){ for(int gidx=0;gidx<gsites;gidx++){
@ -301,8 +348,7 @@ class BinaryIO {
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; // std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
parallel.GetState(saved,l_idx); parallel.GetState(saved,l_idx);
} }
grid->Broadcast(rank, (void *)&saved[0], bytes);
grid->Broadcast(rank,(void *)&saved[0],bytes);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
@ -316,9 +362,20 @@ class BinaryIO {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); fout.write((char *)&saved[0],bytes);assert( fout.fail()==0);
} }
grid->Broadcast(0,(void *)&csum,sizeof(csum));
grid->Broadcast(0, (void *)&csum, sizeof(csum));
if (grid->IsBoss())
fout.close();
timer.Stop();
std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl;
std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl;
return csum; return csum;
} }
static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset) static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file,int offset)
{ {
typedef typename GridSerialRNG::RngStateType RngStateType; typedef typename GridSerialRNG::RngStateType RngStateType;
@ -330,34 +387,46 @@ class BinaryIO {
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Serialise through node zero // Serialise through node zero
////////////////////////////////////////////////// //////////////////////////////////////////////////
std::cout<< GridLogMessage<< "Serial RNG read I/O "<< file<<std::endl; std::cout<< GridLogMessage<< "Serial RNG read I/O of file "<<file<<std::endl;
std::ifstream fin;
if (grid->IsBoss()) {
fin.open(file, std::ios::binary | std::ios::in);
if (!fin.is_open()) {
std::cout << GridLogMessage << "readRNGSerial: Error opening file " << file << std::endl;
exit(0);// write better error handling
}
fin.seekg(offset);
}
std::ifstream fin(file,std::ios::binary|std::ios::in);
fin.seekg(offset);
uint32_t csum=0; uint32_t csum=0;
std::vector<RngStateType> saved(RngStateCount); std::vector<RngStateType> saved(RngStateCount);
int bytes = sizeof(RngStateType)*saved.size(); int bytes = sizeof(RngStateType)*saved.size();
std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl;
std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl;
std::vector<int> gcoor; std::vector<int> gcoor;
std::cout << GridLogDebug << "gsites: " << gsites << " loop" << std::endl;
for(int gidx=0;gidx<gsites;gidx++){ for(int gidx=0;gidx<gsites;gidx++){
int rank,o_idx,i_idx; int rank,o_idx,i_idx;
grid->GlobalIndexToGlobalCoor(gidx,gcoor); grid->GlobalIndexToGlobalCoor(gidx,gcoor);
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
//std::cout << GridLogDebug << "l_idx " << l_idx << " o_idx " << o_idx
// << " i_idx " << i_idx << " rank " << rank << std::endl;
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); fin.read((char *)&saved[0],bytes);assert( fin.fail()==0);
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
} }
grid->Broadcast(0,(void *)&saved[0],bytes); grid->Broadcast(0,(void *)&saved[0],bytes);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
parallel.SetState(saved,l_idx); parallel.SetState(saved,l_idx);
} }
} }
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
@ -366,16 +435,21 @@ class BinaryIO {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
} }
std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl;
grid->Broadcast(0,(void *)&csum,sizeof(csum)); grid->Broadcast(0,(void *)&csum,sizeof(csum));
return csum; return csum;
} }
template<class vobj,class fobj,class munger> template <class vobj, class fobj, class munger>
static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset, static inline uint32_t readObjectParallel(Lattice<vobj> &Umu,
const std::string &format) std::string file,
{ munger munge,
int offset,
const std::string &format,
ILDGtype ILDG = ILDGtype()) {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
@ -441,9 +515,10 @@ class BinaryIO {
int myrank = grid->ThisRank(); int myrank = grid->ThisRank();
int iorank = grid->RankFromProcessorCoor(ioproc); int iorank = grid->RankFromProcessorCoor(ioproc);
if ( IOnode ) { if (!ILDG.is_ILDG)
fin.open(file,std::ios::binary|std::ios::in); if ( IOnode ) {
} fin.open(file,std::ios::binary|std::ios::in);
}
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Find the location of each site and send to primary node // Find the location of each site and send to primary node
@ -451,13 +526,14 @@ class BinaryIO {
// available (how short sighted is that?) // available (how short sighted is that?)
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
Umu = zero; Umu = zero;
static uint32_t csum; csum=0; static uint32_t csum; csum=0;//static for SHMEM
fobj fileObj; fobj fileObj;
static sobj siteObj; // Static to place in symmetric region for SHMEM static sobj siteObj; // Static to place in symmetric region for SHMEM
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){ for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
@ -470,6 +546,7 @@ class BinaryIO {
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
///////////////////////// /////////////////////////
// Get the rank of owner of data // Get the rank of owner of data
///////////////////////// /////////////////////////
@ -481,18 +558,28 @@ class BinaryIO {
// iorank reads from the seek // iorank reads from the seek
//////////////////////////////// ////////////////////////////////
if (myrank == iorank) { if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj));assert( fin.fail()==0); if (ILDG.is_ILDG){
bytes+=sizeof(fileObj); // use C-LIME to populate the record
#ifdef HAVE_LIME
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); uint64_t sizeFO = sizeof(fileObj);
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET);
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); int status = limeReaderReadData((void *)&fileObj, &sizeFO, ILDG.LR);
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); #endif
} else{
munge(fileObj,siteObj,csum); fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj));
}
bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum);
} }
// Possibly do transport through pt2pt // Possibly do transport through pt2pt
@ -515,32 +602,42 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Parallel writer // Parallel writer
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
template<class vobj,class fobj,class munger> template <class vobj, class fobj, class munger>
static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,std::string file,munger munge,int offset, static inline uint32_t writeObjectParallel(Lattice<vobj> &Umu,
const std::string & format) std::string file, munger munge,
{ int offset,
const std::string &format,
ILDGtype ILDG = ILDGtype()) {
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
int ieee32big = (format == std::string("IEEE32BIG")); int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64")); int ieee64 = (format == std::string("IEEE64"));
if (!(ieee32big || ieee32 || ieee64big || ieee64)) {
std::cout << GridLogError << "Unrecognized file format " << format
<< std::endl;
std::cout << GridLogError
<< "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64"
<< std::endl;
exit(0);
}
int nd = grid->_ndimension; int nd = grid->_ndimension;
for(int d=0;d<nd;d++){ for (int d = 0; d < nd; d++) {
assert(grid->CheckerBoarded(d) == 0); assert(grid->CheckerBoarded(d) == 0);
} }
std::vector<int> parallel(nd,1); std::vector<int> parallel(nd, 1);
std::vector<int> ioproc (nd); std::vector<int> ioproc(nd);
std::vector<int> start(nd); std::vector<int> start(nd);
std::vector<int> range(nd); std::vector<int> range(nd);
@ -548,9 +645,8 @@ class BinaryIO {
int IOnode = 1; int IOnode = 1;
for(int d=0;d<grid->_ndimension;d++) { for (int d = 0; d < grid->_ndimension; d++) {
if (d != grid->_ndimension - 1) parallel[d] = 0;
if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
@ -566,11 +662,12 @@ class BinaryIO {
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
} }
{ {
uint32_t tmp = IOnode; uint32_t tmp = IOnode;
grid->GlobalSum(tmp); grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel write I/O from "<< file
<< " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
@ -579,7 +676,8 @@ class BinaryIO {
std::cout << std::endl; std::cout << std::endl;
} }
GridStopWatch timer; timer.Start(); GridStopWatch timer;
timer.Start();
uint64_t bytes=0; uint64_t bytes=0;
int myrank = grid->ThisRank(); int myrank = grid->ThisRank();
@ -590,48 +688,58 @@ class BinaryIO {
// Ideally one reader/writer per xy plane and read these contiguously // Ideally one reader/writer per xy plane and read these contiguously
// with comms from nominated I/O nodes. // with comms from nominated I/O nodes.
std::ofstream fout; std::ofstream fout;
if ( IOnode ) fout.open(file,std::ios::binary|std::ios::in|std::ios::out); if (!ILDG.is_ILDG)
if (IOnode){
fout.open(file, std::ios::binary | std::ios::in | std::ios::out);
if (!fout.is_open()) {
std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file
<< std::endl;
exit(0);
}
}
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// Find the location of each site and send to primary node // Find the location of each site and send to primary node
// Take loop order from Chroma; defines loop order now that NERSC doc no longer // Take loop order from Chroma; defines loop order now that NERSC doc no
// longer
// available (how short sighted is that?) // available (how short sighted is that?)
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
uint32_t csum=0; uint32_t csum = 0;
fobj fileObj; fobj fileObj;
static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate with AlignedAllocator static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate
// with AlignedAllocator
// should aggregate a whole chunk and then write. // should aggregate a whole chunk and then write.
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico
for(int tlex=0;tlex<slice_vol;tlex++){ // conversion
for (int tlex = 0; tlex < slice_vol; tlex++) {
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
std::vector<int> iosite(nd); std::vector<int> iosite(nd);
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite, tlex, range);
for(int d=0;d<nd;d++){ for(int d = 0;d < nd; d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d] % grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d] + start[d]; // global site
} }
///////////////////////// /////////////////////////
// Get the rank of owner of data // Get the rank of owner of data
///////////////////////// /////////////////////////
int rank, o_idx,i_idx, g_idx; int rank, o_idx, i_idx, g_idx;
grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite);
grid->GlobalCoorToGlobalIndex(gsite,g_idx); grid->GlobalCoorToGlobalIndex(gsite, g_idx);
//////////////////////////////// ////////////////////////////////
// iorank writes from the seek // iorank writes from the seek
//////////////////////////////// ////////////////////////////////
// Owner of data peeks it // Owner of data peeks it
peekLocalSite(siteObj,Umu,lsite); peekLocalSite(siteObj, Umu, lsite);
// Pair of nodes may need to do pt2pt send // Pair of nodes may need to do pt2pt send
if ( rank != iorank ) { // comms is necessary if ( rank != iorank ) { // comms is necessary
@ -641,20 +749,30 @@ class BinaryIO {
} }
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
if (myrank == iorank) { if (myrank == iorank) {
munge(siteObj, fileObj, csum);
munge(siteObj,fileObj,csum);
if (ieee32big) htobe32_v((void *)&fileObj, sizeof(fileObj));
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); if (ieee32) htole32_v((void *)&fileObj, sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); if (ieee64big) htobe64_v((void *)&fileObj, sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); if (ieee64) htole64_v((void *)&fileObj, sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj)); if (ILDG.is_ILDG) {
fout.write((char *)&fileObj,sizeof(fileObj));assert( fout.fail()==0); #ifdef HAVE_LIME
bytes+=sizeof(fileObj); uint64_t sizeFO = sizeof(fileObj);
limeWriterSeek(ILDG.LW, g_idx*sizeFO, SEEK_SET);
int status = limeWriteRecordData((void *)&fileObj, &sizeFO, ILDG.LW);
#endif
}
else {
fout.seekp(offset + g_idx * sizeof(fileObj));
fout.write((char *)&fileObj, sizeof(fileObj));assert( fout.fail()==0);
}
bytes += sizeof(fileObj);
} }
} }
@ -662,14 +780,20 @@ class BinaryIO {
grid->GlobalSum(bytes); grid->GlobalSum(bytes);
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout << GridLogPerformance << "writeObjectParallel: wrote " << bytes
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << " bytes in " << timer.Elapsed() << " "
<< (double)bytes / timer.useconds() << " MB/s " << std::endl;
grid->Barrier(); // necessary?
if (IOnode)
fout.close();
return csum; return csum;
} }
}; };
} }
#endif #endif

251
lib/parallelIO/IldgIO.h Normal file
View File

@ -0,0 +1,251 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDG_IO_H
#define GRID_ILDG_IO_H
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <pwd.h>
#include <sys/utsname.h>
#include <unistd.h>
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
namespace QCD {
// Fill the geometry fields of an ILDG header from a grid.
//   grid   : grid whose _fdimensions are copied; must be 4-dimensional
//            (checked with assert).
//   header : receives 4 entries in `dimension` and 4 in `boundary`.
inline void ILDGGrid(GridBase *grid, ILDGField &header) {
  const int ndim = 4;
  assert(grid->_ndimension == ndim);  // emit error if not

  header.dimension.resize(ndim);
  header.boundary.resize(ndim);

  for (int mu = 0; mu < ndim; ++mu) {
    header.dimension[mu] = grid->_fdimensions[mu];
  }

  // TODO: boundary conditions are not read from anywhere yet; every
  // direction is recorded as periodic.
  for (int mu = 0; mu < ndim; ++mu) {
    header.boundary[mu] = std::string("periodic");
  }
}
// Checksum helper for ILDG payloads: forwards the buffer, its size in bytes
// and the running checksum unchanged to BinaryIO::Uint32Checksum.
inline void ILDGChecksum(uint32_t *buf,
                         uint32_t buf_size_bytes,
                         uint32_t &csum)
{
  BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
template <class GaugeField>
inline void ILDGStatistics(GaugeField &data, ILDGField &header) {
// How to convert data precision etc...
header.link_trace = Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette = Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
// header.polyakov =
}
// Forcing QCD here
// Functor converting a file-side object into a site-side object when reading.
// Copies every (i, j) entry of the 3x3 matrix in each of the 4 directions
// from `in` to `out`, then passes the raw bytes of the file-side object `in`
// and the running `csum` to ILDGChecksum.
template <class fobj, class sobj>
struct ILDGMunger {
  void operator()(fobj &in, sobj &out, uint32_t &csum) {
    const int ndir = 4;  // directions
    const int ncol = 3;  // matrix rank
    for (int dir = 0; dir != ndir; ++dir)
      for (int row = 0; row != ncol; ++row)
        for (int col = 0; col != ncol; ++col)
          out(dir)()(row, col) = in(dir)()(row, col);

    // Checksum is taken over the data as stored in the file object.
    ILDGChecksum((uint32_t *)&in, sizeof(in), csum);
  }
};
// Functor converting a site-side object into a file-side object when writing.
// Copies every (i, j) entry of the 3x3 matrix in each of the 4 directions
// from `in` to `out`, then passes the raw bytes of the file-side object `out`
// and the running `csum` to ILDGChecksum.
template <class fobj, class sobj>
struct ILDGUnmunger {
  void operator()(sobj &in, fobj &out, uint32_t &csum) {
    const int ndir = 4;  // directions
    const int ncol = 3;  // matrix rank
    for (int dir = 0; dir != ndir; ++dir)
      for (int row = 0; row != ncol; ++row)
        for (int col = 0; col != ncol; ++col)
          out(dir)()(row, col) = in(dir)()(row, col);

    // Checksum is taken over the converted data that will go to the file.
    ILDGChecksum((uint32_t *)&out, sizeof(out), csum);
  }
};
////////////////////////////////////////////////////////////////////////////////
// Write and read ILDG configurations as C-LIME records on a C FILE stream
////////////////////////////////////////////////////////////////////////////////
// Access mode requested when an ILDGIO object is constructed.
enum ILDGstate {
  ILDGread,  // file is opened with mode "r" and a LIME reader is created
  ILDGwrite  // file is opened with mode "w" and a LIME writer is created
};
class ILDGIO : public BinaryIO {
FILE *File;
LimeWriter *LimeW;
LimeRecordHeader *LimeHeader;
LimeReader *LimeR;
std::string filename;
public:
ILDGIO(std::string file, ILDGstate RW) {
filename = file;
if (RW == ILDGwrite){
File = fopen(file.c_str(), "w");
// check if opened correctly
LimeW = limeCreateWriter(File);
} else {
File = fopen(file.c_str(), "r");
// check if opened correctly
LimeR = limeCreateReader(File);
}
}
~ILDGIO() { fclose(File); }
int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
int status = limeWriteRecordHeader(h, L);
if (status < 0) {
std::cerr << "ILDG Header error\n";
return status;
}
limeDestroyHeader(h);
return LIME_SUCCESS;
}
unsigned int writeHeader(ILDGField &header) {
// write header in LIME
n_uint64_t nbytes;
int MB_flag = 1, ME_flag = 0;
char message[] = "ildg-format";
nbytes = strlen(message);
LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes);
limeWriteRecordHeader(LimeHeader, LimeW);
limeDestroyHeader(LimeHeader);
// save the xml header here
// use the xml_writer to c++ streams in pugixml
// and convert to char message
limeWriteRecordData(message, &nbytes, LimeW);
limeWriterCloseRecord(LimeW);
return 0;
}
unsigned int readHeader(ILDGField &header) {
return 0;
}
template <class vsimd>
uint32_t readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef LorentzColourMatrixD sobjd;
typedef LorentzColourMatrixF sobjf;
typedef iLorentzColourMatrix<vsimd> itype;
typedef LorentzColourMatrix sobj;
GridBase *grid = Umu._grid;
ILDGField header;
readHeader(header);
// now just the conf, ignore the header
std::string format = std::string("IEEE64BIG");
do {limeReaderNextRecord(LimeR);}
while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16));
n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
ILDGtype ILDGt(true, LimeR);
// this is special for double prec data, just for the moment
uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >(
Umu, filename, ILDGMunger<sobjd, sobj>(), 0, format, ILDGt);
// Check configuration
// todo
return csum;
}
template <class vsimd>
uint32_t writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, std::string format) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
typedef LorentzColourMatrixD fobj;
ILDGField header;
// fill the header
header.floating_point = format;
ILDGUnmunger<fobj, sobj> munge;
unsigned int offset = writeHeader(header);
BinaryIO::Uint32Checksum<vobj, fobj>(Umu, munge, header.checksum);
// Write data record header
n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites;
createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW);
ILDGtype ILDGt(true, LimeW);
uint32_t csum = BinaryIO::writeObjectParallel<vobj, fobj>(
Umu, filename, munge, 0, header.floating_point, ILDGt);
limeWriterCloseRecord(LimeW);
// Last record
// the logical file name LNF
// look into documentation on how to generate this string
std::string LNF = "empty";
PayloadSize = sizeof(LNF);
createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW);
limeWriteRecordData(const_cast<char*>(LNF.c_str()), &PayloadSize, LimeW);
limeWriterCloseRecord(LimeW);
return csum;
}
// format for RNG? Now just binary out
};
}
}
//HAVE_LIME
#endif
#endif

View File

@ -0,0 +1,80 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDGTYPES_IO_H
#define GRID_ILDGTYPES_IO_H
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
// Small tag passed to the BinaryIO parallel reader/writer to select the
// LIME code path. Holds the flag plus either a writer or a reader handle;
// the handle that is not supplied is NULL. The struct has no destructor, so
// it never releases the handles it carries.
struct ILDGtype {
  bool is_ILDG;    // true when the LIME handles should be used
  LimeWriter* LW;  // writer handle, NULL unless built from a writer
  LimeReader* LR;  // reader handle, NULL unless built from a reader

  // Default: plain binary I/O, no LIME handles.
  ILDGtype() : is_ILDG(false), LW(NULL), LR(NULL) {}

  // Reading through an existing LIME reader.
  ILDGtype(bool is, LimeReader* L) : is_ILDG(is), LW(NULL), LR(L) {}

  // Writing through an existing LIME writer.
  ILDGtype(bool is, LimeWriter* L) : is_ILDG(is), LW(L), LR(NULL) {}
};
// Plain record of the metadata that accompanies an ILDG configuration.
// All members are public and there is no constructor, so the scalar members
// (data_start, link_trace, plaquette, checksum, sequence_number) hold
// indeterminate values until something assigns them.
class ILDGField {
public:
// header strings (not in order)
// Lattice extent per direction; ILDGGrid resizes this to 4 entries.
std::vector<int> dimension;
// Boundary condition name per direction; ILDGGrid sets all 4 to "periodic".
std::vector<std::string> boundary;
// presumably the byte offset of the payload, as in the NERSC header - TODO confirm
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
// Filled by ILDGStatistics from WilsonLoops::linkTrace / avgPlaquette.
double link_trace;
double plaquette;
// Passed by reference to BinaryIO::Uint32Checksum in ILDGIO::writeConfiguration.
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id;
std::string ensemble_label;
std::string creator;
std::string creator_hardware;
std::string creation_date;
std::string archive_date;
// Floating point format string; ILDGIO::writeConfiguration stores its
// `format` argument here and passes it on to writeObjectParallel.
std::string floating_point;
};
}
#else
namespace Grid {
// Fallback used when the library is built without LIME support: carries
// only the flag, which a default-constructed object always has set to false.
struct ILDGtype {
  bool is_ILDG;

  ILDGtype() { is_ILDG = false; }
};
}
#endif
#endif

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -6,9 +6,9 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Matt Spraggs <matthew.spraggs@gmail.com> Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,8 +25,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_NERSC_IO_H #ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H #define GRID_NERSC_IO_H
@ -41,92 +41,92 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <pwd.h> #include <pwd.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
using namespace Grid; using namespace Grid;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage // Some data types for intermediate storage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >; template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3; typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation // header specification/interpretation
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
class NerscField { class NerscField {
public: public:
// header strings (not in order) // header strings (not in order)
int dimension[4]; int dimension[4];
std::string boundary[4]; std::string boundary[4];
int data_start; int data_start;
std::string hdr_version; std::string hdr_version;
std::string storage_format; std::string storage_format;
// Checks on data // Checks on data
double link_trace; double link_trace;
double plaquette; double plaquette;
uint32_t checksum; uint32_t checksum;
unsigned int sequence_number; unsigned int sequence_number;
std::string data_type; std::string data_type;
std::string ensemble_id ; std::string ensemble_id ;
std::string ensemble_label ; std::string ensemble_label ;
std::string creator ; std::string creator ;
std::string creator_hardware ; std::string creator_hardware ;
std::string creation_date ; std::string creation_date ;
std::string archive_date ; std::string archive_date ;
std::string floating_point; std::string floating_point;
}; };
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data // Bit and Physical Checksumming and QA of data
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
inline void NerscGrid(GridBase *grid,NerscField &header) inline void NerscGrid(GridBase *grid,NerscField &header)
{ {
assert(grid->_ndimension==4); assert(grid->_ndimension==4);
for(int d=0;d<4;d++) { for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d]; header.dimension[d] = grid->_fdimensions[d];
} }
for(int d=0;d<4;d++) { for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC"); header.boundary[d] = std::string("PERIODIC");
} }
} }
template<class GaugeField> template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header) inline void NerscStatistics(GaugeField & data,NerscField &header)
{ {
// How to convert data precision etc... // How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data); header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data); header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
} }
inline void NerscMachineCharacteristics(NerscField &header) inline void NerscMachineCharacteristics(NerscField &header)
{ {
// Who // Who
struct passwd *pw = getpwuid (getuid()); struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name); if (pw) header.creator = std::string(pw->pw_name);
// When // When
std::time_t t = std::time(nullptr); std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t); std::tm tm = *std::localtime(&t);
std::ostringstream oss; std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z"); // oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str(); header.creation_date = oss.str();
header.archive_date = header.creation_date; header.archive_date = header.creation_date;
// What // What
struct utsname name; uname(&name); struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-"; header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-"; header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-"; header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release); header.creator_hardware+= std::string(name.release);
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware // Utilities ; these are QCD aware
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{ {
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
@ -145,30 +145,32 @@ inline void NerscMachineCharacteristics(NerscField &header)
template<class fobj,class sobj> template<class fobj,class sobj>
struct NerscSimpleMunger{ struct NerscSimpleMunger{
void operator()(fobj &in, sobj &out, uint32_t &csum) {
void operator() (fobj &in,sobj &out,uint32_t &csum){ for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for(int mu=0;mu<4;mu++){ for (int j = 0; j < Nc; j++) {
for(int i=0;i<3;i++){ out(mu)()(i, j) = in(mu)()(i, j);
for(int j=0;j<3;j++){ }
out(mu)()(i,j) = in(mu)()(i,j); }
}}} }
NerscChecksum((uint32_t *)&in,sizeof(in),csum); NerscChecksum((uint32_t *)&in, sizeof(in), csum);
}; };
}; };
template<class fobj,class sobj> template <class fobj, class sobj>
struct NerscSimpleUnmunger{ struct NerscSimpleUnmunger {
void operator() (sobj &in,fobj &out,uint32_t &csum){ void operator()(sobj &in, fobj &out, uint32_t &csum) {
for(int mu=0;mu<Nd;mu++){ for (int mu = 0; mu < Nd; mu++) {
for(int i=0;i<Nc;i++){ for (int i = 0; i < Nc; i++) {
for(int j=0;j<Nc;j++){ for (int j = 0; j < Nc; j++) {
out(mu)()(i,j) = in(mu)()(i,j); out(mu)()(i, j) = in(mu)()(i, j);
}}} }
NerscChecksum((uint32_t *)&out,sizeof(out),csum); }
}
NerscChecksum((uint32_t *)&out, sizeof(out), csum);
}; };
}; };
template<class fobj,class sobj> template<class fobj,class sobj>
struct Nersc3x2munger{ struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){ void operator() (fobj &in,sobj &out,uint32_t &csum){
@ -204,74 +206,74 @@ inline void NerscMachineCharacteristics(NerscField &header)
}; };
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; comput header offset for payload // Write and read from fstream; comput header offset for payload
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO { class NerscIO : public BinaryIO {
public: public:
static inline void truncate(std::string file){ static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out); std::ofstream fout(file,std::ios::out);
} }
#define dump_nersc_header(field, s)\ #define dump_nersc_header(field, s) \
s << "BEGIN_HEADER" << std::endl;\ s << "BEGIN_HEADER" << std::endl; \
s << "HDR_VERSION = " << field.hdr_version << std::endl;\ s << "HDR_VERSION = " << field.hdr_version << std::endl; \
s << "DATATYPE = " << field.data_type << std::endl;\ s << "DATATYPE = " << field.data_type << std::endl; \
s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\ s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
for(int i=0;i<4;i++){\ for(int i=0;i<4;i++){ \
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\ s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
}\ } \
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\ s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;\ s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
for(int i=0;i<4;i++){\ for(int i=0;i<4;i++){ \
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\ s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
}\ } \
\ \
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\ s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;\ s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;\ s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;\ s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
s << "CREATOR = " << field.creator << std::endl;\ s << "CREATOR = " << field.creator << std::endl; \
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\ s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
s << "CREATION_DATE = " << field.creation_date << std::endl;\ s << "CREATION_DATE = " << field.creation_date << std::endl; \
s << "ARCHIVE_DATE = " << field.archive_date << std::endl;\ s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
s << "FLOATING_POINT = " << field.floating_point << std::endl;\ s << "FLOATING_POINT = " << field.floating_point << std::endl; \
s << "END_HEADER" << std::endl; s << "END_HEADER" << std::endl;
static inline unsigned int writeHeader(NerscField &field,std::string file) static inline unsigned int writeHeader(NerscField &field,std::string file)
{ {
std::ofstream fout(file,std::ios::out|std::ios::in); std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg); fout.seekp(0,std::ios::beg);
dump_nersc_header(field, fout); dump_nersc_header(field, fout);
field.data_start = fout.tellp(); field.data_start = fout.tellp();
return field.data_start; return field.data_start;
} }
// for the header-reader // for the header-reader
static inline int readHeader(std::string file,GridBase *grid, NerscField &field) static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
{ {
int offset=0; int offset=0;
std::map<std::string,std::string> header; std::map<std::string,std::string> header;
std::string line; std::string line;
////////////////////////////////////////////////// //////////////////////////////////////////////////
// read the header // read the header
////////////////////////////////////////////////// //////////////////////////////////////////////////
std::ifstream fin(file); std::ifstream fin(file);
getline(fin,line); // read one line and insist is getline(fin,line); // read one line and insist is
removeWhitespace(line); removeWhitespace(line);
std::cout << GridLogMessage << "* " << line << std::endl; std::cout << GridLogMessage << "* " << line << std::endl;
assert(line==std::string("BEGIN_HEADER")); assert(line==std::string("BEGIN_HEADER"));
do { do {
getline(fin,line); // read one line getline(fin,line); // read one line
std::cout << GridLogMessage << "* "<<line<< std::endl; std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("="); int eq = line.find("=");
if(eq >0) { if(eq >0) {
std::string key=line.substr(0,eq); std::string key=line.substr(0,eq);
std::string val=line.substr(eq+1); std::string val=line.substr(eq+1);
removeWhitespace(key); removeWhitespace(key);
@ -279,275 +281,272 @@ static inline int readHeader(std::string file,GridBase *grid, NerscField &field
header[key] = val; header[key] = val;
} }
} while( line.find("END_HEADER") == std::string::npos ); } while( line.find("END_HEADER") == std::string::npos );
field.data_start = fin.tellg(); field.data_start = fin.tellg();
////////////////////////////////////////////////// //////////////////////////////////////////////////
// chomp the values // chomp the values
////////////////////////////////////////////////// //////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"]; field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"]; field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"]; field.storage_format = header["STORAGE_FORMAT"];
field.dimension[0] = std::stol(header["DIMENSION_1"]); field.dimension[0] = std::stol(header["DIMENSION_1"]);
field.dimension[1] = std::stol(header["DIMENSION_2"]); field.dimension[1] = std::stol(header["DIMENSION_2"]);
field.dimension[2] = std::stol(header["DIMENSION_3"]); field.dimension[2] = std::stol(header["DIMENSION_3"]);
field.dimension[3] = std::stol(header["DIMENSION_4"]); field.dimension[3] = std::stol(header["DIMENSION_4"]);
assert(grid->_ndimension == 4); assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){ for(int d=0;d<4;d++){
assert(grid->_fdimensions[d]==field.dimension[d]); assert(grid->_fdimensions[d]==field.dimension[d]);
} }
field.link_trace = std::stod(header["LINK_TRACE"]); field.link_trace = std::stod(header["LINK_TRACE"]);
field.plaquette = std::stod(header["PLAQUETTE"]); field.plaquette = std::stod(header["PLAQUETTE"]);
field.boundary[0] = header["BOUNDARY_1"]; field.boundary[0] = header["BOUNDARY_1"];
field.boundary[1] = header["BOUNDARY_2"]; field.boundary[1] = header["BOUNDARY_2"];
field.boundary[2] = header["BOUNDARY_3"]; field.boundary[2] = header["BOUNDARY_3"];
field.boundary[3] = header["BOUNDARY_4"]; field.boundary[3] = header["BOUNDARY_4"];
field.checksum = std::stoul(header["CHECKSUM"],0,16); field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"]; field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"]; field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]); field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"]; field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"]; field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"]; field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"]; field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"]; field.floating_point = header["FLOATING_POINT"];
return field.data_start; return field.data_start;
} }
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers // Now the meat: the object readers
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define PARALLEL_READ #define PARALLEL_READ
#define PARALLEL_WRITE #define PARALLEL_WRITE
template<class vsimd> template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
{ {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid; GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header); int offset = readHeader(file,Umu._grid,header);
NerscField clone(header); NerscField clone(header);
std::string format(header.floating_point); std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG")); int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32")); int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG")); int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64")); int ieee64 = (format == std::string("IEEE64"));
uint32_t csum; uint32_t csum;
// depending on datatype, set up munger; // depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type> // munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) { if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
#ifdef PARALLEL_READ #ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#else #else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F> csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#endif #endif
} }
if ( ieee64 || ieee64big ) { if ( ieee64 || ieee64big ) {
#ifdef PARALLEL_READ #ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
#else #else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D> csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
#endif #endif
} }
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
if ( ieee32 || ieee32big ) { if ( ieee32 || ieee32big ) {
#ifdef PARALLEL_READ #ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
#else #else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
#endif #endif
} }
if ( ieee64 || ieee64big ) { if ( ieee64 || ieee64big ) {
#ifdef PARALLEL_READ #ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#else #else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#endif #endif
} }
} else { } else {
assert(0); assert(0);
} }
NerscStatistics<GaugeField>(Umu,clone); NerscStatistics<GaugeField>(Umu,clone);
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<< csum<< std::dec std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<< csum<< std::dec
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl; <<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl; <<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl; <<" header "<<header.link_trace<<std::endl;
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum ); assert(csum == header.checksum );
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
} }
template<class vsimd> template<class vsimd>
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32) static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
{ {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj; typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_object sobj;
// Following should become arguments // Following should become arguments
NerscField header; NerscField header;
header.sequence_number = 1; header.sequence_number = 1;
header.ensemble_id = "UKQCD"; header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF"; header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D; typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D; typedef LorentzColour2x3D fobj2D;
typedef LorentzColourMatrixF fobj3f;
typedef LorentzColour2x3F fobj2f;
GridBase *grid = Umu._grid;
NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
truncate(file); GridBase *grid = Umu._grid;
if ( two_row ) { NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
header.floating_point = std::string("IEEE64BIG"); uint32_t csum;
header.data_type = std::string("4D_SU3_GAUGE"); int offset;
Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum); truncate(file);
offset = writeHeader(header,file);
if ( two_row ) {
header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE");
Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE #ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#else #else
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#endif #endif
} else {
} else { header.floating_point = std::string("IEEE64BIG");
header.floating_point = std::string("IEEE64BIG"); header.data_type = std::string("4D_SU3_GAUGE_3x3");
header.data_type = std::string("4D_SU3_GAUGE_3x3"); NerscSimpleUnmunger<fobj3D,sobj> munge;
NerscSimpleUnmunger<fobj3D,sobj> munge; BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum); offset = writeHeader(header,file);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE #ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#else #else
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#endif #endif
} }
std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl; std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
} }
/////////////////////////////// ///////////////////////////////
// RNG state // RNG state
/////////////////////////////// ///////////////////////////////
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file) static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
{ {
typedef typename GridParallelRNG::RngStateType RngStateType; typedef typename GridParallelRNG::RngStateType RngStateType;
// Following should become arguments // Following should become arguments
NerscField header; NerscField header;
header.sequence_number = 1; header.sequence_number = 1;
header.ensemble_id = "UKQCD"; header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF"; header.ensemble_label = "DWF";
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
NerscGrid(grid,header); NerscGrid(grid,header);
header.link_trace=0.0; header.link_trace=0.0;
header.plaquette=0.0; header.plaquette=0.0;
NerscMachineCharacteristics(header); NerscMachineCharacteristics(header);
uint32_t csum; uint32_t csum;
int offset; int offset;
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48"); header.data_type = std::string("RANLUX48");
#endif #endif
#ifdef RNG_MT19937 #ifdef RNG_MT19937
header.floating_point = std::string("UINT32"); header.floating_point = std::string("UINT32");
header.data_type = std::string("MT19937"); header.data_type = std::string("MT19937");
#endif #endif
#ifdef RNG_SITMO #ifdef RNG_SITMO
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("SITMO"); header.data_type = std::string("SITMO");
#endif #endif
truncate(file); truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset); csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset);
header.checksum = csum; header.checksum = csum;
offset = writeHeader(header,file); offset = writeHeader(header,file);
std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl; std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl;
} }
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file) static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file)
{ {
typedef typename GridParallelRNG::RngStateType RngStateType; typedef typename GridParallelRNG::RngStateType RngStateType;
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int offset = readHeader(file,grid,header); int offset = readHeader(file,grid,header);
NerscField clone(header); NerscField clone(header);
std::string format(header.floating_point); std::string format(header.floating_point);
std::string data_type(header.data_type); std::string data_type(header.data_type);
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
assert(format == std::string("UINT64")); assert(format == std::string("UINT64"));
assert(data_type == std::string("RANLUX48")); assert(data_type == std::string("RANLUX48"));
#endif #endif
#ifdef RNG_MT19937 #ifdef RNG_MT19937
assert(format == std::string("UINT32")); assert(format == std::string("UINT32"));
assert(data_type == std::string("MT19937")); assert(data_type == std::string("MT19937"));
#endif #endif
#ifdef RNG_SITMO #ifdef RNG_SITMO
assert(format == std::string("UINT64")); assert(format == std::string("UINT64"));
assert(data_type == std::string("SITMO")); assert(data_type == std::string("SITMO"));
#endif #endif
// depending on datatype, set up munger; // depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type> // munger is a function of <floating point, Real, data_type>
uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset);
assert(csum == header.checksum ); assert(csum == header.checksum );
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
} }
}; };
}} }}
#endif #endif

View File

@ -205,13 +205,14 @@ public:
void Stop(void) { void Stop(void) {
count=0; count=0;
cycles=0; cycles=0;
size_t ign;
#ifdef __linux__ #ifdef __linux__
ssize_t ign;
if ( fd!= -1) { if ( fd!= -1) {
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
ign=::read(fd, &count, sizeof(long long)); ign=::read(fd, &count, sizeof(long long));
ign=::read(cyclefd, &cycles, sizeof(long long)); ign+=::read(cyclefd, &cycles, sizeof(long long));
assert(ign=2*sizeof(long long));
} }
elapsed = cyclecount() - begin; elapsed = cyclecount() - begin;
#else #else

124
lib/qcd/LatticeTheories.h Normal file
View File

@ -0,0 +1,124 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/LatticeTheories.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LT_H
#define GRID_LT_H
namespace Grid{
// First steps in the complete generalization of the Physics part
// Design not final
namespace LatticeTheories {
// Root of the lattice-theory trait hierarchy. It records the number of
// dimensions of the theory and provides the tensor type of a singlet field.
template <int Dimensions>
struct LatticeTheory {
  // A singlet: the value type wrapped in three nested iScalar levels.
  template <typename vtype>
  using iSinglet = iScalar<iScalar<iScalar<vtype> > >;

  static const int Nd  = Dimensions;      // number of dimensions
  static const int Nds = 2 * Dimensions;  // double stored field
};
// Gauge theory traits: extends LatticeTheory with a colour count and the
// colour-indexed tensor types built from it.
template <int Dimensions, int Colours>
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
  // The constants are declared before the aliases below because the aliases
  // use them as template arguments.
  static const int Nd  = Dimensions;      // number of dimensions
  static const int Nds = 2 * Dimensions;  // double stored field
  static const int Nc  = Colours;         // number of colours

  // Nc-component colour vector.
  template <typename vtype>
  using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;

  // Single Nc x Nc colour matrix.
  template <typename vtype>
  using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;

  // One Nc x Nc colour matrix per dimension (Nd entries).
  template <typename vtype>
  using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;

  // Double stored variant: Nds = 2 * Nd colour matrices.
  template <typename vtype>
  using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
};
// Fermionic gauge theory traits: extends LatticeGaugeTheory with a spin
// count and the spin / spin-colour tensor types built from it.
//
// Template parameters:
//   Dimensions - number of dimensions (exposed as Nd, with Nds = 2 * Nd)
//   Colours    - number of colours    (exposed as Nc)
//   Spin       - number of spin components (exposed as Ns, with Nhs = Ns / 2)
template <int Dimensions, int Colours, int Spin>
struct FermionicLatticeGaugeTheory
    : public LatticeGaugeTheory<Dimensions, Colours> {
  static const int Nd  = Dimensions;
  static const int Nds = Dimensions * 2;
  static const int Nc  = Colours;
  static const int Ns  = Spin;

  // Ns x Ns matrix in spin only.
  template <typename vtype>
  using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
  // Ns x Ns spin matrix whose entries are Nc x Nc colour matrices.
  template <typename vtype>
  using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;

  // Ns-component vector in spin only.
  template <typename vtype>
  using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
  // Ns-component spin vector whose entries are Nc-component colour vectors.
  template <typename vtype>
  using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;

  // These 2 only if Spin is a multiple of 2
  // (Nhs is computed with integer division, so an odd Spin is truncated.)
  static const int Nhs = Spin / 2;
  template <typename vtype>
  using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
  template <typename vtype>
  using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;

  // tests
  // iColourMatrix is a member template of the *dependent* base class, so an
  // unqualified use is not looked up there (two-phase name lookup): it would
  // either fail to resolve or bind to an unrelated iColourMatrix visible from
  // an enclosing namespace. Qualify it through the base explicitly.
  typedef typename LatticeGaugeTheory<Dimensions, Colours>::
      template iColourMatrix<Complex> ColourMatrix;
  typedef typename LatticeGaugeTheory<Dimensions, Colours>::
      template iColourMatrix<ComplexF> ColourMatrixF;
  typedef typename LatticeGaugeTheory<Dimensions, Colours>::
      template iColourMatrix<ComplexD> ColourMatrixD;
};
// Examples, not complete now.
// QCD as a concrete theory: 4 dimensions, 3 colours, 4 spin components.
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
  // Shorthand for the base class.
  using FLGT = FermionicLatticeGaugeTheory<4, 3, 4>;

  // Spin matrices in default, single and double precision.
  using SpinMatrix  = FLGT::iSpinMatrix<Complex>;
  using SpinMatrixF = FLGT::iSpinMatrix<ComplexF>;
  using SpinMatrixD = FLGT::iSpinMatrix<ComplexD>;

  // Direction indices: the "p" directions take 0..3, the "m" directions 4..7.
  static const int Xp = 0, Yp = 1, Zp = 2, Tp = 3;
  static const int Xm = 4, Ym = 5, Zm = 6, Tm = 7;
};
// QED as a concrete theory: 4 dimensions, 1 colour, 4 spin components.
// Placeholder: the body is still to be filled in.
struct QED : FermionicLatticeGaugeTheory<4, 1, 4> {};
// Scalar theory in an arbitrary number of dimensions; it adds nothing on top
// of what LatticeTheory already provides.
template <int Dimensions>
struct Scalar : LatticeTheory<Dimensions> {
};
}; // LatticeTheories
} // Grid
#endif

View File

@ -32,9 +32,12 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_BASE_H #ifndef GRID_QCD_BASE_H
#define GRID_QCD_BASE_H #define GRID_QCD_BASE_H
namespace Grid{ namespace Grid{
namespace QCD { namespace QCD {
static const int Xdir = 0;
static const int Ydir = 1;
static const int Zdir = 2;
static const int Tdir = 3;
static const int Xp = 0; static const int Xp = 0;
static const int Yp = 1; static const int Yp = 1;
@ -354,36 +357,36 @@ namespace QCD {
////////////////////////////////////////////// //////////////////////////////////////////////
template<class vobj> template<class vobj>
void pokeColour(Lattice<vobj> &lhs, void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<ColourIndex>(lhs,rhs,i); PokeIndex<ColourIndex>(lhs,rhs,i);
} }
template<class vobj> template<class vobj>
void pokeColour(Lattice<vobj> &lhs, void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs, const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j) int i,int j)
{ {
PokeIndex<ColourIndex>(lhs,rhs,i,j); PokeIndex<ColourIndex>(lhs,rhs,i,j);
} }
template<class vobj> template<class vobj>
void pokeSpin(Lattice<vobj> &lhs, void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<SpinIndex>(lhs,rhs,i); PokeIndex<SpinIndex>(lhs,rhs,i);
} }
template<class vobj> template<class vobj>
void pokeSpin(Lattice<vobj> &lhs, void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs, const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j) int i,int j)
{ {
PokeIndex<SpinIndex>(lhs,rhs,i,j); PokeIndex<SpinIndex>(lhs,rhs,i,j);
} }
template<class vobj> template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs, void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<LorentzIndex>(lhs,rhs,i); PokeIndex<LorentzIndex>(lhs,rhs,i);
} }
@ -492,6 +495,38 @@ namespace QCD {
} //namespace QCD } //namespace QCD
} // Grid } // Grid
/*
 NOTE: leftover from a merge. The includes below came from the HEAD side of
 the conflict (the develop side was empty) and are intentionally disabled.
#include <Grid/qcd/utils/SpaceTimeGrid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/spin/TwoSpinor.h>
#include <Grid/qcd/utils/LinalgUtils.h>
#include <Grid/qcd/utils/CovariantCshift.h>
// Include representations
#include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/hmc_types.h>
// Scalar field
#include <Grid/qcd/utils/ScalarObjs.h>
#include <Grid/qcd/action/Actions.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/observables/hmc_observable.h>
#include <Grid/qcd/hmc/HMC.h>
//#include <Grid/qcd/modules/mods.h>
*/
#endif #endif

View File

@ -4,10 +4,11 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionBase.h Source file: ./lib/qcd/action/ActionBase.h
Copyright (C) 2015 Copyright (C) 2015-2016
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -27,128 +28,29 @@ See the full license in the file "LICENSE" in the top level distribution
directory directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef QCD_ACTION_BASE
#define QCD_ACTION_BASE #ifndef ACTION_BASE_H
#define ACTION_BASE_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
template <class GaugeField> template <class GaugeField >
class Action { class Action
{
public: public:
bool is_smeared = false; bool is_smeared = false;
// Boundary conditions? // Heatbath? // Heatbath?
virtual void refresh(const GaugeField& U, virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
GridParallelRNG& pRNG) = 0; // refresh pseudofermions virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual RealD S(const GaugeField& U) = 0; // evaluate the action virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
virtual void deriv(const GaugeField& U, virtual std::string action_name() = 0; // return the action name
GaugeField& dSdU) = 0; // evaluate the action derivative virtual std::string LogParameters() = 0; // prints action parameters
virtual ~Action(){}; virtual ~Action(){}
}; };
// Indexing of tuple types
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
/*
template <class GaugeField>
struct ActionLevel {
public:
typedef Action<GaugeField>*
ActPtr; // now force the same colours as the rest of the code
//Add supported representations here
unsigned int multiplier;
std::vector<ActPtr> actions;
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
assert(mul >= 1);
};
void push_back(ActPtr ptr) { actions.push_back(ptr); }
};
*/
template <class GaugeField, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<GaugeField>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
action_collection actions_hirep;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
std::vector<ActPtr>& actions;
// Temporary conversion between ActionLevel and ActionLevelHirep
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
//apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
};
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
template < class Field >
void push_back(Action<Field>* ptr) {
// insert only in the correct vector
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template < class ActPtr>
static void resize(ActPtr ap, unsigned int n){
ap->resize(n);
}
//template <std::size_t I>
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
//template <class GaugeField>
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
// An ActionSet is a std::vector of ActionLevel objects sharing the same field
// and representation types.
template <class FieldType, class ReprType>
using ActionSet = std::vector<ActionLevel<FieldType, ReprType>>;
} }
} }
#endif #endif // ACTION_BASE_H

View File

@ -31,15 +31,31 @@ directory
#define QCD_ACTION_CORE #define QCD_ACTION_CORE
#include <Grid/qcd/action/ActionBase.h> #include <Grid/qcd/action/ActionBase.h>
#include <Grid/qcd/action/ActionSet.h>
#include <Grid/qcd/action/ActionParams.h> #include <Grid/qcd/action/ActionParams.h>
//////////////////////////////////////////// ////////////////////////////////////////////
// Gauge Actions // Gauge Actions
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/gauge/Gauge.h> #include <Grid/qcd/action/gauge/Gauge.h>
//////////////////////////////////////////// ////////////////////////////////////////////
// Fermion prereqs // Fermion prereqs
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
////////////////////////////////////////////
// Scalar Actions
////////////////////////////////////////////
#include <Grid/qcd/action/scalar/Scalar.h>
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <Grid/qcd/utils/Metric.h>
#include <Grid/qcd/utils/CovariantLaplacian.h>
#endif #endif

View File

@ -1,67 +1,92 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionParams.h Source file: ./lib/qcd/action/ActionParams.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ACTION_PARAMS_H #ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H #define GRID_QCD_ACTION_PARAMS_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
// These can move into a params header and be given MacroMagic serialisation // These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams { struct GparityWilsonImplParams {
bool overlapCommsCompute; bool overlapCommsCompute;
std::vector<int> twists; std::vector<int> twists;
GparityWilsonImplParams () : twists(Nd,0), overlapCommsCompute(false) {}; GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){};
};
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
}; };
WilsonImplParams(const std::vector<Complex> phi)
: boundary_phases(phi), overlapCommsCompute(false) {}
};
struct WilsonImplParams { struct StaggeredImplParams {
bool overlapCommsCompute; StaggeredImplParams() {};
WilsonImplParams() : overlapCommsCompute(false) {}; };
};
struct OneFlavourRationalParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
RealD, lo,
RealD, hi,
int, MaxIter,
RealD, tolerance,
int, degree,
int, precision);
// MaxIter and tolerance, vectors??
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
degree(_degree),
precision(_precision){};
};
}
}
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams {
RealD lo;
RealD hi;
int MaxIter; // Vector?
RealD tolerance; // Vector?
int degree=10;
int precision=64;
OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) :
lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision)
{};
};
}}
#endif #endif

116
lib/qcd/action/ActionSet.h Normal file
View File

@ -0,0 +1,116 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionSet.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef ACTION_SET_H
#define ACTION_SET_H
namespace Grid {
// Should drop this namespace here
namespace QCD {
//////////////////////////////////
// Indexing of tuple types
//////////////////////////////////
// Compile-time lookup of a type inside a std::tuple type list.
// Index<T, std::tuple<Ts...>>::value is the zero-based position of the first
// occurrence of T among Ts...; only the std::tuple specialisations below are
// defined.
template <class T, class Tuple>
struct Index;

// Base case: T is the head of the list.
template <class T, class... Rest>
struct Index<T, std::tuple<T, Rest...>>
    : std::integral_constant<std::size_t, 0> {};

// Recursive case: skip the head and count one more.
template <class T, class Head, class... Rest>
struct Index<T, std::tuple<Head, Rest...>>
    : std::integral_constant<std::size_t,
                             1 + Index<T, std::tuple<Rest...>>::value> {};
////////////////////////////////////////////
// Action Level
// Action collection
// in an integration level
// (for multilevel integration schemes)
////////////////////////////////////////////
template <class Field, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<Field>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
action_collection actions_hirep;
std::vector<ActPtr>& actions;
explicit ActionLevel(unsigned int mul = 1) :
actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
// apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
}
template < class GenField >
void push_back(Action<GenField>* ptr) {
// insert only in the correct vector
std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template <class ActPtr>
static void resize(ActPtr ap, unsigned int n) {
ap->resize(n);
}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
// ActionSet: a std::vector of ActionLevel objects sharing the same field and
// representation types.
template <class FieldType, class ReprType>
using ActionSet = std::vector<ActionLevel<FieldType, ReprType>>;
} // QCD
} // Grid
#endif // ACTION_SET_H

View File

@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Eigen/Dense> #include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> #include <Grid/qcd/action/fermion/CayleyFermion5D.h>
@ -320,7 +320,7 @@ void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const
this->DhopDeriv(mat,U,Din,dag); this->DhopDeriv(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din); Meooe5D(U,Din);
this->DhopDeriv(mat,Din,V,dag); this->DhopDeriv(mat,Din,V,dag);
} }
}; };
@ -335,8 +335,8 @@ void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivOE(mat,U,Din,dag); this->DhopDerivOE(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din); Meooe5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag); this->DhopDerivOE(mat,Din,V,dag);
} }
}; };
template<class Impl> template<class Impl>
@ -350,7 +350,7 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivEO(mat,U,Din,dag); this->DhopDerivEO(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
MeooeDag5D(U,Din); Meooe5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag); this->DhopDerivEO(mat,Din,V,dag);
} }
}; };
@ -380,6 +380,8 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec) // The Cayley coeffs (unprec)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
assert(gamma.size()==Ls);
omega.resize(Ls); omega.resize(Ls);
bs.resize(Ls); bs.resize(Ls);
cs.resize(Ls); cs.resize(Ls);
@ -412,10 +414,11 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
for(int i=0; i < Ls; i++){ for(int i=0; i < Ls; i++){
as[i] = 1.0; as[i] = 1.0;
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
// assert(fabs(omega[i])>0.0);
bs[i] = 0.5*(bpc/omega[i] + bmc); bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc); cs[i] = 0.5*(bpc/omega[i] - bmc);
} }
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form // Constants for the preconditioned matrix Cayley form
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
@ -425,12 +428,12 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
ceo.resize(Ls); ceo.resize(Ls);
for(int i=0;i<Ls;i++){ for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0); bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
// assert(fabs(bee[i])>0.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i]; beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i]; ceo[i]=-as[i]*cs[i];
} }
aee.resize(Ls); aee.resize(Ls);
aeo.resize(Ls); aeo.resize(Ls);
for(int i=0;i<Ls;i++){ for(int i=0;i<Ls;i++){
@ -474,14 +477,16 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
{ {
Coeff_t delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; for(int j=0;j<Ls-1;j++) {
// assert(fabs(bee[j])>0.0);
delta_d *= cee[j]/bee[j];
}
dee[Ls-1] += delta_d; dee[Ls-1] += delta_d;
} }
int inv=1; int inv=1;
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv); this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
} }
@ -495,7 +500,9 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
GridBase *grid = this->FermionRedBlackGrid(); GridBase *grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
if ( LLs == Ls ) return; // Not vectorised in 5th direction if ( LLs == Ls ) {
return; // Not vectorised in 5th direction
}
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);

View File

@ -237,6 +237,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
INSTANTIATE_DPERP(GparityWilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF); INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD); INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif #endif
}} }}

View File

@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Eigen/Dense> #include <Grid/Grid_Eigen_Dense.h>
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> #include <Grid/qcd/action/fermion/CayleyFermion5D.h>
@ -137,6 +137,20 @@ template void CayleyFermion5D<WilsonImplF>::MooeeInternal(const FermionField &ps
template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<WilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<ZWilsonImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<ZWilsonImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
template void CayleyFermion5D<GparityWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<GparityWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<WilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZWilsonImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
#endif #endif
}} }}

View File

@ -37,7 +37,6 @@ namespace Grid {
namespace QCD { namespace QCD {
// FIXME -- make a version of these routines with site loop outermost for cache reuse. // FIXME -- make a version of these routines with site loop outermost for cache reuse.
// Pminus fowards // Pminus fowards
// Pplus backwards // Pplus backwards
template<class Impl> template<class Impl>
@ -152,6 +151,13 @@ void CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi, FermionField &
INSTANTIATE_DPERP(GparityWilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF); INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD); INSTANTIATE_DPERP(ZWilsonImplD);
INSTANTIATE_DPERP(WilsonImplFH);
INSTANTIATE_DPERP(WilsonImplDF);
INSTANTIATE_DPERP(GparityWilsonImplFH);
INSTANTIATE_DPERP(GparityWilsonImplDF);
INSTANTIATE_DPERP(ZWilsonImplFH);
INSTANTIATE_DPERP(ZWilsonImplDF);
#endif #endif
} }

View File

@ -808,10 +808,21 @@ INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD); INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF); INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
INSTANTIATE_DPERP(DomainWallVec5dImplDF);
INSTANTIATE_DPERP(DomainWallVec5dImplFH);
INSTANTIATE_DPERP(ZDomainWallVec5dImplDF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplFH);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplFH>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplDF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}} }}

View File

@ -68,7 +68,7 @@ namespace Grid {
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls); assert(zdata->n==this->Ls);
// std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl; std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter // Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0); this->SetCoefficientsTanh(zdata,1.0,0.0);

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -89,6 +89,10 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD; typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonImplRL> WilsonFermionRL;
typedef WilsonFermion<WilsonImplFH> WilsonFermionFH;
typedef WilsonFermion<WilsonImplDF> WilsonFermionDF;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR; typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF; typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD; typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
@ -105,27 +109,50 @@ typedef DomainWallFermion<WilsonImplR> DomainWallFermionR;
typedef DomainWallFermion<WilsonImplF> DomainWallFermionF; typedef DomainWallFermion<WilsonImplF> DomainWallFermionF;
typedef DomainWallFermion<WilsonImplD> DomainWallFermionD; typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef DomainWallFermion<WilsonImplRL> DomainWallFermionRL;
typedef DomainWallFermion<WilsonImplFH> DomainWallFermionFH;
typedef DomainWallFermion<WilsonImplDF> DomainWallFermionDF;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef MobiusFermion<WilsonImplRL> MobiusFermionRL;
typedef MobiusFermion<WilsonImplFH> MobiusFermionFH;
typedef MobiusFermion<WilsonImplDF> MobiusFermionDF;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR; typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF; typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD; typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplRL> ZMobiusFermionRL;
typedef ZMobiusFermion<ZWilsonImplFH> ZMobiusFermionFH;
typedef ZMobiusFermion<ZWilsonImplDF> ZMobiusFermionDF;
// Ls vectorised // Ls vectorised
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR; typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallFermionVec5dR;
typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF; typedef DomainWallFermion<DomainWallVec5dImplF> DomainWallFermionVec5dF;
typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD; typedef DomainWallFermion<DomainWallVec5dImplD> DomainWallFermionVec5dD;
typedef DomainWallFermion<DomainWallVec5dImplRL> DomainWallFermionVec5dRL;
typedef DomainWallFermion<DomainWallVec5dImplFH> DomainWallFermionVec5dFH;
typedef DomainWallFermion<DomainWallVec5dImplDF> DomainWallFermionVec5dDF;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR; typedef MobiusFermion<DomainWallVec5dImplR> MobiusFermionVec5dR;
typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF; typedef MobiusFermion<DomainWallVec5dImplF> MobiusFermionVec5dF;
typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD; typedef MobiusFermion<DomainWallVec5dImplD> MobiusFermionVec5dD;
typedef MobiusFermion<DomainWallVec5dImplRL> MobiusFermionVec5dRL;
typedef MobiusFermion<DomainWallVec5dImplFH> MobiusFermionVec5dFH;
typedef MobiusFermion<DomainWallVec5dImplDF> MobiusFermionVec5dDF;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR; typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusFermionVec5dR;
typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF; typedef ZMobiusFermion<ZDomainWallVec5dImplF> ZMobiusFermionVec5dF;
typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD; typedef ZMobiusFermion<ZDomainWallVec5dImplD> ZMobiusFermionVec5dD;
typedef ZMobiusFermion<ZDomainWallVec5dImplRL> ZMobiusFermionVec5dRL;
typedef ZMobiusFermion<ZDomainWallVec5dImplFH> ZMobiusFermionVec5dFH;
typedef ZMobiusFermion<ZDomainWallVec5dImplDF> ZMobiusFermionVec5dDF;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF; typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
@ -166,17 +193,35 @@ typedef OverlapWilsonPartialFractionZolotarevFermion<WilsonImplD> OverlapWilsonP
typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR; typedef WilsonFermion<GparityWilsonImplR> GparityWilsonFermionR;
typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF; typedef WilsonFermion<GparityWilsonImplF> GparityWilsonFermionF;
typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD; typedef WilsonFermion<GparityWilsonImplD> GparityWilsonFermionD;
typedef WilsonFermion<GparityWilsonImplRL> GparityWilsonFermionRL;
typedef WilsonFermion<GparityWilsonImplFH> GparityWilsonFermionFH;
typedef WilsonFermion<GparityWilsonImplDF> GparityWilsonFermionDF;
typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR; typedef DomainWallFermion<GparityWilsonImplR> GparityDomainWallFermionR;
typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF; typedef DomainWallFermion<GparityWilsonImplF> GparityDomainWallFermionF;
typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD; typedef DomainWallFermion<GparityWilsonImplD> GparityDomainWallFermionD;
typedef DomainWallFermion<GparityWilsonImplRL> GparityDomainWallFermionRL;
typedef DomainWallFermion<GparityWilsonImplFH> GparityDomainWallFermionFH;
typedef DomainWallFermion<GparityWilsonImplDF> GparityDomainWallFermionDF;
typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR; typedef WilsonTMFermion<GparityWilsonImplR> GparityWilsonTMFermionR;
typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF; typedef WilsonTMFermion<GparityWilsonImplF> GparityWilsonTMFermionF;
typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD; typedef WilsonTMFermion<GparityWilsonImplD> GparityWilsonTMFermionD;
typedef WilsonTMFermion<GparityWilsonImplRL> GparityWilsonTMFermionRL;
typedef WilsonTMFermion<GparityWilsonImplFH> GparityWilsonTMFermionFH;
typedef WilsonTMFermion<GparityWilsonImplDF> GparityWilsonTMFermionDF;
typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR; typedef MobiusFermion<GparityWilsonImplR> GparityMobiusFermionR;
typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF; typedef MobiusFermion<GparityWilsonImplF> GparityMobiusFermionF;
typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD; typedef MobiusFermion<GparityWilsonImplD> GparityMobiusFermionD;
typedef MobiusFermion<GparityWilsonImplRL> GparityMobiusFermionRL;
typedef MobiusFermion<GparityWilsonImplFH> GparityMobiusFermionFH;
typedef MobiusFermion<GparityWilsonImplDF> GparityMobiusFermionDF;
typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR; typedef ImprovedStaggeredFermion<StaggeredImplR> ImprovedStaggeredFermionR;
typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF; typedef ImprovedStaggeredFermion<StaggeredImplF> ImprovedStaggeredFermionF;
typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD; typedef ImprovedStaggeredFermion<StaggeredImplD> ImprovedStaggeredFermionD;

View File

@ -55,7 +55,14 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
template class A<ZWilsonImplF>; \ template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \ template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \ template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; template class A<GparityWilsonImplD>; \
template class A<WilsonImplFH>; \
template class A<WilsonImplDF>; \
template class A<ZWilsonImplFH>; \
template class A<ZWilsonImplDF>; \
template class A<GparityWilsonImplFH>; \
template class A<GparityWilsonImplDF>;
#define AdjointFermOpTemplateInstantiate(A) \ #define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \ template class A<WilsonAdjImplF>; \
@ -69,7 +76,11 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
template class A<DomainWallVec5dImplF>; \ template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \ template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \ template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>; template class A<ZDomainWallVec5dImplD>; \
template class A<DomainWallVec5dImplFH>; \
template class A<DomainWallVec5dImplDF>; \
template class A<ZDomainWallVec5dImplFH>; \
template class A<ZDomainWallVec5dImplDF>;
#define FermOpTemplateInstantiate(A) \ #define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \

View File

@ -35,7 +35,6 @@ directory
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
////////////////////////////////////////////// //////////////////////////////////////////////
// Template parameter class constructs to package // Template parameter class constructs to package
// externally control Fermion implementations // externally control Fermion implementations
@ -44,7 +43,7 @@ namespace QCD {
// Ultimately need Impl to always define types where XXX is opaque // Ultimately need Impl to always define types where XXX is opaque
// //
// typedef typename XXX Simd; // typedef typename XXX Simd;
// typedef typename XXX GaugeLinkField; // typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeField; // typedef typename XXX GaugeField;
// typedef typename XXX GaugeActField; // typedef typename XXX GaugeActField;
// typedef typename XXX FermionField; // typedef typename XXX FermionField;
@ -89,7 +88,53 @@ namespace QCD {
// //
// } // }
////////////////////////////////////////////// //////////////////////////////////////////////
// Precision mapper that maps a vector type onto itself: both the "higher" and
// the "lower" precision aliases name T.
template <class T>
struct SamePrecisionMapper {
  using HigherPrecVector = T;
  using LowerPrecVector = T;
};
// Precision mapper that pairs a SIMD vector type with a second vector type
// named LowerPrecVector. The primary template is intentionally empty; only the
// four specialisations below provide the aliases.
template <class T>
struct LowerPrecisionMapper {};

// vRealF -> vRealH
template <>
struct LowerPrecisionMapper<vRealF> {
  using HigherPrecVector = vRealF;
  using LowerPrecVector = vRealH;
};

// vRealD -> vRealF
template <>
struct LowerPrecisionMapper<vRealD> {
  using HigherPrecVector = vRealD;
  using LowerPrecVector = vRealF;
};

// vComplexF -> vComplexH
template <>
struct LowerPrecisionMapper<vComplexF> {
  using HigherPrecVector = vComplexF;
  using LowerPrecVector = vComplexH;
};

// vComplexD -> vComplexF
template <>
struct LowerPrecisionMapper<vComplexD> {
  using HigherPrecVector = vComplexD;
  using LowerPrecVector = vComplexF;
};
// Option bundle: coefficient type RealD, Nhcs = 2, and a PrecisionMapper
// that forwards to SamePrecisionMapper (no change of vector type).
struct CoeffReal {
  using _Coeff_t = RealD;
  static const int Nhcs = 2;
  template <class Simd>
  using PrecisionMapper = SamePrecisionMapper<Simd>;
};
// Option bundle: coefficient type RealD, Nhcs = 1, and a PrecisionMapper
// that forwards to LowerPrecisionMapper.
struct CoeffRealHalfComms {
  using _Coeff_t = RealD;
  static const int Nhcs = 1;
  template <class Simd>
  using PrecisionMapper = LowerPrecisionMapper<Simd>;
};
// Option bundle: coefficient type ComplexD, Nhcs = 2, and a PrecisionMapper
// that forwards to SamePrecisionMapper (no change of vector type).
struct CoeffComplex {
  using _Coeff_t = ComplexD;
  static const int Nhcs = 2;
  template <class Simd>
  using PrecisionMapper = SamePrecisionMapper<Simd>;
};
// Option bundle: coefficient type ComplexD, Nhcs = 1, and a PrecisionMapper
// that forwards to LowerPrecisionMapper.
struct CoeffComplexHalfComms {
  using _Coeff_t = ComplexD;
  static const int Nhcs = 1;
  template <class Simd>
  using PrecisionMapper = LowerPrecisionMapper<Simd>;
};
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Implementation dependent fermion types // Implementation dependent fermion types
@ -108,58 +153,63 @@ namespace QCD {
typedef typename Impl::Coeff_t Coeff_t; \ typedef typename Impl::Coeff_t Coeff_t; \
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base) INHERIT_FIMPL_TYPES(Base)
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index // Single flavour four spinors with colour index
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD > template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > { class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public: public:
static const int Dimension = Representation::Dimension; static const int Dimension = Representation::Dimension;
static const bool LsVectorised=false;
static const int Nhcs = Options::Nhcs;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl);
//Necessary? //Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false; typedef typename Options::_Coeff_t Coeff_t;
typedef _Coeff_t Coeff_t; typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >; template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >; template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Dimension>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >; template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator; typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField; typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params; ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
assert(Params.boundary_phases.size() == Nd);
};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
inline void multLink(SiteHalfSpinor &phi, inline void multLink(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, const SiteHalfSpinor &chi,
int mu, int mu,
StencilEntry *SE, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
mult(&phi(), &U(mu), &chi()); mult(&phi(), &U(mu), &chi());
} }
@ -169,16 +219,34 @@ namespace QCD {
} }
inline void DoubleStore(GridBase *GaugeGrid, inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds, DoubledGaugeField &Uds,
const GaugeField &Umu) { const GaugeField &Umu)
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds._grid, GaugeGrid); conformable(Uds._grid, GaugeGrid);
conformable(Umu._grid, GaugeGrid); conformable(Umu._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid); GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Uds, U, mu); auto pha = Params.boundary_phases[mu];
U = adj(Cshift(U, mu, -1)); scalar_type phase( real(pha),imag(pha) );
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
} }
} }
@ -195,11 +263,11 @@ namespace QCD {
tmp = zero; tmp = zero;
parallel_for(int sss=0;sss<tmp._grid->oSites();sss++){ parallel_for(int sss=0;sss<tmp._grid->oSites();sss++){
int sU=sss; int sU=sss;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sF = s+Ls*sU; int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
} }
} }
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
@ -209,31 +277,34 @@ namespace QCD {
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
// Single flavour four spinors with colour index, 5d redblack // Single flavour four spinors with colour index, 5d redblack
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation=Nc, class Options=CoeffReal>
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
static const int Dimension = Nrepresentation;
static const bool LsVectorised=true;
static const int Nhcs = Options::Nhcs;
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Nrepresentation>, Ns> >; template <typename vtype> using iImplPropagator = iScalar<iMatrix<iMatrix<vtype, Nrepresentation>, Ns> >;
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template <typename vtype> using iImplHalfCommSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhcs> >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator; typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField; typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef Lattice<SitePropagator> PropagatorField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField;
///////////////////////////////////////////////// /////////////////////////////////////////////////
// Make the doubled gauge field a *scalar* // Make the doubled gauge field a *scalar*
@ -241,9 +312,9 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
@ -259,12 +330,12 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
} }
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE, const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
SiteGaugeLink UU; SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) { for (int i = 0; i < Nrepresentation; i++) {
for (int j = 0; j < Nrepresentation; j++) { for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j)); vsplat(UU()()(i, j), U(mu)()(i, j));
} }
} }
mult(&phi(), &UU(), &chi()); mult(&phi(), &UU(), &chi());
@ -301,45 +372,90 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
{ {
assert(0); assert(0);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu) inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
{
assert(0); assert(0);
// Following lines to be revised after Peter's addition of half prec
// missing put lane...
/*
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
unsigned int LLs = Btilde._grid->_rdimensions[0];
conformable(Atilde._grid,Btilde._grid);
GridBase* grid = mat._grid;
GridBase* Bgrid = Btilde._grid;
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = zero;
// FIXME
// Current implementation works, thread safe, probably suboptimal
// Passing through the local coordinate for grid transformation
// the force grid is in general very different from the Ls vectorized grid
PARALLEL_FOR_LOOP
for (int so = 0; so < grid->oSites(); so++) {
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp._grid->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = zero;
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
for (int s = 0; s < LLs; s++) {
std::vector<int> slocal_coor(dimF);
slocal_coor[0] = s;
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
int sF = Bgrid->oIndexReduced(slocal_coor);
assert(sF < Bgrid->oSites());
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
// sum across the 5d dimension
for (auto v : vres) scalar_object += v;
}
tmp._odata[so].putlane(scalar_object, si);
}
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
*/
} }
}; };
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template <class S, int Nrepresentation, class Options=CoeffReal>
template <class S, int Nrepresentation,class _Coeff_t = RealD>
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > { class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation; static const int Dimension = Nrepresentation;
static const int Nhcs = Options::Nhcs;
static const bool LsVectorised=false;
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Options::_Coeff_t Coeff_t;
typedef typename Options::template PrecisionMapper<Simd>::LowerPrecVector SimdL;
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>; template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp >; template <typename vtype> using iImplPropagator = iVector<iMatrix<iMatrix<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>; template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
template <typename vtype> using iImplHalfCommSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhcs>, Ngp>;
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>; template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplPropagator<Simd> SitePropagator; typedef iImplPropagator<Simd> SitePropagator;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplHalfCommSpinor<SimdL> SiteHalfCommSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SitePropagator> PropagatorField; typedef Lattice<SitePropagator> PropagatorField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfCommSpinor,SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams; typedef GparityWilsonImplParams ImplParams;
@ -353,19 +469,19 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// provide the multiply by link that is differentiated between Gparity (with // provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity // flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE, const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
typedef SiteHalfSpinor vobj; typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj; typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp; vobj vtmp;
sobj stmp; sobj stmp;
GridBase *grid = St._grid; GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
@ -373,13 +489,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// Fixme X.Y.Z.T hardcode in stencil // Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd; int mmu = mu % Nd;
// assert our assumptions // assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2)); assert((sl == 1) || (sl == 2));
std::vector<int> icoor; std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) { if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) { if ( sl == 2 ) {
@ -389,25 +505,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
extract(chi,vals); extract(chi,vals);
for(int s=0;s<Nsimd;s++){ for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s); grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1)); assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane; int permute_lane;
if ( distance == 1) { if ( distance == 1) {
permute_lane = icoor[direction]?1:0; permute_lane = icoor[direction]?1:0;
} else { } else {
permute_lane = icoor[direction]?0:1; permute_lane = icoor[direction]?0:1;
} }
if ( permute_lane ) { if ( permute_lane ) {
stmp(0) = vals[s](1); stmp(0) = vals[s](1);
stmp(1) = vals[s](0); stmp(1) = vals[s](0);
vals[s] = stmp; vals[s] = stmp;
} }
} }
merge(vtmp,vals); merge(vtmp,vals);
} else { } else {
vtmp(0) = chi(1); vtmp(0) = chi(1);
vtmp(1) = chi(0); vtmp(1) = chi(0);
@ -432,11 +548,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
GaugeLinkField Uconj(GaugeGrid); GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid); Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu); LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu); U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U); Uconj = conjugate(U);
@ -450,7 +566,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
Uds[ss](0)(mu) = U[ss](); Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss](); Uds[ss](1)(mu) = Uconj[ss]();
} }
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1)); Uconj = adj(Cshift(Uconj,mu,-1));
@ -458,11 +574,12 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp); Utmp = where(coor==0,Uconj,Utmp);
} }
parallel_for(auto ss=U.begin();ss<U.end();ss++){ parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss](); Uds[ss](0)(mu+4) = Utmp[ss]();
} }
Utmp = Uconj; Utmp = Uconj;
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp); Utmp = where(coor==0,U,Utmp);
@ -471,11 +588,10 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
parallel_for(auto ss=U.begin();ss<U.end();ss++){ parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss](); Uds[ss](1)(mu+4) = Utmp[ss]();
} }
} }
} }
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) { inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour. // DhopDir provides U or Uconj depending on coor/flavour.
@ -483,7 +599,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) {
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1));
} }
PokeIndex<LorentzIndex>(mat, link, mu); PokeIndex<LorentzIndex>(mat, link, mu);
return; return;
@ -492,7 +608,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) { inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0]; int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) { parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) {
@ -508,23 +624,22 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
}; };
/////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////// // Single flavour one component spinors with colour index
// Single flavour one component spinors with colour index /////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////// template <class S, class Representation = FundamentalRepresentation >
template <class S, class Representation = FundamentalRepresentation > class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public: public:
typedef RealD _Coeff_t ; typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension; static const int Dimension = Representation::Dimension;
static const bool LsVectorised=false;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary? //Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false;
typedef _Coeff_t Coeff_t; typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -641,8 +756,6 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
} }
}; };
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// Single flavour one component spinors with colour index. 5d vec // Single flavour one component spinors with colour index. 5d vec
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
@ -651,16 +764,14 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
public: public:
typedef RealD _Coeff_t ;
static const int Dimension = Representation::Dimension; static const int Dimension = Representation::Dimension;
static const bool LsVectorised=true;
typedef RealD Coeff_t ;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary? //Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -823,43 +934,61 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
} }
}; };
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffReal > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > WilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > WilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffRealHalfComms > WilsonImplDF; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplex > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplex > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplex > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplRL; // Real.. whichever prec
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplFH; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation, CoeffComplexHalfComms > ZWilsonImplDF; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, AdjointRepresentation, CoeffReal > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float typedef WilsonImpl<vComplexF, AdjointRepresentation, CoeffReal > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double typedef WilsonImpl<vComplexD, AdjointRepresentation, CoeffReal > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation, CoeffReal > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffReal> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffReal> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffReal> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc, CoeffRealHalfComms> DomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc, CoeffRealHalfComms> DomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc, CoeffRealHalfComms> DomainWallVec5dImplDF; // Double
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplex> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplex> ZDomainWallVec5dImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplex> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplRL; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplFH; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,CoeffComplexHalfComms> ZDomainWallVec5dImplDF; // Double
typedef GparityWilsonImpl<vComplex , Nc,CoeffReal> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc,CoeffReal> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD, Nc,CoeffReal> GparityWilsonImplD; // Double
typedef GparityWilsonImpl<vComplex , Nc,CoeffRealHalfComms> GparityWilsonImplRL; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF, Nc,CoeffRealHalfComms> GparityWilsonImplFH; // Float
typedef GparityWilsonImpl<vComplexD, Nc,CoeffRealHalfComms> GparityWilsonImplDF; // Double
typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec typedef StaggeredImpl<vComplex, FundamentalRepresentation > StaggeredImplR; // Real.. whichever prec
typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float typedef StaggeredImpl<vComplexF, FundamentalRepresentation > StaggeredImplF; // Float
typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double typedef StaggeredImpl<vComplexD, FundamentalRepresentation > StaggeredImplD; // Double
typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec typedef StaggeredVec5dImpl<vComplex, FundamentalRepresentation > StaggeredVec5dImplR; // Real.. whichever prec
typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float typedef StaggeredVec5dImpl<vComplexF, FundamentalRepresentation > StaggeredVec5dImplF; // Float
typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double typedef StaggeredVec5dImpl<vComplexD, FundamentalRepresentation > StaggeredVec5dImplD; // Double
}} }}

View File

@ -160,8 +160,6 @@ void ImprovedStaggeredFermion<Impl>::ImportGauge(const GaugeField &_Uthin,const
PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4); PokeIndex<LorentzIndex>(UUUmu, U*(-0.5*c2/u0/u0/u0), mu+4);
} }
std::cout << " Umu " << Umu._odata[0]<<std::endl;
std::cout << " UUUmu " << UUUmu._odata[0]<<std::endl;
pickCheckerboard(Even, UmuEven, Umu); pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd , Umu); pickCheckerboard(Odd, UmuOdd , Umu);
pickCheckerboard(Even, UUUmuEven, UUUmu); pickCheckerboard(Even, UUUmuEven, UUUmu);

Some files were not shown because too many files have changed in this diff Show More