mirror of https://github.com/paboyle/Grid.git synced 2025-06-15 14:27:06 +01:00

Compare commits

..

1 commit

Author SHA1 Message Date
e57eafe388 Fix to multinode code 2017-04-26 14:46:52 -04:00
242 changed files with 6896 additions and 30054 deletions

.gitignore

@@ -92,7 +92,6 @@ build*/*
 #####################
 *.xcodeproj/*
 build.sh
-.vscode
 # Eigen source #
 ################
@@ -107,10 +106,6 @@ lib/fftw/*
 m4/lt*
 m4/libtool.m4
-# github pages #
-################
-gh-pages/
 # Buck files #
 ##############
 .buck*
@@ -121,5 +116,4 @@ make-bin-BUCK.sh
 # generated sources #
 #####################
 lib/qcd/spin/gamma-gen/*.h
 lib/qcd/spin/gamma-gen/*.cc


@@ -10,8 +10,6 @@ matrix:
 osx_image: xcode8.3
 compiler: clang
 - compiler: gcc
-dist: trusty
-sudo: required
 addons:
 apt:
 sources:
@@ -26,8 +24,6 @@ matrix:
 - binutils-dev
 env: VERSION=-4.9
 - compiler: gcc
-dist: trusty
-sudo: required
 addons:
 apt:
 sources:
@@ -42,7 +38,6 @@ matrix:
 - binutils-dev
 env: VERSION=-5
 - compiler: clang
-dist: trusty
 addons:
 apt:
 sources:
@@ -57,7 +52,6 @@ matrix:
 - binutils-dev
 env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
 - compiler: clang
-dist: trusty
 addons:
 apt:
 sources:
@@ -84,10 +78,6 @@ install:
 - export CC=$CC$VERSION
 - export CXX=$CXX$VERSION
 - echo $PATH
-- which autoconf
-- autoconf --version
-- which automake
-- automake --version
 - which $CC
 - $CC --version
 - which $CXX
@@ -105,10 +95,9 @@ script:
 - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
 - make -j4
 - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
-- make check
 - echo make clean
-- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto ; fi
-- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then make -j4; fi
-- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
+- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
+- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make -j4; fi
+- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi


@@ -3,15 +3,10 @@ SUBDIRS = lib benchmarks tests extras
 include $(top_srcdir)/doxygen.inc
-bin_SCRIPTS=grid-config
-.PHONY: bench check tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
-tests-local: all
-bench-local: all
-check-local: all
+tests: all
+	$(MAKE) -C tests tests
+.PHONY: tests doxygen-run doxygen-doc $(DX_PS_GOAL) $(DX_PDF_GOAL)
 AM_CXXFLAGS += -I$(top_builddir)/include
 ACLOCAL_AMFLAGS = -I m4
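
As an aside (not part of the diff above): one side of this hunk wires recursive `tests`/`bench`/`check` targets through `tests-local`, `bench-local` and `check-local`, while the other keeps only a plain `tests` rule. A hedged sketch of how these targets are typically driven from a build directory, assuming automake's usual recursive-target behaviour:

```bash
# Assumed usage of the targets named in the Makefile.am hunk above;
# which targets exist depends on which side of the diff is checked out.
make             # build the SUBDIRS (lib benchmarks tests extras)
make tests       # build the test programs
make check       # only where check-local is defined
make bench       # only where bench-local is defined
```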


@@ -22,26 +22,6 @@ Last update Nov 2016.
 _Please do not send pull requests to the `master` branch which is reserved for releases._
-### Compilers
-Intel ICPC v16.0.3 and later
-Clang v3.5 and later (need 3.8 and later for OpenMP)
-GCC v4.9.x (recommended)
-GCC v6.3 and later
-### Important:
-Some versions of GCC appear to have a bug under high optimisation (-O2, -O3).
-The safety of these compiler versions cannot be guaranteed at this time. Follow Issue 100 for details and updates.
-GCC v5.x
-GCC v6.1, v6.2
 ### Bug report
 _To help us tracking and solving more efficiently issues with Grid, please report problems using the issue system of GitHub rather than sending emails to Grid developers._
@@ -52,7 +32,7 @@ When you file an issue, please go though the following checklist:
 2. Give a description of the target platform (CPU, network, compiler). Please give the full CPU part description, using for example `cat /proc/cpuinfo | grep 'model name' | uniq` (Linux) or `sysctl machdep.cpu.brand_string` (macOS) and the full output the `--version` option of your compiler.
 3. Give the exact `configure` command used.
 4. Attach `config.log`.
-5. Attach `grid.config.summary`.
+5. Attach `config.summary`.
 6. Attach the output of `make V=1`.
 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example.
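
As an aside (not part of the diff above): the checklist maps onto a handful of shell commands. A sketch for gathering the requested material on Linux; the `build/` directory and the output file names are assumptions:

```bash
# Sketch: collect the information requested in the bug-report checklist (Linux).
{
  echo "== CPU ==";       cat /proc/cpuinfo | grep 'model name' | uniq
  echo "== compiler ==";  ${CXX:-g++} --version
  echo "== configure =="; head -n 20 build/config.log        # config.log records the exact configure command
  echo "== summary ==";   cat build/*config*.summary 2>/dev/null
} > grid-issue-info.txt
make -C build V=1 2>&1 | tee grid-make-V1.log
```
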
@@ -115,10 +95,10 @@ install Grid. Other options are detailed in the next section, you can also use `
 `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
 customise the build.
-Finally, you can build, check, and install Grid:
+Finally, you can build and install Grid:
 ``` bash
-make; make check; make install
+make; make install
 ```
 To minimise the build time, only the tests at the root of the `tests` directory are built by default. If you want to build tests in the sub-directory `<subdir>` you can execute:
@@ -141,7 +121,7 @@ If you want to build all the tests at once just use `make tests`.
 - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
 - `--enable-precision={single|double}`: set the default precision (default: `double`).
 - `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
-- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
+- `--enable-rng={ranlux48|mt19937}`: choose the RNG (default: `ranlux48 `).
 - `--disable-timers`: disable system dependent high-resolution timers.
 - `--enable-chroma`: enable Chroma regression tests.
 - `--enable-doxygen-doc`: enable the Doxygen documentation generation (build with `make doxygen-doc`)
@@ -179,6 +159,7 @@ Alternatively, some CPU codenames can be directly used:
 | `<code>` | Description |
 | ----------- | -------------------------------------- |
+| `KNC` | [Intel Xeon Phi codename Knights Corner](http://ark.intel.com/products/codename/57721/Knights-Corner) |
 | `KNL` | [Intel Xeon Phi codename Knights Landing](http://ark.intel.com/products/codename/48999/Knights-Landing) |
 | `BGQ` | Blue Gene/Q |
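
As an aside (not part of the diff above): pulling the quoted options together, a hypothetical configure line for an MPI build; the AVX2 target, install prefix and RNG choice are illustrative only:

```bash
# Illustrative only: an MPI + AVX2 build combining options from the README hunks above.
mkdir -p build && cd build
../configure --enable-simd=AVX2 \
             --enable-comms=mpi-auto \
             --enable-precision=single \
             --enable-rng=ranlux48 \
             --prefix=$HOME/opt/grid
make -j4 && make install
```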

TODO

@@ -1,30 +1,23 @@
 TODO:
 ---------------
-Large item work list:
-1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O
-2)- Christoph's local basis expansion Lanczos
-3)- BG/Q port and check
-4)- Precision conversion and sort out localConvert <-- partial
-- Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet
-5)- Physical propagator interface
-6)- Conserved currents
-7)- Multigrid Wilson and DWF, compare to other Multigrid implementations
-8)- HDCR resume
+Peter's work list:
+2)- Precision conversion and sort out localConvert <--
+3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started
+4)- Binary I/O speed up & x-strips
+-- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet
+-- Physical propagator interface
+-- Conserved currents
+-- GaugeFix into central location
+-- Multigrid Wilson and DWF, compare to other Multigrid implementations
+-- HDCR resume
 Recent DONE
--- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE
--- GaugeFix into central location <-- DONE
--- Scidac and Ildg metadata handling <-- DONE
--- Binary I/O MPI2 IO <-- DONE
--- Binary I/O speed up & x-strips <-- DONE
 -- Cut down the exterior overhead <-- DONE
 -- Interior legs from SHM comms <-- DONE
 -- Half-precision comms <-- DONE
--- Merge high precision reduction into develop <-- DONE
--- BlockCG, BCGrQ <-- DONE
--- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE
+-- Merge high precision reduction into develop
+-- multiRHS DWF; benchmark on Cori/BNL for comms elimination
 -- slice* linalg routines for multiRHS, BlockCG
 -----


@@ -1,5 +1,6 @@
-Version : 0.7.0
-- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
-- MPI and MPI3 comms optimisations for KNL and OPA finished
-- Half precision comms
+Version : 0.6.0
+- AVX512, AVX2, AVX, SSE good
+- Clang 3.5 and above, ICPC v16 and above, GCC 4.9 and above
+- MPI and MPI3
+- HiRep, Smearing, Generic gauge group


@ -31,32 +31,6 @@ using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
struct time_statistics{
double mean;
double err;
double min;
double max;
void statistics(std::vector<double> v){
double sum = std::accumulate(v.begin(), v.end(), 0.0);
mean = sum / v.size();
std::vector<double> diff(v.size());
std::transform(v.begin(), v.end(), diff.begin(), [=](double x) { return x - mean; });
double sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
err = std::sqrt(sq_sum / (v.size()*(v.size() - 1)));
auto result = std::minmax_element(v.begin(), v.end());
min = *result.first;
max = *result.second;
}
};
void header(){
std::cout <<GridLogMessage << " L "<<"\t"<<" Ls "<<"\t"
<<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl;
};
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
@ -66,19 +40,15 @@ int main (int argc, char ** argv)
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
int Nloop=100; int Nloop=10;
int nmu=0; int nmu=0;
int maxlat=24;
for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++;
std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl;
std::vector<double> t_time(Nloop);
time_statistics timestat;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header(); std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
int maxlat=24;
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -88,9 +58,6 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]}); lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@ -98,8 +65,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int i=0;i<Nloop;i++){
double start=usecond(); double start=usecond();
for(int i=0;i<Nloop;i++){
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
@ -135,24 +102,18 @@ int main (int argc, char ** argv)
} }
Grid.SendToRecvFromComplete(requests); Grid.SendToRecvFromComplete(requests);
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond();
timestat.statistics(t_time); double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double dbytes = bytes*ppn;
double xbytes = dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" double time = stop-start; // microseconds
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
} }
} }
@ -160,7 +121,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header(); std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -168,9 +130,6 @@ int main (int argc, char ** argv)
std::vector<int> latt_size ({lat,lat,lat,lat}); std::vector<int> latt_size ({lat,lat,lat,lat});
GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls));
@ -179,8 +138,8 @@ int main (int argc, char ** argv)
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int i=0;i<Nloop;i++){
double start=usecond(); double start=usecond();
for(int i=0;i<Nloop;i++){
ncomm=0; ncomm=0;
for(int mu=0;mu<4;mu++){ for(int mu=0;mu<4;mu++){
@ -219,34 +178,27 @@ int main (int argc, char ** argv)
} }
} }
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
timestat.statistics(t_time); double stop=usecond();
double dbytes = bytes*ppn; double dbytes = bytes;
double xbytes = dbytes*2.0*ncomm; double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes; double rbytes = xbytes;
double bidibytes = xbytes+rbytes; double bidibytes = xbytes+rbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" double time = stop-start;
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
} }
} }
Nloop=10;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header(); std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=4){ for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){ for(int Ls=8;Ls<=32;Ls*=2){
@ -257,9 +209,6 @@ int main (int argc, char ** argv)
lat*mpi_layout[3]}); lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout); GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8); std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8); std::vector<HalfSpinColourVectorD *> rbuf(8);
@ -267,115 +216,16 @@ int main (int argc, char ** argv)
for(int d=0;d<8;d++){ for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
} }
int ncomm; int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double dbytes; double start=usecond();
for(int i=0;i<Nloop;i++){ for(int i=0;i<Nloop;i++){
double start=usecond();
dbytes=0;
ncomm=0;
std::vector<CartesianCommunicator::CommsRequest_t> requests; std::vector<CartesianCommunicator::CommsRequest_t> requests;
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
dbytes+=
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
Grid.StencilSendToRecvFromComplete(requests);
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
timestat.statistics(t_time);
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
header();
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
RealD ppn = Nrank/Nnode;
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double dbytes;
for(int i=0;i<Nloop;i++){
double start=usecond();
std::vector<CartesianCommunicator::CommsRequest_t> requests;
dbytes=0;
ncomm=0; ncomm=0;
for(int mu=0;mu<4;mu++){ for(int mu=0;mu<4;mu++){
@ -387,52 +237,123 @@ int main (int argc, char ** argv)
int recv_from_rank; int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
dbytes+= Grid.StencilSendToRecvFromBegin(requests,
Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu][0],
(void *)&xbuf[mu][0], xmit_to_rank,
xmit_to_rank, (void *)&rbuf[mu][0],
(void *)&rbuf[mu][0], recv_from_rank,
recv_from_rank, bytes);
bytes);
comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu+4][0],
xmit_to_rank,
(void *)&rbuf[mu+4][0],
recv_from_rank,
bytes);
}
}
Grid.StencilSendToRecvFromComplete(requests);
Grid.Barrier();
}
double stop=usecond();
double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
double time = stop-start; // microseconds
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
}
}
Nloop=100;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl;
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<" Ls "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl;
for(int lat=4;lat<=maxlat;lat+=4){
for(int Ls=8;Ls<=32;Ls*=2){
std::vector<int> latt_size ({lat*mpi_layout[0],
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int ncomm;
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
double start=usecond();
for(int i=0;i<Nloop;i++){
std::vector<CartesianCommunicator::CommsRequest_t> requests;
ncomm=0;
for(int mu=0;mu<4;mu++){
if (mpi_layout[mu]>1 ) {
ncomm++;
int comm_proc=1;
int xmit_to_rank;
int recv_from_rank;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
Grid.StencilSendToRecvFromBegin(requests,
(void *)&xbuf[mu][0],
xmit_to_rank,
(void *)&rbuf[mu][0],
recv_from_rank,
bytes);
Grid.StencilSendToRecvFromComplete(requests); Grid.StencilSendToRecvFromComplete(requests);
requests.resize(0); requests.resize(0);
comm_proc = mpi_layout[mu]-1; comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
dbytes+= Grid.StencilSendToRecvFromBegin(requests,
Grid.StencilSendToRecvFromBegin(requests, (void *)&xbuf[mu+4][0],
(void *)&xbuf[mu+4][0], xmit_to_rank,
xmit_to_rank, (void *)&rbuf[mu+4][0],
(void *)&rbuf[mu+4][0], recv_from_rank,
recv_from_rank, bytes);
bytes);
Grid.StencilSendToRecvFromComplete(requests); Grid.StencilSendToRecvFromComplete(requests);
requests.resize(0); requests.resize(0);
} }
} }
Grid.Barrier(); Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
} }
double stop=usecond();
timestat.statistics(t_time); double dbytes = bytes;
double xbytes = Nloop*dbytes*2.0*ncomm;
double rbytes = xbytes;
double bidibytes = xbytes+rbytes;
dbytes=dbytes*ppn; double time = stop-start; // microseconds
double xbytes = dbytes*0.5;
double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl;
std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t"
<<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7)
<<std::right<< xbytes/timestat.mean<<" "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
} }
} }


@ -1,22 +1,28 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
@ -145,7 +151,6 @@ int main (int argc, char ** argv)
RealD M5 =1.8; RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors; RealD NP = UGrid->_Nprocessors;
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl; std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl; std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
@ -155,10 +160,6 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -188,7 +189,6 @@ int main (int argc, char ** argv)
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; // std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
@ -225,7 +225,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called half prec comms Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
@ -241,10 +240,6 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -276,7 +271,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
// std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; // std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
sDw.Report(); sDw.Report();
RealD sum=0; RealD sum=0;
@ -309,10 +303,6 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising fifth dimension by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric )
std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll)
@ -352,7 +342,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
sDw.Report(); sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
@ -431,15 +420,14 @@ int main (int argc, char ** argv)
// S-direction is INNERMOST and takes no part in the parity. // S-direction is INNERMOST and takes no part in the parity.
static int Opt; // these are a temporary hack
static int Comms; // these are a temporary hack
std::cout << GridLogMessage<< "*********************************************************" <<std::endl; std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl; std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl; std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplex::Nsimd()<<std::endl;
if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl; if ( sizeof(Real)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl; if ( sizeof(Real)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl; if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
@ -460,7 +448,6 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report(); Dw.Report();
} }
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
@ -492,4 +479,3 @@ int main (int argc, char ** argv)
Grid_finalize(); Grid_finalize();
} }


@@ -55,8 +55,8 @@ int main (int argc, char ** argv)
 std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-uint64_t lmax=64;
-#define NLOOP (100*lmax*lmax*lmax*lmax/vol)
+uint64_t lmax=44;
+#define NLOOP (1*lmax*lmax*lmax*lmax/vol)
 for(int lat=4;lat<=lmax;lat+=4){
 std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});


@@ -35,9 +35,8 @@ using namespace Grid::QCD;
 int main (int argc, char ** argv)
 {
 Grid_init(&argc,&argv);
-#define LMAX (64)
-int Nloop=20;
+int Nloop=1000;
 std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 std::vector<int> mpi_layout = GridDefaultMpi();
@@ -51,7 +50,7 @@ int main (int argc, char ** argv)
 std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
 std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -83,7 +82,7 @@ int main (int argc, char ** argv)
 std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
 std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -114,7 +113,7 @@ int main (int argc, char ** argv)
 std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
 std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -145,7 +144,7 @@ int main (int argc, char ** argv)
 std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
 std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-for(int lat=2;lat<=LMAX;lat+=2){
+for(int lat=2;lat<=32;lat+=2){
 std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
 int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];


@@ -1,7 +1,11 @@
 include Make.inc
-bench-local: all
-	./Benchmark_su3
-	./Benchmark_memory_bandwidth
-	./Benchmark_wilson
-	./Benchmark_dwf --dslash-unroll
+simple: simple_su3_test.o simple_su3_expr.o simple_simd_test.o
+EXTRA_LIBRARIES = libsimple_su3_test.a libsimple_su3_expr.a libsimple_simd_test.a
+libsimple_su3_test_a_SOURCES = simple_su3_test.cc
+libsimple_su3_expr_a_SOURCES = simple_su3_expr.cc
+libsimple_simd_test_a_SOURCES = simple_simd_test.cc


@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
-EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2'
+EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
 echo "-- deploying Eigen source..."
 wget ${EIGEN_URL} --no-check-certificate


@ -1,19 +1,16 @@
AC_PREREQ([2.63]) AC_PREREQ([2.63])
AC_INIT([Grid], [0.7.0], [https://github.com/paboyle/Grid], [Grid]) AC_INIT([Grid], [0.6.0], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD AC_CANONICAL_BUILD
AC_CANONICAL_HOST AC_CANONICAL_HOST
AC_CANONICAL_TARGET AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE([subdir-objects 1.13]) AM_INIT_AUTOMAKE(subdir-objects)
AM_EXTRA_RECURSIVE_TARGETS([tests bench])
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h]) AC_CONFIG_HEADERS([lib/Config.h],[sed -i 's|PACKAGE_|GRID_|' lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
################ Get git info
#AC_REVISION([m4_esyscmd_s([./scripts/configure.commit])])
############### Checks for programs ############### Checks for programs
CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB AC_PROG_RANLIB
@ -27,15 +24,12 @@ AX_GXX_VERSION
AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
[version of g++ that will compile the code]) [version of g++ that will compile the code])
CXXFLAGS="-g $CXXFLAGS"
############### Checks for typedefs, structures, and compiler characteristics ############### Checks for typedefs, structures, and compiler characteristics
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
AC_TYPE_UINT64_T AC_TYPE_UINT64_T
############### OpenMP ############### OpenMP
AC_OPENMP AC_OPENMP
ac_openmp=no ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then if test "${OPENMP_CXXFLAGS}X" != "X"; then
@ -66,23 +60,16 @@ AC_ARG_WITH([mpfr],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
############### FFTW3 ############### FFTW3
AC_ARG_WITH([fftw], AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix], [AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])], [try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
############### LIME ############### lapack
AC_ARG_WITH([lime],
[AS_HELP_STRING([--with-lime=prefix],
[try this for a non-standard install prefix of the LIME library])],
[AM_CXXFLAGS="-I$with_lime/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_lime/lib $AM_LDFLAGS"])
############### lapack
AC_ARG_ENABLE([lapack], AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}], [ac_LAPACK=no]) [ac_LAPACK=${enable_lapack}], [ac_LAPACK=no])
case ${ac_LAPACK} in case ${ac_LAPACK} in
@ -98,7 +85,7 @@ esac
############### FP16 conversions ############### FP16 conversions
AC_ARG_ENABLE([sfw-fp16], AC_ARG_ENABLE([sfw-fp16],
[AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])], [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
[ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes]) [ac_SFW_FP16=${enable_sfw_fp16}], [ac_SFW_FP16=yes])
case ${ac_SFW_FP16} in case ${ac_SFW_FP16} in
yes) yes)
@ -133,7 +120,7 @@ AC_ARG_WITH([hdf5],
############### first-touch ############### first-touch
AC_ARG_ENABLE([numa], AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])], [AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no]) [ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in case ${ac_NUMA} in
@ -159,8 +146,8 @@ if test "${ac_MKL}x" != "nox"; then
fi fi
AC_SEARCH_LIBS([__gmpf_init], [gmp], AC_SEARCH_LIBS([__gmpf_init], [gmp],
[AC_SEARCH_LIBS([mpfr_init], [mpfr], [AC_SEARCH_LIBS([mpfr_init], [mpfr],
[AC_DEFINE([HAVE_LIBMPFR], [1], [AC_DEFINE([HAVE_LIBMPFR], [1],
[Define to 1 if you have the `MPFR' library])] [Define to 1 if you have the `MPFR' library])]
[have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])] [have_mpfr=true], [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])] [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library])]
@ -169,7 +156,7 @@ AC_SEARCH_LIBS([__gmpf_init], [gmp],
if test "${ac_LAPACK}x" != "nox"; then if test "${ac_LAPACK}x" != "nox"; then
AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [], AC_SEARCH_LIBS([LAPACKE_sbdsdc], [lapack], [],
[AC_MSG_ERROR("LAPACK enabled but library not found")]) [AC_MSG_ERROR("LAPACK enabled but library not found")])
fi fi
AC_SEARCH_LIBS([fftw_execute], [fftw3], AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [], [AC_SEARCH_LIBS([fftwf_execute], [fftw3f], [],
@ -177,18 +164,6 @@ AC_SEARCH_LIBS([fftw_execute], [fftw3],
[AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])] [AC_DEFINE([HAVE_FFTW], [1], [Define to 1 if you have the `FFTW' library])]
[have_fftw=true]) [have_fftw=true])
AC_SEARCH_LIBS([limeCreateReader], [lime],
[AC_DEFINE([HAVE_LIME], [1], [Define to 1 if you have the `LIME' library])]
[have_lime=true],
[AC_MSG_WARN(C-LIME library was not found in your system.
In order to use ILGG file format please install or provide the correct path to your installation
Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)])
AC_SEARCH_LIBS([crc32], [z],
[AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])]
[have_zlib=true],
[AC_MSG_ERROR(zlib library was not found in your system.)])
AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp],
[AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])]
[have_hdf5=true] [have_hdf5=true]
@ -341,7 +316,7 @@ case ${ac_COMMS} in
comms_type='shmem' comms_type='shmem'
;; ;;
*) *)
AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]); AC_MSG_ERROR([${ac_COMMS} unsupported --enable-comms option]);
;; ;;
esac esac
case ${ac_COMMS} in case ${ac_COMMS} in
@ -378,7 +353,7 @@ case ${ac_RNG} in
AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] ) AC_DEFINE([RNG_SITMO],[1],[RNG_SITMO] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]); AC_MSG_ERROR([${ac_RNG} unsupported --enable-rng option]);
;; ;;
esac esac
@ -395,7 +370,7 @@ case ${ac_TIMERS} in
AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] ) AC_DEFINE([TIMERS_OFF],[1],[TIMERS_OFF] )
;; ;;
*) *)
AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]); AC_MSG_ERROR([${ac_TIMERS} unsupported --enable-timers option]);
;; ;;
esac esac
@ -407,7 +382,7 @@ case ${ac_CHROMA} in
yes|no) yes|no)
;; ;;
*) *)
AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]); AC_MSG_ERROR([${ac_CHROMA} unsupported --enable-chroma option]);
;; ;;
esac esac
@ -428,65 +403,12 @@ DX_INIT_DOXYGEN([$PACKAGE_NAME], [doxygen.cfg])
############### Ouput ############### Ouput
cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd} cwd=`pwd -P`; cd ${srcdir}; abs_srcdir=`pwd -P`; cd ${cwd}
GRID_CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
GRID_LDFLAGS="$AM_LDFLAGS $LDFLAGS"
GRID_LIBS=$LIBS
GRID_SHORT_SHA=`git rev-parse --short HEAD`
GRID_SHA=`git rev-parse HEAD`
GRID_BRANCH=`git rev-parse --abbrev-ref HEAD`
AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS" AM_CXXFLAGS="-I${abs_srcdir}/include $AM_CXXFLAGS"
AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS" AM_CFLAGS="-I${abs_srcdir}/include $AM_CFLAGS"
AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS" AM_LDFLAGS="-L${cwd}/lib $AM_LDFLAGS"
AC_SUBST([AM_CFLAGS]) AC_SUBST([AM_CFLAGS])
AC_SUBST([AM_CXXFLAGS]) AC_SUBST([AM_CXXFLAGS])
AC_SUBST([AM_LDFLAGS]) AC_SUBST([AM_LDFLAGS])
AC_SUBST([GRID_CXXFLAGS])
AC_SUBST([GRID_LDFLAGS])
AC_SUBST([GRID_LIBS])
AC_SUBST([GRID_SHA])
AC_SUBST([GRID_BRANCH])
git_commit=`cd $srcdir && ./scripts/configure.commit`
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- GIT VERSION -------------------------------------
$git_commit
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
Software FP16 conversion : ${ac_SFW_FP16}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
LIME (ILDG support) : `if test "x$have_lime" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > grid.configure.summary
GRID_SUMMARY="`cat grid.configure.summary`"
AM_SUBST_NOTMAKE([GRID_SUMMARY])
AC_SUBST([GRID_SUMMARY])
AC_CONFIG_FILES([grid-config], [chmod +x grid-config])
AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(Makefile)
AC_CONFIG_FILES(lib/Makefile) AC_CONFIG_FILES(lib/Makefile)
AC_CONFIG_FILES(tests/Makefile) AC_CONFIG_FILES(tests/Makefile)
@ -497,7 +419,6 @@ AC_CONFIG_FILES(tests/forces/Makefile)
AC_CONFIG_FILES(tests/hadrons/Makefile) AC_CONFIG_FILES(tests/hadrons/Makefile)
AC_CONFIG_FILES(tests/hmc/Makefile) AC_CONFIG_FILES(tests/hmc/Makefile)
AC_CONFIG_FILES(tests/solver/Makefile) AC_CONFIG_FILES(tests/solver/Makefile)
AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile) AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile) AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile) AC_CONFIG_FILES(benchmarks/Makefile)
@ -505,7 +426,36 @@ AC_CONFIG_FILES(extras/Makefile)
AC_CONFIG_FILES(extras/Hadrons/Makefile) AC_CONFIG_FILES(extras/Hadrons/Makefile)
AC_OUTPUT AC_OUTPUT
echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Summary of configuration for $PACKAGE v$VERSION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----- PLATFORM ----------------------------------------
architecture (build) : $build_cpu
os (build) : $build_os
architecture (target) : $target_cpu
os (target) : $target_os
compiler vendor : ${ax_cv_cxx_compiler_vendor}
compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS -----------------------------------
SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
Threading : ${ac_openmp}
Communications type : ${comms_type}
Default precision : ${ac_PRECISION}
Software FP16 conversion : ${ac_SFW_FP16}
RNG choice : ${ac_RNG}
GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
LAPACK : ${ac_LAPACK}
FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
HDF5 : `if test "x$have_hdf5" = xtrue; then echo yes; else echo no; fi`
build DOXYGEN documentation : `if test "$DX_FLAG_doc" = '1'; then echo yes; else echo no; fi`
----- BUILD FLAGS -------------------------------------
CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
-------------------------------------------------------" > config.summary
echo "" echo ""
cat grid.configure.summary cat config.summary
echo "" echo ""

View File

@ -162,8 +162,7 @@ void Application::saveParameterFile(const std::string parameterFileName)
sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)" sizeString((size)*locVol_) << " (" << sizeString(size) << "/site)"
#define DEFINE_MEMPEAK \ #define DEFINE_MEMPEAK \
GeneticScheduler<unsigned int>::ObjFunc memPeak = \ auto memPeak = [this](const std::vector<unsigned int> &program)\
[this](const std::vector<unsigned int> &program)\
{\ {\
unsigned int memPeak;\ unsigned int memPeak;\
bool msg;\ bool msg;\
@ -145,15 +145,6 @@ std::string typeName(void)
return typeName(typeIdPt<T>()); return typeName(typeIdPt<T>());
} }
// default writers/readers
#ifdef HAVE_HDF5
typedef Hdf5Reader CorrReader;
typedef Hdf5Writer CorrWriter;
#else
typedef XmlReader CorrReader;
typedef XmlWriter CorrWriter;
#endif
END_HADRONS_NAMESPACE END_HADRONS_NAMESPACE
#endif // Hadrons_Global_hpp_ #endif // Hadrons_Global_hpp_
@ -29,20 +29,12 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Grid/Hadrons/Modules/MAction/DWF.hpp> #include <Grid/Hadrons/Modules/MAction/DWF.hpp>
#include <Grid/Hadrons/Modules/MAction/Wilson.hpp> #include <Grid/Hadrons/Modules/MAction/Wilson.hpp>
#include <Grid/Hadrons/Modules/MContraction/Baryon.hpp> #include <Grid/Hadrons/Modules/MContraction/Baryon.hpp>
#include <Grid/Hadrons/Modules/MContraction/DiscLoop.hpp>
#include <Grid/Hadrons/Modules/MContraction/Gamma3pt.hpp>
#include <Grid/Hadrons/Modules/MContraction/Meson.hpp> #include <Grid/Hadrons/Modules/MContraction/Meson.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
#include <Grid/Hadrons/Modules/MGauge/Load.hpp> #include <Grid/Hadrons/Modules/MGauge/Load.hpp>
#include <Grid/Hadrons/Modules/MGauge/Random.hpp> #include <Grid/Hadrons/Modules/MGauge/Random.hpp>
#include <Grid/Hadrons/Modules/MGauge/Unit.hpp> #include <Grid/Hadrons/Modules/MGauge/Unit.hpp>
#include <Grid/Hadrons/Modules/MLoop/NoiseLoop.hpp>
#include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp> #include <Grid/Hadrons/Modules/MSolver/RBPrecCG.hpp>
#include <Grid/Hadrons/Modules/MSource/Point.hpp> #include <Grid/Hadrons/Modules/MSource/Point.hpp>
#include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp> #include <Grid/Hadrons/Modules/MSource/SeqGamma.hpp>
#include <Grid/Hadrons/Modules/MSource/Wall.hpp>
#include <Grid/Hadrons/Modules/MSource/Z2.hpp> #include <Grid/Hadrons/Modules/MSource/Z2.hpp>
#include <Grid/Hadrons/Modules/Quark.hpp> #include <Grid/Hadrons/Modules/Quark.hpp>
@ -48,8 +48,7 @@ public:
std::string, gauge, std::string, gauge,
unsigned int, Ls, unsigned int, Ls,
double , mass, double , mass,
double , M5, double , M5);
std::string , boundary);
}; };
template <typename FImpl> template <typename FImpl>
@ -117,19 +116,14 @@ void TDWF<FImpl>::execute(void)
<< par().mass << ", M5= " << par().M5 << " and Ls= " << par().mass << ", M5= " << par().M5 << " and Ls= "
<< par().Ls << " using gauge field '" << par().gauge << "'" << par().Ls << " using gauge field '" << par().gauge << "'"
<< std::endl; << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
env().createGrid(par().Ls); env().createGrid(par().Ls);
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge); auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &g4 = *env().getGrid(); auto &g4 = *env().getGrid();
auto &grb4 = *env().getRbGrid(); auto &grb4 = *env().getRbGrid();
auto &g5 = *env().getGrid(par().Ls); auto &g5 = *env().getGrid(par().Ls);
auto &grb5 = *env().getRbGrid(par().Ls); auto &grb5 = *env().getRbGrid(par().Ls);
std::vector<Complex> boundary = strToVec<Complex>(par().boundary);
typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
par().mass, par().M5, par().mass, par().M5);
implParams);
env().setObject(getName(), fMatPt); env().setObject(getName(), fMatPt);
} }
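Editorial note on the extra boundary parameter visible on one side of this hunk: it is a space-separated list of complex phases, one per direction, parsed with strToVec<Complex> and handed to the fermion ImplParams. A minimal hedged sketch with a concrete value; the value "1 1 1 -1" (periodic in space, antiperiodic in time) is my illustration, not part of the patch:

    // Hedged sketch, not patch content: periodic spatial and antiperiodic
    // temporal boundary phases for the domain-wall operator, reusing the
    // helpers shown in the hunk above.
    std::vector<Complex> boundary = strToVec<Complex>("1 1 1 -1");
    typename DomainWallFermion<FImpl>::ImplParams implParams(boundary);
    FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4,
                                                par().mass, par().M5, implParams);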
@ -46,8 +46,7 @@ class WilsonPar: Serializable
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar,
std::string, gauge, std::string, gauge,
double , mass, double , mass);
std::string, boundary);
}; };
template <typename FImpl> template <typename FImpl>
@ -113,15 +112,10 @@ void TWilson<FImpl>::execute()
{ {
LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass
<< " using gauge field '" << par().gauge << "'" << std::endl; << " using gauge field '" << par().gauge << "'" << std::endl;
LOG(Message) << "Fermion boundary conditions: " << par().boundary
<< std::endl;
auto &U = *env().template getObject<LatticeGaugeField>(par().gauge); auto &U = *env().template getObject<LatticeGaugeField>(par().gauge);
auto &grid = *env().getGrid(); auto &grid = *env().getGrid();
auto &gridRb = *env().getRbGrid(); auto &gridRb = *env().getRbGrid();
std::vector<Complex> boundary = strToVec<Complex>(par().boundary); FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass);
typename WilsonFermion<FImpl>::ImplParams implParams(boundary);
FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass,
implParams);
env().setObject(getName(), fMatPt); env().setObject(getName(), fMatPt);
} }
@ -112,7 +112,7 @@ void TBaryon<FImpl1, FImpl2, FImpl3>::execute(void)
<< " quarks '" << par().q1 << "', '" << par().q2 << "', and '" << " quarks '" << par().q1 << "', '" << par().q2 << "', and '"
<< par().q3 << "'" << std::endl; << par().q3 << "'" << std::endl;
CorrWriter writer(par().output); XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1); PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2); PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2); PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q2);
@ -1,144 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/DiscLoop.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_DiscLoop_hpp_
#define Hadrons_DiscLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* DiscLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class DiscLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(DiscLoopPar,
std::string, q_loop,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl>
class TDiscLoop: public Module<DiscLoopPar>
{
TYPE_ALIASES(FImpl,);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TDiscLoop(const std::string name);
// destructor
virtual ~TDiscLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(DiscLoop, TDiscLoop<FIMPL>, MContraction);
/******************************************************************************
* TDiscLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TDiscLoop<FImpl>::TDiscLoop(const std::string name)
: Module<DiscLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q_loop};
return in;
}
template <typename FImpl>
std::vector<std::string> TDiscLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TDiscLoop<FImpl>::execute(void)
{
LOG(Message) << "Computing disconnected loop contraction '" << getName()
<< "' using '" << par().q_loop << "' with " << par().gamma
<< " insertion." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q_loop = *env().template getObject<PropagatorField>(par().q_loop);
LatticeComplex c(env().getGrid());
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(gamma*q_loop);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "disc", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_DiscLoop_hpp_
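Editorial note, not part of the patch: in my reading of TDiscLoop::execute above, the quantity written per timeslice is the zero-momentum disconnected loop

    C(t) = \sum_{\vec{x}} \mathrm{Tr}\left[ \Gamma \, L(\vec{x}, t) \right]

where L is the propagator named by q_loop and \Gamma is the inserted gamma matrix; sliceSum performs the spatial sum and TensorRemove strips the trivial tensor structure.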
@ -1,170 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/Gamma3pt.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_Gamma3pt_hpp_
#define Hadrons_Gamma3pt_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
* 3pt contraction with gamma matrix insertion.
*
* Schematic:
*
* q2 q3
* /----<------*------<----¬
* / gamma \
* / \
* i * * f
* \ /
* \ /
* \----------->----------/
* q1
*
* trace(g5*q1*adj(q2)*g5*gamma*q3)
*/
/******************************************************************************
* Gamma3pt *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
class Gamma3ptPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Gamma3ptPar,
std::string, q1,
std::string, q2,
std::string, q3,
Gamma::Algebra, gamma,
std::string, output);
};
template <typename FImpl1, typename FImpl2, typename FImpl3>
class TGamma3pt: public Module<Gamma3ptPar>
{
TYPE_ALIASES(FImpl1, 1);
TYPE_ALIASES(FImpl2, 2);
TYPE_ALIASES(FImpl3, 3);
class Result: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,
Gamma::Algebra, gamma,
std::vector<Complex>, corr);
};
public:
// constructor
TGamma3pt(const std::string name);
// destructor
virtual ~TGamma3pt(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Gamma3pt, ARG(TGamma3pt<FIMPL, FIMPL, FIMPL>), MContraction);
/******************************************************************************
* TGamma3pt implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
TGamma3pt<FImpl1, FImpl2, FImpl3>::TGamma3pt(const std::string name)
: Module<Gamma3ptPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3};
return in;
}
template <typename FImpl1, typename FImpl2, typename FImpl3>
std::vector<std::string> TGamma3pt<FImpl1, FImpl2, FImpl3>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2, typename FImpl3>
void TGamma3pt<FImpl1, FImpl2, FImpl3>::execute(void)
{
LOG(Message) << "Computing 3pt contractions '" << getName() << "' using"
<< " quarks '" << par().q1 << "', '" << par().q2 << "' and '"
<< par().q3 << "', with " << par().gamma << " insertion."
<< std::endl;
CorrWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
PropagatorField3 &q3 = *env().template getObject<PropagatorField3>(par().q3);
LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5);
Gamma gamma(par().gamma);
std::vector<TComplex> buf;
Result result;
c = trace(g5*q1*adj(q2)*(g5*gamma)*q3);
sliceSum(c, buf, Tp);
result.gamma = par().gamma;
result.corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result.corr[t] = TensorRemove(buf[t]);
}
write(writer, "gamma3pt", result);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_Gamma3pt_hpp_
@ -6,10 +6,8 @@ Source file: extras/Hadrons/Modules/MContraction/Meson.hpp
Copyright (C) 2015 Copyright (C) 2015
Copyright (C) 2016 Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -38,39 +36,20 @@ See the full license in the file "LICENSE" in the top level distribution directo
BEGIN_HADRONS_NAMESPACE BEGIN_HADRONS_NAMESPACE
/*
Meson contractions
-----------------------------
* options:
- q1: input propagator 1 (string)
- q2: input propagator 2 (string)
- gammas: gamma products to insert at sink & source, pairs of gamma matrices
(space-separated strings) in angled brackets (i.e. <g_sink g_src>),
in a sequence (e.g. "<Gamma5 Gamma5><Gamma5 GammaT>").
Special values: "all" - perform all possible contractions.
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0."),
given as multiples of (2*pi) / L.
*/
/****************************************************************************** /******************************************************************************
* TMeson * * TMeson *
******************************************************************************/ ******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction) BEGIN_MODULE_NAMESPACE(MContraction)
typedef std::pair<Gamma::Algebra, Gamma::Algebra> GammaPair;
class MesonPar: Serializable class MesonPar: Serializable
{ {
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar, GRID_SERIALIZABLE_CLASS_MEMBERS(MesonPar,
std::string, q1, std::string, q1,
std::string, q2, std::string, q2,
std::string, gammas, std::string, output,
std::string, mom, Gamma::Algebra, gammaSource,
std::string, output); Gamma::Algebra, gammaSink);
}; };
template <typename FImpl1, typename FImpl2> template <typename FImpl1, typename FImpl2>
@ -82,10 +61,7 @@ public:
class Result: Serializable class Result: Serializable
{ {
public: public:
GRID_SERIALIZABLE_CLASS_MEMBERS(Result, GRID_SERIALIZABLE_CLASS_MEMBERS(Result, std::vector<Complex>, corr);
Gamma::Algebra, gamma_snk,
Gamma::Algebra, gamma_src,
std::vector<Complex>, corr);
}; };
public: public:
// constructor // constructor
@ -95,7 +71,6 @@ public:
// dependencies/products // dependencies/products
virtual std::vector<std::string> getInput(void); virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void); virtual std::vector<std::string> getOutput(void);
virtual void parseGammaString(std::vector<GammaPair> &gammaList);
// execution // execution
virtual void execute(void); virtual void execute(void);
}; };
@ -128,31 +103,6 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void)
return output; return output;
} }
template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList)
{
gammaList.clear();
// Determine gamma matrices to insert at source/sink.
if (par().gammas.compare("all") == 0)
{
// Do all contractions.
for (unsigned int i = 1; i < Gamma::nGamma; i += 2)
{
for (unsigned int j = 1; j < Gamma::nGamma; j += 2)
{
gammaList.push_back(std::make_pair((Gamma::Algebra)i,
(Gamma::Algebra)j));
}
}
}
else
{
// Parse individual contractions from input string.
gammaList = strToVec<GammaPair>(par().gammas);
}
}
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
template <typename FImpl1, typename FImpl2> template <typename FImpl1, typename FImpl2>
void TMeson<FImpl1, FImpl2>::execute(void) void TMeson<FImpl1, FImpl2>::execute(void)
@ -161,44 +111,21 @@ void TMeson<FImpl1, FImpl2>::execute(void)
<< " quarks '" << par().q1 << "' and '" << par().q2 << "'" << " quarks '" << par().q1 << "' and '" << par().q2 << "'"
<< std::endl; << std::endl;
CorrWriter writer(par().output); XmlWriter writer(par().output);
PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1); PropagatorField1 &q1 = *env().template getObject<PropagatorField1>(par().q1);
PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2); PropagatorField2 &q2 = *env().template getObject<PropagatorField2>(par().q2);
LatticeComplex c(env().getGrid()); LatticeComplex c(env().getGrid());
Gamma g5(Gamma::Algebra::Gamma5); Gamma gSrc(par().gammaSource), gSnk(par().gammaSink);
std::vector<GammaPair> gammaList; Gamma g5(Gamma::Algebra::Gamma5);
std::vector<TComplex> buf; std::vector<TComplex> buf;
std::vector<Result> result; Result result;
std::vector<Real> p;
p = strToVec<Real>(par().mom);
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
Complex i(0.0,1.0);
ph = zero;
for(unsigned int mu = 0; mu < env().getNd(); mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
parseGammaString(gammaList); c = trace(gSnk*q1*adj(gSrc)*g5*adj(q2)*g5);
sliceSum(c, buf, Tp);
result.resize(gammaList.size()); result.corr.resize(buf.size());
for (unsigned int i = 0; i < result.size(); ++i) for (unsigned int t = 0; t < buf.size(); ++t)
{ {
Gamma gSnk(gammaList[i].first); result.corr[t] = TensorRemove(buf[t]);
Gamma gSrc(gammaList[i].second);
c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph;
sliceSum(c, buf, Tp);
result[i].gamma_snk = gammaList[i].first;
result[i].gamma_src = gammaList[i].second;
result[i].corr.resize(buf.size());
for (unsigned int t = 0; t < buf.size(); ++t)
{
result[i].corr[t] = TensorRemove(buf[t]);
}
} }
write(writer, "meson", result); write(writer, "meson", result);
} }
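Editorial note, not part of the patch: on the side of this hunk carrying the gammas/mom parameters, each element of the result vector corresponds, as far as I can read the code, to

    C_{\Gamma_{\mathrm{snk}},\Gamma_{\mathrm{src}}}(t, \vec{p}) = \sum_{\vec{x}} \mathrm{Tr}\left[ (\gamma_5 \Gamma_{\mathrm{snk}})\, q_1(\vec{x},t)\, (\Gamma_{\mathrm{src}}^{\dagger} \gamma_5)\, q_2^{\dagger}(\vec{x},t) \right] e^{\, 2\pi i \sum_{\mu} p_\mu x_\mu / L_\mu}

with the momentum components p_\mu given in units of 2\pi/L_\mu, as stated in the options comment.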
@ -1,114 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonian_hpp_
#define Hadrons_WeakHamiltonian_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonian *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
/*******************************************************************************
* Utilities for contractions involving the Weak Hamiltonian.
******************************************************************************/
//// Sum and store correlator.
#define MAKE_DIAG(exp, buf, res, n)\
sliceSum(exp, buf, Tp);\
res.name = (n);\
res.corr.resize(buf.size());\
for (unsigned int t = 0; t < buf.size(); ++t)\
{\
res.corr[t] = TensorRemove(buf[t]);\
}
//// Contraction of mu index: use 'mu' variable in exp.
#define SUM_MU(buf,exp)\
buf = zero;\
for (unsigned int mu = 0; mu < ndim; ++mu)\
{\
buf += exp;\
}
enum
{
i_V = 0,
i_A = 1,
n_i = 2
};
class WeakHamiltonianPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WeakHamiltonianPar,
std::string, q1,
std::string, q2,
std::string, q3,
std::string, q4,
std::string, output);
};
#define MAKE_WEAK_MODULE(modname)\
class T##modname: public Module<WeakHamiltonianPar>\
{\
public:\
TYPE_ALIASES(FIMPL,)\
class Result: Serializable\
{\
public:\
GRID_SERIALIZABLE_CLASS_MEMBERS(Result,\
std::string, name,\
std::vector<Complex>, corr);\
};\
public:\
/* constructor */ \
T##modname(const std::string name);\
/* destructor */ \
virtual ~T##modname(void) = default;\
/* dependency relation */ \
virtual std::vector<std::string> getInput(void);\
virtual std::vector<std::string> getOutput(void);\
/* setup */ \
virtual void setup(void);\
/* execution */ \
virtual void execute(void);\
std::vector<std::string> VA_label = {"V", "A"};\
};\
MODULE_REGISTER_NS(modname, T##modname, MContraction);
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonian_hpp_
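To make the two helper macros above easier to follow, here is roughly what the pair SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu])) and MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S") from the Eye-type module below expand to; an editorial sketch of the preprocessor output, not code taken from the patch:

    // SUM_MU: accumulate the Lorentz-contracted expression over mu.
    expbuf = zero;
    for (unsigned int mu = 0; mu < ndim; ++mu)
    {
        expbuf += trace(S_body[mu]*S_loop[mu]);
    }
    // MAKE_DIAG: project onto timeslices and store the labelled correlator.
    sliceSum(expbuf, corrbuf, Tp);
    result[S_diag].name = "HW_S";
    result[S_diag].corr.resize(corrbuf.size());
    for (unsigned int t = 0; t < corrbuf.size(); ++t)
    {
        result[S_diag].corr[t] = TensorRemove(corrbuf[t]);
    }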
@ -1,137 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematics: q4 |
* /-<-¬ |
* / \ | q2 q3
* \ / | /----<------*------<----¬
* q2 \ / q3 | / /-*-¬ \
* /-----<-----* *-----<----¬ | / / \ \
* i * H_W * f | i * \ / q4 * f
* \ / | \ \->-/ /
* \ / | \ /
* \---------->---------/ | \----------->----------/
* q1 | q1
* |
* Saucer (S) | Eye (E)
*
* S: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1]*q4*gL[mu][p_2])
* E: trace(q3*g5*q1*adj(q2)*g5*gL[mu][p_1])*trace(q4*gL[mu][p_2])
*/
/******************************************************************************
* TWeakHamiltonianEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianEye::TWeakHamiltonianEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_eye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> S_body(ndim, tmp1);
std::vector<PropagatorField> S_loop(ndim, tmp1);
std::vector<LatticeComplex> E_body(ndim, tmp2);
std::vector<LatticeComplex> E_loop(ndim, tmp2);
// Setup for S-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
S_body[mu] = MAKE_SE_BODY(q1, q2, q3, GammaL(Gamma::gmu[mu]));
S_loop[mu] = MAKE_SE_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
// Perform S-type contractions.
SUM_MU(expbuf, trace(S_body[mu]*S_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[S_diag], "HW_S")
// Recycle sub-expressions for E-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
E_body[mu] = trace(S_body[mu]);
E_loop[mu] = trace(S_loop[mu]);
}
// Perform E-type contractions.
SUM_MU(expbuf, E_body[mu]*E_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[E_diag], "HW_E")
write(writer, "HW_Eye", result);
}
@ -1,58 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianEye_hpp_
#define Hadrons_WeakHamiltonianEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
S_diag = 0,
E_diag = 1,
n_eye_diag = 2
};
// Saucer and Eye subdiagram contractions.
#define MAKE_SE_BODY(Q_1, Q_2, Q_3, gamma) (Q_3*g5*Q_1*adj(Q_2)*g5*gamma)
#define MAKE_SE_LOOP(Q_loop, gamma) (Q_loop*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianEye_hpp_
@ -1,139 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian current-current contractions, Non-Eye-type.
*
* These contractions are generated by the Q1 and Q2 operators in the physical
* basis (see e.g. Fig 3 of arXiv:1507.03094).
*
* Schematic:
* q2 q3 | q2 q3
* /--<--¬ /--<--¬ | /--<--¬ /--<--¬
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* / \ / \ | / \ / \
* i * * H_W * f | i * * * H_W * f
* \ * | | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \ / \ /
* \ / \ / | \-->--/ \-->--/
* \-->--/ \-->--/ | q1 q4
* q1 q4 |
* Connected (C) | Wing (W)
*
* C: trace(q1*adj(q2)*g5*gL[mu]*q3*adj(q4)*g5*gL[mu])
* W: trace(q1*adj(q2)*g5*gL[mu])*trace(q3*adj(q4)*g5*gL[mu])
*
*/
/******************************************************************************
* TWeakHamiltonianNonEye implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakHamiltonianNonEye::TWeakHamiltonianNonEye(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakHamiltonianNonEye::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakHamiltonianNonEye::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakHamiltonianNonEye::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian (Non-Eye type) contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_noneye_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp1(env().getGrid());
LatticeComplex tmp2(env().getGrid());
std::vector<PropagatorField> C_i_side_loop(ndim, tmp1);
std::vector<PropagatorField> C_f_side_loop(ndim, tmp1);
std::vector<LatticeComplex> W_i_side_loop(ndim, tmp2);
std::vector<LatticeComplex> W_f_side_loop(ndim, tmp2);
// Setup for C-type contractions.
for (int mu = 0; mu < ndim; ++mu)
{
C_i_side_loop[mu] = MAKE_CW_SUBDIAG(q1, q2, GammaL(Gamma::gmu[mu]));
C_f_side_loop[mu] = MAKE_CW_SUBDIAG(q3, q4, GammaL(Gamma::gmu[mu]));
}
// Perform C-type contractions.
SUM_MU(expbuf, trace(C_i_side_loop[mu]*C_f_side_loop[mu]))
MAKE_DIAG(expbuf, corrbuf, result[C_diag], "HW_C")
// Recycle sub-expressions for W-type contractions.
for (unsigned int mu = 0; mu < ndim; ++mu)
{
W_i_side_loop[mu] = trace(C_i_side_loop[mu]);
W_f_side_loop[mu] = trace(C_f_side_loop[mu]);
}
// Perform W-type contractions.
SUM_MU(expbuf, W_i_side_loop[mu]*W_f_side_loop[mu])
MAKE_DIAG(expbuf, corrbuf, result[W_diag], "HW_W")
write(writer, "HW_NonEye", result);
}
@ -1,57 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakHamiltonianNonEye_hpp_
#define Hadrons_WeakHamiltonianNonEye_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakHamiltonianNonEye *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
W_diag = 0,
C_diag = 1,
n_noneye_diag = 2
};
// Wing and Connected subdiagram contractions
#define MAKE_CW_SUBDIAG(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
MAKE_WEAK_MODULE(WeakHamiltonianNonEye)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakHamiltonianNonEye_hpp_
@ -1,135 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.cc
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp>
using namespace Grid;
using namespace Hadrons;
using namespace MContraction;
/*
* Weak Hamiltonian + current contractions, disconnected topology for neutral
* mesons.
*
* These contractions are generated by operators Q_1,...,10 of the dS=1 Weak
* Hamiltonian in the physical basis and an additional current J (see e.g.
* Fig 11 of arXiv:1507.03094).
*
* Schematic:
*
* q2 q4 q3
* /--<--¬ /---<--¬ /---<--¬
* / \ / \ / \
* i * * H_W | J * * f
* \ / \ / \ /
* \--->---/ \-------/ \------/
* q1
*
* options
* - q1: input propagator 1 (string)
* - q2: input propagator 2 (string)
* - q3: input propagator 3 (string), assumed to be sequential propagator
* - q4: input propagator 4 (string), assumed to be a loop
*
* type 1: trace(q1*adj(q2)*g5*gL[mu])*trace(loop*gL[mu])*trace(q3*g5)
* type 2: trace(q1*adj(q2)*g5*gL[mu]*loop*gL[mu])*trace(q3*g5)
*/
/*******************************************************************************
* TWeakNeutral4ptDisc implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
TWeakNeutral4ptDisc::TWeakNeutral4ptDisc(const std::string name)
: Module<WeakHamiltonianPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
std::vector<std::string> TWeakNeutral4ptDisc::getInput(void)
{
std::vector<std::string> in = {par().q1, par().q2, par().q3, par().q4};
return in;
}
std::vector<std::string> TWeakNeutral4ptDisc::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::setup(void)
{
}
// execution ///////////////////////////////////////////////////////////////////
void TWeakNeutral4ptDisc::execute(void)
{
LOG(Message) << "Computing Weak Hamiltonian neutral disconnected contractions '"
<< getName() << "' using quarks '" << par().q1 << "', '"
<< par().q2 << ", '" << par().q3 << "' and '" << par().q4
<< "'." << std::endl;
CorrWriter writer(par().output);
PropagatorField &q1 = *env().template getObject<PropagatorField>(par().q1);
PropagatorField &q2 = *env().template getObject<PropagatorField>(par().q2);
PropagatorField &q3 = *env().template getObject<PropagatorField>(par().q3);
PropagatorField &q4 = *env().template getObject<PropagatorField>(par().q4);
Gamma g5 = Gamma(Gamma::Algebra::Gamma5);
LatticeComplex expbuf(env().getGrid());
std::vector<TComplex> corrbuf;
std::vector<Result> result(n_neut_disc_diag);
unsigned int ndim = env().getNd();
PropagatorField tmp(env().getGrid());
std::vector<PropagatorField> meson(ndim, tmp);
std::vector<PropagatorField> loop(ndim, tmp);
LatticeComplex curr(env().getGrid());
// Setup for type 1 contractions.
for (int mu = 0; mu < ndim; ++mu)
{
meson[mu] = MAKE_DISC_MESON(q1, q2, GammaL(Gamma::gmu[mu]));
loop[mu] = MAKE_DISC_LOOP(q4, GammaL(Gamma::gmu[mu]));
}
curr = MAKE_DISC_CURR(q3, GammaL(Gamma::Algebra::Gamma5));
// Perform type 1 contractions.
SUM_MU(expbuf, trace(meson[mu]*loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_1_diag], "HW_disc0_1")
// Perform type 2 contractions.
SUM_MU(expbuf, trace(meson[mu])*trace(loop[mu]))
expbuf *= curr;
MAKE_DIAG(expbuf, corrbuf, result[neut_disc_2_diag], "HW_disc0_2")
write(writer, "HW_disc0", result);
}
@ -1,59 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WeakNeutral4ptDisc_hpp_
#define Hadrons_WeakNeutral4ptDisc_hpp_
#include <Grid/Hadrons/Modules/MContraction/WeakHamiltonian.hpp>
BEGIN_HADRONS_NAMESPACE
/******************************************************************************
* WeakNeutral4ptDisc *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MContraction)
enum
{
neut_disc_1_diag = 0,
neut_disc_2_diag = 1,
n_neut_disc_diag = 2
};
// Neutral 4pt disconnected subdiagram contractions.
#define MAKE_DISC_MESON(Q_1, Q_2, gamma) (Q_1*adj(Q_2)*g5*gamma)
#define MAKE_DISC_LOOP(Q_LOOP, gamma) (Q_LOOP*gamma)
#define MAKE_DISC_CURR(Q_c, gamma) (trace(Q_c*gamma))
MAKE_WEAK_MODULE(WeakNeutral4ptDisc)
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WeakNeutral4ptDisc_hpp_
@ -65,7 +65,7 @@ void TLoad::setup(void)
// execution /////////////////////////////////////////////////////////////////// // execution ///////////////////////////////////////////////////////////////////
void TLoad::execute(void) void TLoad::execute(void)
{ {
FieldMetaData header; NerscField header;
std::string fileName = par().file + "." std::string fileName = par().file + "."
+ std::to_string(env().getTrajectory()); + std::to_string(env().getTrajectory());
@ -74,5 +74,5 @@ void TLoad::execute(void)
LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName());
NerscIO::readConfiguration(U, header, fileName); NerscIO::readConfiguration(U, header, fileName);
LOG(Message) << "NERSC header:" << std::endl; LOG(Message) << "NERSC header:" << std::endl;
dump_meta_data(header, LOG(Message)); dump_nersc_header(header, LOG(Message));
} }
@ -1,132 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MLoop/NoiseLoop.hpp
Copyright (C) 2016
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_NoiseLoop_hpp_
#define Hadrons_NoiseLoop_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Noise loop propagator
-----------------------------
* loop_x = q_x * adj(eta_x)
* options:
- q = Result of inversion on noise source.
- eta = noise source.
*/
/******************************************************************************
* NoiseLoop *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MLoop)
class NoiseLoopPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(NoiseLoopPar,
std::string, q,
std::string, eta);
};
template <typename FImpl>
class TNoiseLoop: public Module<NoiseLoopPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TNoiseLoop(const std::string name);
// destructor
virtual ~TNoiseLoop(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(NoiseLoop, TNoiseLoop<FIMPL>, MLoop);
/******************************************************************************
* TNoiseLoop implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TNoiseLoop<FImpl>::TNoiseLoop(const std::string name)
: Module<NoiseLoopPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getInput(void)
{
std::vector<std::string> in = {par().q, par().eta};
return in;
}
template <typename FImpl>
std::vector<std::string> TNoiseLoop<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TNoiseLoop<FImpl>::execute(void)
{
PropagatorField &loop = *env().template createLattice<PropagatorField>(getName());
PropagatorField &q = *env().template getObject<PropagatorField>(par().q);
PropagatorField &eta = *env().template getObject<PropagatorField>(par().eta);
loop = q*adj(eta);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_NoiseLoop_hpp_
@ -6,7 +6,6 @@ Source file: extras/Hadrons/Modules/MSource/SeqGamma.hpp
Copyright (C) 2015 Copyright (C) 2015
Copyright (C) 2016 Copyright (C) 2016
Copyright (C) 2017
Author: Antonin Portelli <antonin.portelli@me.com> Author: Antonin Portelli <antonin.portelli@me.com>
@ -150,9 +149,9 @@ void TSeqGamma<FImpl>::execute(void)
for(unsigned int mu = 0; mu < env().getNd(); mu++) for(unsigned int mu = 0; mu < env().getNd(); mu++)
{ {
LatticeCoordinate(coor, mu); LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); ph = ph + p[mu]*coor;
} }
ph = exp((Real)(2*M_PI)*i*ph); ph = exp(i*ph);
LatticeCoordinate(t, Tp); LatticeCoordinate(t, Tp);
src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q); src = where((t >= par().tA) and (t <= par().tB), ph*(g*q), 0.*q);
} }
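Editorial note on this hunk, my interpretation rather than text from the patch: the two sides differ in the momentum convention for the sequential-source phase,

    e^{\, i \sum_\mu p_\mu x_\mu}   versus   e^{\, 2\pi i \sum_\mu p_\mu x_\mu / L_\mu}

that is, one side reads the mom components directly in radians per site, while the other reads them as multiples of 2\pi/L_\mu, matching the convention used by the wall source and the meson momentum insertion elsewhere in this compare.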
@ -1,147 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: extras/Hadrons/Modules/MSource/Wall.hpp
Copyright (C) 2017
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef Hadrons_WallSource_hpp_
#define Hadrons_WallSource_hpp_
#include <Grid/Hadrons/Global.hpp>
#include <Grid/Hadrons/Module.hpp>
#include <Grid/Hadrons/ModuleFactory.hpp>
BEGIN_HADRONS_NAMESPACE
/*
Wall source
-----------------------------
* src_x = delta(x_3 - tW) * exp(i x.mom)
* options:
- tW: source timeslice (integer)
- mom: momentum insertion, space-separated float sequence (e.g ".1 .2 1. 0.")
*/
/******************************************************************************
* Wall *
******************************************************************************/
BEGIN_MODULE_NAMESPACE(MSource)
class WallPar: Serializable
{
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(WallPar,
unsigned int, tW,
std::string, mom);
};
template <typename FImpl>
class TWall: public Module<WallPar>
{
public:
TYPE_ALIASES(FImpl,);
public:
// constructor
TWall(const std::string name);
// destructor
virtual ~TWall(void) = default;
// dependency relation
virtual std::vector<std::string> getInput(void);
virtual std::vector<std::string> getOutput(void);
// setup
virtual void setup(void);
// execution
virtual void execute(void);
};
MODULE_REGISTER_NS(Wall, TWall<FIMPL>, MSource);
/******************************************************************************
* TWall implementation *
******************************************************************************/
// constructor /////////////////////////////////////////////////////////////////
template <typename FImpl>
TWall<FImpl>::TWall(const std::string name)
: Module<WallPar>(name)
{}
// dependencies/products ///////////////////////////////////////////////////////
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getInput(void)
{
std::vector<std::string> in;
return in;
}
template <typename FImpl>
std::vector<std::string> TWall<FImpl>::getOutput(void)
{
std::vector<std::string> out = {getName()};
return out;
}
// setup ///////////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::setup(void)
{
env().template registerLattice<PropagatorField>(getName());
}
// execution ///////////////////////////////////////////////////////////////////
template <typename FImpl>
void TWall<FImpl>::execute(void)
{
LOG(Message) << "Generating wall source at t = " << par().tW
<< " with momentum " << par().mom << std::endl;
PropagatorField &src = *env().template createLattice<PropagatorField>(getName());
Lattice<iScalar<vInteger>> t(env().getGrid());
LatticeComplex ph(env().getGrid()), coor(env().getGrid());
std::vector<Real> p;
Complex i(0.0,1.0);
p = strToVec<Real>(par().mom);
ph = zero;
for(unsigned int mu = 0; mu < Nd; mu++)
{
LatticeCoordinate(coor, mu);
ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu])));
}
ph = exp((Real)(2*M_PI)*i*ph);
LatticeCoordinate(t, Tp);
src = 1.;
src = where((t == par().tW), src*ph, 0.*src);
}
END_MODULE_NAMESPACE
END_HADRONS_NAMESPACE
#endif // Hadrons_WallSource_hpp_
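As a hedged illustration of the wall source documented above, src_x = delta(x_3 - tW) * exp(i x.mom), here is a standalone sketch that keeps the momentum phase only on the chosen timeslice (plain C++, not Hadrons code; the flat site container and names are invented):

#include <cmath>
#include <complex>
#include <vector>

// src(x) = exp(2*pi*i * p.x/L) if x[Tdir] == tW, and 0 elsewhere.
void fillWallSource(std::vector<std::complex<double>> &src,
                    const std::vector<std::vector<int>> &coords, // one coordinate vector per site
                    const std::vector<double> &p,
                    const std::vector<int> &L,
                    int tW, int Tdir)
{
    for (std::size_t s = 0; s < src.size(); ++s) {
        if (coords[s][Tdir] != tW) { src[s] = 0.; continue; }
        double arg = 0.;
        for (unsigned int mu = 0; mu < p.size(); ++mu)
            arg += p[mu] * coords[s][mu] / static_cast<double>(L[mu]);
        src[s] = std::exp(std::complex<double>(0., 2. * M_PI * arg));
    }
}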


@ -173,7 +173,7 @@ void TQuark<FImpl>::execute(void)
*env().template getObject<PropagatorField>(getName()); *env().template getObject<PropagatorField>(getName());
axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0);
axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); axpby_ssp_pplus(sol, 0., sol, 1., sol, 0, Ls_-1);
ExtractSlice(tmp, sol, 0, 0); ExtractSlice(tmp, sol, 0, 0);
FermToProp(p4d, tmp, s, c); FermToProp(p4d, tmp, s, c);
} }


@ -1,7 +1,4 @@
modules_cc =\ modules_cc =\
Modules/MContraction/WeakHamiltonianEye.cc \
Modules/MContraction/WeakHamiltonianNonEye.cc \
Modules/MContraction/WeakNeutral4ptDisc.cc \
Modules/MGauge/Load.cc \ Modules/MGauge/Load.cc \
Modules/MGauge/Random.cc \ Modules/MGauge/Random.cc \
Modules/MGauge/Unit.cc Modules/MGauge/Unit.cc
@ -10,21 +7,13 @@ modules_hpp =\
Modules/MAction/DWF.hpp \ Modules/MAction/DWF.hpp \
Modules/MAction/Wilson.hpp \ Modules/MAction/Wilson.hpp \
Modules/MContraction/Baryon.hpp \ Modules/MContraction/Baryon.hpp \
Modules/MContraction/DiscLoop.hpp \
Modules/MContraction/Gamma3pt.hpp \
Modules/MContraction/Meson.hpp \ Modules/MContraction/Meson.hpp \
Modules/MContraction/WeakHamiltonian.hpp \
Modules/MContraction/WeakHamiltonianEye.hpp \
Modules/MContraction/WeakHamiltonianNonEye.hpp \
Modules/MContraction/WeakNeutral4ptDisc.hpp \
Modules/MGauge/Load.hpp \ Modules/MGauge/Load.hpp \
Modules/MGauge/Random.hpp \ Modules/MGauge/Random.hpp \
Modules/MGauge/Unit.hpp \ Modules/MGauge/Unit.hpp \
Modules/MLoop/NoiseLoop.hpp \
Modules/MSolver/RBPrecCG.hpp \ Modules/MSolver/RBPrecCG.hpp \
Modules/MSource/Point.hpp \ Modules/MSource/Point.hpp \
Modules/MSource/SeqGamma.hpp \ Modules/MSource/SeqGamma.hpp \
Modules/MSource/Wall.hpp \
Modules/MSource/Z2.hpp \ Modules/MSource/Z2.hpp \
Modules/Quark.hpp Modules/Quark.hpp


@ -20,17 +20,4 @@ The simple testcase in this directory is the submitted bug report that encapsula
problem. The test case works with icpc and with clang++, but fails consistently on g++ problem. The test case works with icpc and with clang++, but fails consistently on g++
current variants. current variants.
Peter Peter
************
Second GCC bug reported, see Issue 100.
https://wandbox.org/permlink/tzssJza6R9XnqANw
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80652
Travis now fails under gcc-5 for Test_simd, after more comprehensive testing was added to the
CI test suite. This exposes the limitations of Travis runtime limits and its weak cores.
Travis uses 5.4.1 for g++-5.


@ -1,86 +0,0 @@
#! /bin/sh
prefix=@prefix@
exec_prefix=@exec_prefix@
includedir=@includedir@
usage()
{
cat <<EOF
Usage: grid-config [OPTION]
Known values for OPTION are:
--prefix show Grid installation prefix
--cxxflags print pre-processor and compiler flags
--ldflags print library linking flags
--libs print library linking information
--summary print full build summary
--help display this help and exit
--version output version information
--git print git revision
EOF
exit $1
}
if test $# -eq 0; then
usage 1
fi
cflags=false
libs=false
while test $# -gt 0; do
case "$1" in
-*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
*) optarg= ;;
esac
case "$1" in
--prefix)
echo $prefix
;;
--version)
echo @VERSION@
exit 0
;;
--git)
echo "@GRID_BRANCH@ @GRID_SHA@"
exit 0
;;
--help)
usage 0
;;
--cxxflags)
echo @GRID_CXXFLAGS@
;;
--ldflags)
echo @GRID_LDFLAGS@
;;
--libs)
echo @GRID_LIBS@
;;
--summary)
echo ""
echo "@GRID_SUMMARY@"
echo ""
;;
*)
usage
exit 1
;;
esac
shift
done
exit 0


@ -1,37 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/DisableWarnings.h
Copyright (C) 2016
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef DISABLE_WARNINGS_H
#define DISABLE_WARNINGS_H
//disables an Intel compiler specific warning (in json.hpp)
#pragma warning disable 488
#endif


@ -41,9 +41,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
#include <Grid/GridQCDcore.h> #include <Grid/GridQCDcore.h>
#include <Grid/qcd/action/Action.h> #include <Grid/qcd/action/Action.h>
#include <Grid/qcd/utils/GaugeFix.h>
#include <Grid/qcd/smearing/Smearing.h> #include <Grid/qcd/smearing/Smearing.h>
#include <Grid/parallelIO/MetaData.h>
#include <Grid/qcd/hmc/HMC_aggregate.h> #include <Grid/qcd/hmc/HMC_aggregate.h>
#endif #endif


@ -38,7 +38,28 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_BASE_H #ifndef GRID_BASE_H
#define GRID_BASE_H #define GRID_BASE_H
#include <Grid/GridStd.h> ///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
///////////////////
// Grid headers
///////////////////
#include "Config.h"
#include <Grid/perfmon/Timer.h> #include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h> #include <Grid/perfmon/PerfCount.h>


@ -1,29 +0,0 @@
#ifndef GRID_STD_H
#define GRID_STD_H
///////////////////
// Std C++ dependencies
///////////////////
#include <cassert>
#include <complex>
#include <vector>
#include <string>
#include <iostream>
#include <iomanip>
#include <random>
#include <functional>
#include <stdio.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <ctime>
#include <sys/time.h>
#include <chrono>
#include <zlib.h>
///////////////////
// Grid config
///////////////////
#include "Config.h"
#endif /* GRID_STD_H */


@ -1,9 +0,0 @@
#pragma once
#if defined __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#include <Grid/Eigen/Dense>
#if defined __GNUC__
#pragma GCC diagnostic pop
#endif


@ -235,7 +235,7 @@ namespace Grid {
Field tmp(in._grid); Field tmp(in._grid);
_Mat.MeooeDag(in,tmp); _Mat.MeooeDag(in,tmp);
_Mat.MooeeInvDag(tmp,out); _Mat.MooeeInvDag(tmp,out);
_Mat.MeooeDag(out,tmp); _Mat.MeooeDag(out,tmp);
_Mat.MooeeDag(in,out); _Mat.MooeeDag(in,out);


@ -197,9 +197,8 @@ namespace Grid {
void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) { void operator() (LinearOperatorBase<Field> &Linop, const Field &in, Field &out) {
GridBase *grid=in._grid; GridBase *grid=in._grid;
//std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl;
// std::cout << "Chevyshef(): in._grid="<<in._grid<<std::endl; //<<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
//std::cout <<" Linop.Grid()="<<Linop.Grid()<<"Linop.RedBlackGrid()="<<Linop.RedBlackGrid()<<std::endl;
int vol=grid->gSites(); int vol=grid->gSites();


@ -16,7 +16,7 @@
#define INCLUDED_ALG_REMEZ_H #define INCLUDED_ALG_REMEZ_H
#include <stddef.h> #include <stddef.h>
#include <Grid/GridStd.h> #include <Config.h>
#ifdef HAVE_LIBGMP #ifdef HAVE_LIBGMP
#include "bigfloat.h" #include "bigfloat.h"


@ -0,0 +1,137 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/DenseMatrix.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_DENSE_MATRIX_H
#define GRID_DENSE_MATRIX_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Matrix utils
/////////////////////////////////////////////////////////////
template<class T> using DenseVector = std::vector<T>;
template<class T> using DenseMatrix = DenseVector<DenseVector<T> >;
template<class T> void Size(DenseVector<T> & vec, int &N)
{
N= vec.size();
}
template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M)
{
N= mat.size();
M= mat[0].size();
}
template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N)
{
int M; Size(mat,N,M);
assert(N==M);
}
template<class T> void Resize(DenseVector<T > & mat, int N) {
mat.resize(N);
}
template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) {
mat.resize(N);
for(int i=0;i<N;i++){
mat[i].resize(M);
}
}
template<class T> void Fill(DenseMatrix<T> & mat, T&val) {
int N,M;
Size(mat,N,M);
for(int i=0;i<N;i++){
for(int j=0;j<M;j++){
mat[i][j] = val;
}}
}
/** Transpose of a matrix **/
template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){
int N,M;
Size(mat,N,M);
DenseMatrix<T> C; Resize(C,M,N);
for(int i=0;i<M;i++){
for(int j=0;j<N;j++){
C[i][j] = mat[j][i];
}}
return C;
}
/** Set DenseMatrix to unit matrix **/
template<class T> void Unity(DenseMatrix<T> &A){
int N; SizeSquare(A,N);
for(int i=0;i<N;i++){
for(int j=0;j<N;j++){
if ( i==j ) A[i][j] = 1;
else A[i][j] = 0;
}
}
}
/** Add C * I to matrix **/
template<class T>
void PlusUnit(DenseMatrix<T> & A,T c){
int dim; SizeSquare(A,dim);
for(int i=0;i<dim;i++){A[i][i] = A[i][i] + c;}
}
/** return the Hermitian conjugate of matrix **/
template<class T>
DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){
int dim; SizeSquare(mat,dim);
DenseMatrix<T> C; Resize(C,dim,dim);
for(int i=0;i<dim;i++){
for(int j=0;j<dim;j++){
C[i][j] = conj(mat[j][i]);
}
}
return C;
}
/**Get a square submatrix**/
template <class T>
DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end)
{
DenseMatrix<T> H; Resize(H,row_end - row_st,col_end-col_st);
for(int i = row_st; i<row_end; i++){
for(int j = col_st; j<col_end; j++){
H[i-row_st][j-col_st]=A[i][j];
}}
return H;
}
}
#include "Householder.h"
#include "Francis.h"
#endif
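A short usage sketch of the helpers defined above (illustrative only; it assumes this header compiles in its usual Grid context and that the code sits inside, or brings in, namespace Grid):

// Illustrative only: exercises the DenseMatrix helpers above from some test function.
void denseMatrixSketch(void)
{
    DenseMatrix<double> A;
    Resize(A, 3, 3);                        // 3x3, entries zero-initialised by std::vector::resize
    Unity(A);                               // A = I
    PlusUnit(A, 2.0);                       // A = A + 2*I = 3*I
    DenseMatrix<double> At = Transpose(A);  // equal to A here, since A is diagonal
    int N; SizeSquare(At, N);               // N == 3; asserts the matrix is square
}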


@ -0,0 +1,525 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Francis.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef FRANCIS_H
#define FRANCIS_H
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
//#include <timer.h>
//#include <lapacke.h>
//#include <Eigen/Dense>
namespace Grid {
template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
template <class T> int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small);
/**
Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm.
H =
x x x x x x x x x
x x x x x x x x x
0 x x x x x x x x
0 0 x x x x x x x
0 0 0 x x x x x x
0 0 0 0 x x x x x
0 0 0 0 0 x x x x
0 0 0 0 0 0 x x x
0 0 0 0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary.
**/
template <class T>
int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
DenseMatrix<T> H = Hin;
int N ; SizeSquare(H,N);
int M = N;
Fill(evals,0);
Fill(evecs,0);
T s,t,x=0,y=0,z=0;
T u,d;
T apd,amd,bc;
DenseVector<T> p(N,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N); Unity(P);
DenseVector<int> trows(N,0);
/// Check if the matrix is really hessenberg, if not abort
RealD sth = 0;
for(int j=0;j<N;j++){
for(int i=j+2;i<N;i++){
sth = abs(H[i][j]);
if(sth > small){
std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl;
exit(1);
}
}
}
do{
std::cout << "Francis QR Step N = " << N << std::endl;
/** Check for convergence
x x x x x
0 x x x x
0 0 x x x
0 0 x x x
0 0 0 0 x
for this matrix l = 4
**/
do{
l = Chop_subdiag(H,nrm,e,small);
r = 0; ///May have converged on more than one eval
///Single eval
if(l == N-1){
evals[e] = H[l][l];
N--; e++; r++; it = 0;
}
///RealD eval
if(l == N-2){
trows[l+1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l+1][l+1];
amd = H[l][l] - H[l+1][l+1];
bc = (T)4.0*H[l+1][l]*H[l][l+1];
evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) );
evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) );
N-=2; e+=2; r++; it = 0;
}
} while(r>0);
if(N ==0) break;
DenseVector<T > ck; Resize(ck,3);
DenseVector<T> v; Resize(v,3);
for(int m = N-3; m >= l; m--){
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0){
s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) );
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
///Starting vector implicit Q theorem
else{
s = (H[N-2][N-2] + H[N-1][N-1]);
t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]);
x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t;
y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s);
z = H[m+1][m]*H[m+2][m+1];
}
ck[0] = x; ck[1] = y; ck[2] = z;
if(m == l) break;
/** Some stupid thing from Numerical Recipes, seems to work **/
// PAB.. for heaven's sake quote page, purpose, evidence it works.
// what sort of comment is that!?!?!?
u=abs(H[m][m-1])*(abs(y)+abs(z));
d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1]));
if ((T)abs(u+d) == (T)abs(d) ){
l = m; break;
}
//if (u < small){l = m; break;}
}
if(it > 100000){
std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, 2, v, beta);
Householder_mult<T >(H,v,beta,0,l,l+2,0);
Householder_mult<T >(H,v,beta,0,l,l+2,1);
///Accumulate eigenvector
Householder_mult<T >(P,v,beta,0,l,l+2,1);
int sw = 0; ///Are we on the last row?
for(int k=l;k<N-2;k++){
x = H[k+1][k];
y = H[k+2][k];
z = (T)0.0;
if(k+3 <= N-1){
z = H[k+3][k];
} else{
sw = 1;
v[2] = (T)0.0;
}
ck[0] = x; ck[1] = y; ck[2] = z;
normalize(ck);
Householder_vector<T >(ck, 0, 2-sw, v, beta);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0);
Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1);
///Accumulate eigenvector
Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp; Resize(tmp,N);
for(int i=0;i<N;i++){
tmp[i] = evals[N-i-1];
}
evals = tmp;
UTeigenvectors(H, trows, evals, evecs);
for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);}
return tot_it;
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small)
{
/**
Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm.
H =
x x 0 0 0 0
x x x 0 0 0
0 x x x 0 0
0 0 x x x 0
0 0 0 x x x
0 0 0 0 x x
Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. **/
return my_Wilkinson(Hin, evals, evecs, small, small);
}
template <class T>
int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol)
{
int N; SizeSquare(Hin,N);
int M = N;
///I don't want to modify the input but matrices must be passed by reference
//Scale a matrix by its "norm"
//RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm);
DenseMatrix<T> H; H = Hin;
RealD Hnorm = abs(Norm(Hin));
H = H * (1.0 / Hnorm);
// TODO use openmp and memset
Fill(evals,0);
Fill(evecs,0);
T s, t, x = 0, y = 0, z = 0;
T u, d;
T apd, amd, bc;
DenseVector<T> p; Resize(p,N); Fill(p,0);
T nrm = Norm(H); ///DenseMatrix Norm
int n, m;
int e = 0;
int it = 0;
int tot_it = 0;
int l = 0;
int r = 0;
DenseMatrix<T> P; Resize(P,N,N);
Unity(P);
DenseVector<int> trows(N, 0);
/// Check if the matrix is really symm tridiag
RealD sth = 0;
for(int j = 0; j < N; ++j)
{
for(int i = j + 2; i < N; ++i)
{
if(abs(H[i][j]) > tol || abs(H[j][i]) > tol)
{
std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl;
std::cout << "Warning tridiagonalize and call again" << std::endl;
// exit(1); // see what is going on
//return;
}
}
}
do{
do{
//Jasper
//Check if the subdiagonal term is small enough (<small)
//if true then it is converged.
//check start from H.dim - e - 1
//How to deal with more than 2 are converged?
//What if Chop_symm_subdiag returns something in the middle?
//--------------
l = Chop_symm_subdiag(H,nrm, e, small);
r = 0; ///May have converged on more than one eval
//Jasper
//In this case
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x x 0
// 0 0 0 x x 0
// 0 0 0 0 0 x <- l
//--------------
///Single eval
if(l == N - 1)
{
evals[e] = H[l][l];
N--;
e++;
r++;
it = 0;
}
//Jasper
// x x 0 0 0 0
// x x x 0 0 0
// 0 x x x 0 0
// 0 0 x x 0 0
// 0 0 0 0 x x <- l
// 0 0 0 0 x x
//--------------
///RealD eval
if(l == N - 2)
{
trows[l + 1] = 1; ///Needed for UTSolve
apd = H[l][l] + H[l + 1][ l + 1];
amd = H[l][l] - H[l + 1][l + 1];
bc = (T) 4.0 * H[l + 1][l] * H[l][l + 1];
evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc));
evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc));
N -= 2;
e += 2;
r++;
it = 0;
}
}while(r > 0);
//Jasper
//Already converged
//--------------
if(N == 0) break;
DenseVector<T> ck,v; Resize(ck,2); Resize(v,2);
for(int m = N - 3; m >= l; m--)
{
///Starting vector essentially random shift.
if(it%10 == 0 && N >= 3 && it > 0)
{
t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]);
x = H[m][m] - t;
z = H[m + 1][m];
} else {
///Starting vector implicit Q theorem
d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5;
t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]
/ (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2]));
x = H[m][m] - t;
z = H[m + 1][m];
}
//Jasper
//why it is here????
//-----------------------
if(m == l)
break;
u = abs(H[m][m - 1]) * (abs(y) + abs(z));
d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1]));
if ((T)abs(u + d) == (T)abs(d))
{
l = m;
break;
}
}
//Jasper
if(it > 1000000)
{
std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl;
std::cout << "got " << e << " evals " << l << " " << N << std::endl;
exit(1);
}
//
T s, c;
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, l, l + 1, c, -s, 0);
Givens_mult<T>(H, l, l + 1, c, s, 1);
Givens_mult<T>(P, l, l + 1, c, s, 1);
//
for(int k = l; k < N - 2; ++k)
{
x = H[k + 1][k];
z = H[k + 2][k];
Givens_calc<T>(x, z, c, s);
Givens_mult<T>(H, k + 1, k + 2, c, -s, 0);
Givens_mult<T>(H, k + 1, k + 2, c, s, 1);
Givens_mult<T>(P, k + 1, k + 2, c, s, 1);
}
it++;
tot_it++;
}while(N > 1);
N = evals.size();
///Annoying - UT solves in reverse order;
DenseVector<T> tmp(N);
for(int i = 0; i < N; ++i)
tmp[i] = evals[N-i-1];
evals = tmp;
//
UTeigenvectors(H, trows, evals, evecs);
//UTSymmEigenvectors(H, trows, evals, evecs);
for(int i = 0; i < evals.size(); ++i)
{
evecs[i] = P * evecs[i];
normalize(evecs[i]);
evals[i] = evals[i] * Hnorm;
}
// // FIXME this is to test
// Hin.write("evecs3", evecs);
// Hin.write("evals3", evals);
// // check rsd
// for(int i = 0; i < M; i++) {
// vector<T> Aevec = Hin * evecs[i];
// RealD norm2(0.);
// for(int j = 0; j < M; j++) {
// norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]);
// }
// }
return tot_it;
}
template <class T>
void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
/**
turn a matrix A =
x x x x x
x x x x x
x x x x x
x x x x x
x x x x x
into
x x x x x
x x x x x
0 x x x x
0 0 x x x
0 0 0 x x
with householder rotations
Slow.
*/
int N ; SizeSquare(A,N);
DenseVector<T > p; Resize(p,N); Fill(p,0);
for(int k=start;k<N-2;k++){
//cerr << "hess" << k << std::endl;
DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1);
for(int i=k+1;i<N;i++){ck[i-k-1] = A(i,k);} ///kth column
normalize(ck); ///Normalization cancels in PHP anyway
T beta;
Householder_vector<T >(ck, 0, ck.size()-1, v, beta); ///Householder vector
Householder_mult<T>(A,v,beta,start,k+1,N-1,0); ///A -> PA
Householder_mult<T >(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H
///Accumulate eigenvector
Householder_mult<T >(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H
}
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,k,l);
}
}*/
}
template <class T>
void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){
///Tridiagonalize a matrix
int N; SizeSquare(A,N);
Hess(A,Q,start);
/*for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A(0,l,k);
}
}*/
}
template <class T>
void ForceTridiagonal(DenseMatrix<T> &A){
///Tridiagonalize a matrix
int N ; SizeSquare(A,N);
for(int l=0;l<N-2;l++){
for(int k=l+2;k<N;k++){
A[l][k]=0;
A[k][l]=0;
}
}
}
template <class T>
int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a symmetric eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Tri(A,Q,0);
int it = my_Wilkinson<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
template <class T>
int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_Wilkinson(Ain, evals, evecs, small);
}
template <class T>
int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
return my_SymmEigensystem(Ain, evals, evecs, small);
}
template <class T>
int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){
///Solve a general eigensystem, not necessarily in tridiagonal form
int N; SizeSquare(Ain,N);
DenseMatrix<T > A; A = Ain;
DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q);
Hess(A,Q,0);
int it = QReigensystem<T>(A, evals, evecs, small);
for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];}
return it;
}
}
#endif
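The shift used in the "implicit Q theorem" branch of my_Wilkinson above can be written standalone. A sketch in plain C++ for a symmetric trailing 2x2 block [[a, b], [b, c]] (it assumes b and d are not both zero, so the denominator is nonzero; names are illustrative):

#include <cmath>

// Wilkinson shift: the eigenvalue of [[a, b], [b, c]] closer to c.
double wilkinsonShift(double a, double b, double c)
{
    double d   = 0.5 * (a - c);
    double sgn = (d >= 0.0) ? 1.0 : -1.0;
    return c - b * b / (d + sgn * std::sqrt(d * d + b * b));
}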


@ -0,0 +1,242 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/Householder.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef HOUSEHOLDER_H
#define HOUSEHOLDER_H
#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl;
#include <cstdlib>
#include <string>
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <fstream>
#include <complex>
#include <algorithm>
namespace Grid {
/** Comparison function for finding the max element in a vector **/
template <class T> bool cf(T i, T j) {
return abs(i) < abs(j);
}
/**
Calculate a real Givens angle
**/
template <class T> inline void Givens_calc(T y, T z, T &c, T &s){
RealD mz = (RealD)abs(z);
if(mz==0.0){
c = 1; s = 0;
}
if(mz >= (RealD)abs(y)){
T t = -y/z;
s = (T)1.0 / sqrt ((T)1.0 + t * t);
c = s * t;
} else {
T t = -z/y;
c = (T)1.0 / sqrt ((T)1.0 + t * t);
s = c * t;
}
}
template <class T> inline void Givens_mult(DenseMatrix<T> &A, int i, int k, T c, T s, int dir)
{
int q ; SizeSquare(A,q);
if(dir == 0){
for(int j=0;j<q;j++){
T nu = A[i][j];
T w = A[k][j];
A[i][j] = (c*nu + s*w);
A[k][j] = (-s*nu + c*w);
}
}
if(dir == 1){
for(int j=0;j<q;j++){
T nu = A[j][i];
T w = A[j][k];
A[j][i] = (c*nu - s*w);
A[j][k] = (s*nu + c*w);
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
P | x | | x | k = 0
| x | | 0 |
| x | = | 0 |
| x | | 0 | j = 3
| x | | x |
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta)
{
int N ; Size(input,N);
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T> );
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = (T)1.0/(alpha*(alpha + abs(v[k]) ));
if(abs(v[k]) > 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha;
else v[k] = -alpha;
} else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
from input = x;
Compute the complex Householder vector, v, such that
P = (I - b v transpose(v) )
b = 2/v.v
Px = alpha*e_dir
These are the "Unreduced" Householder vectors.
**/
template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta)
{
int N = input.size();
T m = *max_element(input.begin() + k, input.begin() + j + 1, cf<T>);
if(abs(m) > 0.0){
T alpha = 0;
for(int i=k; i<j+1; i++){
v[i] = input[i]/m;
alpha = alpha + v[i]*conj(v[i]);
}
alpha = sqrt(alpha);
beta = 1.0/(alpha*(alpha + abs(v[dir]) ));
if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha;
else v[dir] = -alpha;
}else{
for(int i=k; i<j+1; i++){
v[i] = 0.0;
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1; only the first j - k + 1 elements of v are nonzero
**/
template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans)
{
int N ; SizeSquare(A,N);
if(abs(beta) > 0.0){
for(int p=l; p<N; p++){
T s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s += conj(v[i-k])*A[i][p];
s *= beta;
for(int i=k;i<j+1;i++){ A[i][p] = A[i][p]-s*conj(v[i-k]);}
} else {
for(int i=k;i<j+1;i++){ s += conj(v[i-k])*A[p][i];}
s *= beta;
for(int i=k;i<j+1;i++){ A[p][i]=A[p][i]-s*conj(v[i-k]);}
}
}
}
}
/**
Compute the product PA if trans = 0
AP if trans = 1
P = (I - b v transpose(v) )
b = 2/v.v
start at element l of matrix A
v is of length j - k + 1; only the first j - k + 1 elements of v are nonzero
A is tridiagonal
**/
template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans)
{
if(abs(beta) > 0.0){
int N ; SizeSquare(A,N);
DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);
T s;
for(int p=l; p<M; p++){
s = 0;
if(trans==0){
for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p];
}else{
for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i];
}
s = beta*s;
if(trans==0){
for(int i=k;i<j+1;i++) tmp[i][p] = tmp(i,p) - s*v[i-k];
}else{
for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]);
}
}
for(int p=l; p<M; p++){
if(trans==0){
for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p];
}else{
for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i];
}
}
}
}
}
#endif
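A standalone check of the Givens_calc convention above (plain C++, no Grid types): for the (c, s) produced from inputs (y, z), the combination s*y + c*z vanishes and c^2 + s^2 = 1, which is the identity the Givens_mult(..., c, -s, 0) calls rely on when zeroing a subdiagonal entry.

#include <cmath>
#include <cstdio>

int main()
{
    double y = 3.0, z = -4.0, c, s;
    if (std::fabs(z) >= std::fabs(y)) {   // mirror the branches of Givens_calc
        double t = -y / z;
        s = 1.0 / std::sqrt(1.0 + t * t);
        c = s * t;
    } else {
        double t = -z / y;
        c = 1.0 / std::sqrt(1.0 + t * t);
        s = c * t;
    }
    std::printf("s*y + c*z = %g, c^2 + s^2 = %g\n", s * y + c * z, c * c + s * s);
    return 0;
}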


@ -33,8 +33,6 @@ directory
namespace Grid { namespace Grid {
enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS };
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Block conjugate gradient. Dimension zero should be the block direction // Block conjugate gradient. Dimension zero should be the block direction
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
@ -42,273 +40,25 @@ template <class Field>
class BlockConjugateGradient : public OperatorFunction<Field> { class BlockConjugateGradient : public OperatorFunction<Field> {
public: public:
typedef typename Field::scalar_type scomplex; typedef typename Field::scalar_type scomplex;
int blockDim ; const int blockDim = 0;
int Nblock;
BlockCGtype CGtype; int Nblock;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true. // Defaults true.
RealD Tolerance; RealD Tolerance;
Integer MaxIterations; Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) : Tolerance(tol),
{}; MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Thin QR factorisation (google it)
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThinQRfact (Eigen::MatrixXcd &m_rr,
Eigen::MatrixXcd &C,
Eigen::MatrixXcd &Cinv,
Field & Q,
const Field & R)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
////////////////////////////////////////////////////////////////////////////////////////////////////
//Dimensions
// R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock
//
// Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in Eigen)
//
// Q C = R => Q = R C^{-1}
//
// Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock}
//
// Set C = L^{dag}, and then Q^dag Q = ident
//
// Checks:
// Cdag C = Rdag R ; passes.
// QdagQ = 1 ; passes
////////////////////////////////////////////////////////////////////////////////////////////////////
sliceInnerProductMatrix(m_rr,R,R,Orthog);
////////////////////////////////////////////////////////////////////////////////////////////////////
// Cholesky from Eigen
// There exists a ldlt that is documented as more stable
////////////////////////////////////////////////////////////////////////////////////////////////////
Eigen::MatrixXcd L = m_rr.llt().matrixL();
C = L.adjoint();
Cinv = C.inverse();
////////////////////////////////////////////////////////////////////////////////////////////////////
// Q = R C^{-1}
//
// Q_j = R_i Cinv(i,j)
//
// NB maddMatrix conventions are Right multiplication X[j] a[j,i] already
////////////////////////////////////////////////////////////////////////////////////////////////////
// FIXME:: make a sliceMulMatrix to avoid zero vector
sliceMulMatrix(Q,Cinv,R,Orthog);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Call one of several implementations
////////////////////////////////////////////////////////////////////////////////////////////////////
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{ {
if ( CGtype == BlockCGrQ ) { int Orthog = 0; // First dimension is block dim
BlockCGrQsolve(Linop,Src,Psi);
} else if (CGtype == BlockCG ) {
BlockCGsolve(Linop,Src,Psi);
} else if (CGtype == CGmultiRHS ) {
CGmultiRHSsolve(Linop,Src,Psi);
} else {
assert(0);
}
}
////////////////////////////////////////////////////////////////////////////
// BlockCGrQ implementation:
//--------------------------
// X is guess/Solution
// B is RHS
// Solve A X_i = B_i ; i refers to Nblock index
////////////////////////////////////////////////////////////////////////////
void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = B._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
X.checkerboard = B.checkerboard;
conformable(X, B);
Field tmp(B);
Field Q(B);
Field D(B);
Field Z(B);
Field AD(B);
Eigen::MatrixXcd m_DZ = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_M = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_rr = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_C = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Cinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_S = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_Sinv = Eigen::MatrixXcd::Zero(Nblock,Nblock);
Eigen::MatrixXcd m_tmp = Eigen::MatrixXcd::Identity(Nblock,Nblock);
Eigen::MatrixXcd m_tmp1 = Eigen::MatrixXcd::Identity(Nblock,Nblock);
// Initial residual computation & set up
std::vector<RealD> residuals(Nblock);
std::vector<RealD> ssq(Nblock);
sliceNorm(ssq,B,Orthog);
RealD sssum=0;
for(int b=0;b<Nblock;b++) sssum+=ssq[b];
sliceNorm(residuals,B,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
sliceNorm(residuals,X,Orthog);
for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); }
/************************************************************************
* Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001)
************************************************************************
* Dimensions:
*
* X,B==(Nferm x Nblock)
* A==(Nferm x Nferm)
*
* Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site
*
* QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
* for k:
* Z = AD
* M = [D^dag Z]^{-1}
* X = X + D MC
* QS = Q - ZM
* D = Q + D S^dag
* C = S C
*/
///////////////////////////////////////
// Initial block: initial search dir is guess
///////////////////////////////////////
std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl;
//1. QC = R = B-AX, D = Q ; QC => Thin QR factorisation (google it)
Linop.HermOp(X, AD);
tmp = B - AD;
ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp);
D=Q;
std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl;
///////////////////////////////////////
// Timers
///////////////////////////////////////
GridStopWatch sliceInnerTimer;
GridStopWatch sliceMaddTimer;
GridStopWatch QRTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
//3. Z = AD
MatrixTimer.Start();
Linop.HermOp(D, Z);
MatrixTimer.Stop();
//4. M = [D^dag Z]^{-1}
sliceInnerTimer.Start();
sliceInnerProductMatrix(m_DZ,D,Z,Orthog);
sliceInnerTimer.Stop();
m_M = m_DZ.inverse();
//5. X = X + D MC
m_tmp = m_M * m_C;
sliceMaddTimer.Start();
sliceMaddMatrix(X,m_tmp, D,X,Orthog);
sliceMaddTimer.Stop();
//6. QS = Q - ZM
sliceMaddTimer.Start();
sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0);
sliceMaddTimer.Stop();
QRTimer.Start();
ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp);
QRTimer.Stop();
//7. D = Q + D S^dag
m_tmp = m_S.adjoint();
sliceMaddTimer.Start();
sliceMaddMatrix(D,m_tmp,D,Q,Orthog);
sliceMaddTimer.Stop();
//8. C = S C
m_C = m_S*m_C;
/*********************
* convergence monitor
*********************
*/
m_rr = m_C.adjoint() * m_C;
RealD max_resid=0;
RealD rrsum=0;
RealD rr;
for(int b=0;b<Nblock;b++) {
rrsum+=real(m_rr(b,b));
rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr;
}
std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum
<<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl;
if ( max_resid < Tolerance*Tolerance ) {
SolverTimer.Stop();
std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid "
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
}
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(X, AD);
AD = AD-B;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMatrix " << MatrixTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tInnerProd " << sliceInnerTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed() <<std::endl;
IterationsToComplete = k;
return;
}
}
std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl;
if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k;
}
//////////////////////////////////////////////////////////////////////////
// Block conjugate gradient; Original O'Leary Dimension zero should be the block direction
//////////////////////////////////////////////////////////////////////////
void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{
int Orthog = blockDim; // First dimension is block dim; this is an assumption
Nblock = Src._grid->_fdimensions[Orthog]; Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@ -412,9 +162,8 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi
********************* *********************
*/ */
RealD max_resid=0; RealD max_resid=0;
RealD rr;
for(int b=0;b<Nblock;b++){ for(int b=0;b<Nblock;b++){
rr = real(m_rr(b,b))/ssq[b]; RealD rr = real(m_rr(b,b))/ssq[b];
if ( rr > max_resid ) max_resid = rr; if ( rr > max_resid ) max_resid = rr;
} }
@ -424,14 +173,13 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi
std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){ for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl;
} }
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
Linop.HermOp(Psi, AP); Linop.HermOp(Psi, AP);
AP = AP-Src; AP = AP-Src;
std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl;
std::cout << GridLogMessage << "Time Breakdown "<<std::endl; std::cout << GridLogMessage << "Time Breakdown "<<std::endl;
std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl; std::cout << GridLogMessage << "\tElapsed " << SolverTimer.Elapsed() <<std::endl;
@ -449,13 +197,35 @@ void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi
if (ErrorOnNoConverge) assert(0); if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k; IterationsToComplete = k;
} }
};
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// multiRHS conjugate gradient. Dimension zero should be the block direction // multiRHS conjugate gradient. Dimension zero should be the block direction
// Use this for spread out across nodes
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) template <class Field>
class MultiRHSConjugateGradient : public OperatorFunction<Field> {
public:
typedef typename Field::scalar_type scomplex;
const int blockDim = 0;
int Nblock;
bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
// Defaults true.
RealD Tolerance;
Integer MaxIterations;
Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion
MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)
{ {
int Orthog = blockDim; // First dimension is block dim int Orthog = 0; // First dimension is block dim
Nblock = Src._grid->_fdimensions[Orthog]; Nblock = Src._grid->_fdimensions[Orthog];
std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl;
@ -515,10 +285,12 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
MatrixTimer.Stop(); MatrixTimer.Stop();
// Alpha // Alpha
// sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog);
sliceInnerTimer.Start(); sliceInnerTimer.Start();
sliceInnerProductVector(v_pAp,P,AP,Orthog); sliceInnerProductVector(v_pAp,P,AP,Orthog);
sliceInnerTimer.Stop(); sliceInnerTimer.Stop();
for(int b=0;b<Nblock;b++){ for(int b=0;b<Nblock;b++){
// std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl;
v_alpha[b] = v_rr[b]/real(v_pAp[b]); v_alpha[b] = v_rr[b]/real(v_pAp[b]);
} }
@ -560,7 +332,7 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl;
for(int b=0;b<Nblock;b++){ for(int b=0;b<Nblock;b++){
std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl;
} }
std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl;
@ -586,8 +358,9 @@ void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &
if (ErrorOnNoConverge) assert(0); if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k; IterationsToComplete = k;
} }
}; };
} }
#endif #endif
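The "thin QR via Cholesky" step described in the ThinQRfact comments above can be reproduced standalone with Eigen. This is a sketch, assuming a plain Eigen install with the <Eigen/Dense> header; the Grid version works slice-wise on lattice fields rather than on a dense R:

#include <Eigen/Dense>
#include <iostream>

int main()
{
    // R plays the role of the "ferm x Nblock" block of vectors.
    Eigen::MatrixXcd R    = Eigen::MatrixXcd::Random(8, 3);
    Eigen::MatrixXcd m_rr = R.adjoint() * R;          // Nblock x Nblock, Hermitian
    Eigen::MatrixXcd L    = m_rr.llt().matrixL();     // Cholesky: m_rr = L L^dag
    Eigen::MatrixXcd C    = L.adjoint();
    Eigen::MatrixXcd Q    = R * C.inverse();          // Q C = R, and Q^dag Q = 1
    std::cout << "|Q^dag Q - 1| = "
              << (Q.adjoint() * Q - Eigen::MatrixXcd::Identity(3, 3)).norm() << std::endl;
    std::cout << "|Q C - R|     = " << (Q * C - R).norm() << std::endl;
    return 0;
}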


@ -123,11 +123,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
p = p * b + r; p = p * b + r;
LinalgTimer.Stop(); LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl; << " residual " << cp << " target " << rsq << std::endl;
std::cout << GridLogDebug << "a = "<< a << " b_pred = "<< b_pred << " b = "<< b << std::endl;
std::cout << GridLogDebug << "qq = "<< qq << " d = "<< d << " c = "<< c << std::endl;
// Stopping condition // Stopping condition
if (cp <= rsq) { if (cp <= rsq) {
@ -135,6 +132,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
Linop.HermOpAndNorm(psi, mmp, d, qq); Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src; p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src)); RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p)); RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm; RealD true_residual = resnorm / srcnorm;
@ -158,10 +157,8 @@ class ConjugateGradient : public OperatorFunction<Field> {
} }
std::cout << GridLogMessage << "ConjugateGradient did NOT converge" std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl; << std::endl;
if (ErrorOnNoConverge) assert(0); if (ErrorOnNoConverge) assert(0);
IterationsToComplete = k; IterationsToComplete = k;
} }
}; };
} }
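The hunk above compares the iterated residual cp against the target rsq and, once converged, recomputes the true residual |A psi - src| / |src| from scratch. A minimal standalone sketch of that final check (plain C++; the 2x2 operator and numbers are invented for illustration):

#include <cmath>
#include <cstdio>
#include <vector>

// True-residual check: report |A x - b| / |b| independently of the recursion.
int main()
{
    auto applyA = [](const std::vector<double> &v) {      // stand-in 2x2 Hermitian operator
        return std::vector<double>{2.0 * v[0] + v[1], v[0] + 2.0 * v[1]};
    };
    std::vector<double> b{1.0, 0.0}, x{2.0 / 3.0, -1.0 / 3.0};   // x solves A x = b exactly
    std::vector<double> r = applyA(x);
    double rr = 0., bb = 0.;
    for (std::size_t i = 0; i < b.size(); ++i) {
        r[i] -= b[i];
        rr += r[i] * r[i];
        bb += b[i] * b[i];
    }
    std::printf("true residual = %g\n", std::sqrt(rr / bb));   // ~0 here
    return 0;
}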


@ -0,0 +1,81 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/EigenSort.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_EIGENSORT_H
#define GRID_EIGENSORT_H
namespace Grid {
/////////////////////////////////////////////////////////////
// Eigen sorter to begin with
/////////////////////////////////////////////////////////////
template<class Field>
class SortEigen {
private:
//hacking for testing for now
private:
static bool less_lmd(RealD left,RealD right){
return left > right;
}
static bool less_pair(std::pair<RealD,Field const*>& left,
std::pair<RealD,Field const*>& right){
return left.first > (right.first);
}
public:
void push(DenseVector<RealD>& lmd,
DenseVector<Field>& evec,int N) {
DenseVector<Field> cpy(lmd.size(),evec[0]._grid);
for(int i=0;i<lmd.size();i++) cpy[i] = evec[i];
DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());
for(int i=0;i<lmd.size();++i)
emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]);
partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);
typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin();
for(int i=0;i<N;++i){
lmd[i]=it->first;
evec[i]=*(it->second);
++it;
}
}
void push(DenseVector<RealD>& lmd,int N) {
std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd);
}
bool saturated(RealD lmd, RealD thrs) {
return fabs(lmd) > fabs(thrs);
}
};
}
#endif
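SortEigen::push above keeps eigenvalue/eigenvector pairs aligned while partially sorting them by descending value (the comparators use operator>). A standalone sketch of the same pattern with plain std types, no Grid fields (values and payload strings are made up):

#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

int main()
{
    std::vector<std::pair<double, std::string>> ev =
        {{0.3, "v0"}, {1.7, "v1"}, {-2.0, "v2"}, {0.9, "v3"}};
    const std::size_t N = 2;   // keep the N largest, mirroring push(lmd, evec, N)
    std::partial_sort(ev.begin(), ev.begin() + N, ev.end(),
                      [](const std::pair<double, std::string> &a,
                         const std::pair<double, std::string> &b) { return a.first > b.first; });
    for (std::size_t i = 0; i < N; ++i)
        std::printf("%zu: %g %s\n", i, ev[i].first, ev[i].second.c_str());
    return 0;
}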

File diff suppressed because it is too large.


@ -6,9 +6,8 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -50,6 +49,7 @@ public:
GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
// Physics Grid information. // Physics Grid information.
std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes.
std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal
@ -62,12 +62,13 @@ public:
int _isites; int _isites;
int _fsites; // _isites*_osites = product(dimensions). int _fsites; // _isites*_osites = product(dimensions).
int _gsites; int _gsites;
std::vector<int> _slice_block;// subslice information std::vector<int> _slice_block; // subslice information
std::vector<int> _slice_stride; std::vector<int> _slice_stride;
std::vector<int> _slice_nblock; std::vector<int> _slice_nblock;
std::vector<int> _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] // Might need these at some point
std::vector<int> _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 // std::vector<int> _lstart; // local start of array in gcoors. _processor_coor[d]*_ldimensions[d]
// std::vector<int> _lend; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1
public: public:
@ -98,7 +99,7 @@ public:
virtual int oIndex(std::vector<int> &coor) virtual int oIndex(std::vector<int> &coor)
{ {
int idx=0; int idx=0;
// Works with either global or local coordinates // Works with either global or local coordinates
for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]); for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
return idx; return idx;
} }
@ -120,12 +121,6 @@ public:
Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions); Lexicographic::CoorFromIndex(coor,Oindex,_rdimensions);
} }
inline void InOutCoorToLocalCoor (std::vector<int> &ocoor, std::vector<int> &icoor, std::vector<int> &lcoor) {
lcoor.resize(_ndimension);
for (int d = 0; d < _ndimension; d++)
lcoor[d] = ocoor[d] + _rdimensions[d] * icoor[d];
}
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
// SIMD lane addressing // SIMD lane addressing
////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////
@ -133,7 +128,6 @@ public:
{ {
Lexicographic::CoorFromIndex(coor,lane,_simd_layout); Lexicographic::CoorFromIndex(coor,lane,_simd_layout);
} }
inline int PermuteDim(int dimension){ inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1; return _simd_layout[dimension]>1;
} }
@ -151,15 +145,15 @@ public:
// Distance should be either 0,1,2.. // Distance should be either 0,1,2..
// //
if ( _simd_layout[dimension] > 2 ) { if ( _simd_layout[dimension] > 2 ) {
for(int d=0;d<_ndimension;d++){ for(int d=0;d<_ndimension;d++){
if ( d != dimension ) assert ( (_simd_layout[d]==1) ); if ( d != dimension ) assert ( (_simd_layout[d]==1) );
} }
permute_type = RotateBit; // How to specify distance; this is not just direction. permute_type = RotateBit; // How to specify distance; this is not just direction.
return permute_type; return permute_type;
} }
for(int d=_ndimension-1;d>dimension;d--){ for(int d=_ndimension-1;d>dimension;d--){
if (_simd_layout[d]>1 ) permute_type++; if (_simd_layout[d]>1 ) permute_type++;
} }
return permute_type; return permute_type;
} }
@ -174,30 +168,11 @@ public:
inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; inline int gSites(void) const { return _isites*_osites*_Nprocessors; };
inline int Nd (void) const { return _ndimension;}; inline int Nd (void) const { return _ndimension;};
inline const std::vector<int> LocalStarts(void) { return _lstart; };
inline const std::vector<int> &FullDimensions(void) { return _fdimensions;}; inline const std::vector<int> &FullDimensions(void) { return _fdimensions;};
inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;}; inline const std::vector<int> &GlobalDimensions(void) { return _gdimensions;};
inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;}; inline const std::vector<int> &LocalDimensions(void) { return _ldimensions;};
inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;}; inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};
////////////////////////////////////////////////////////////////
// Utility to print the full decomposition details
////////////////////////////////////////////////////////////////
void show_decomposition(){
std::cout << GridLogMessage << "Full Dimensions : " << _fdimensions << std::endl;
std::cout << GridLogMessage << "Global Dimensions : " << _gdimensions << std::endl;
std::cout << GridLogMessage << "Local Dimensions : " << _ldimensions << std::endl;
std::cout << GridLogMessage << "Reduced Dimensions : " << _rdimensions << std::endl;
std::cout << GridLogMessage << "Outer strides : " << _ostride << std::endl;
std::cout << GridLogMessage << "Inner strides : " << _istride << std::endl;
std::cout << GridLogMessage << "iSites : " << _isites << std::endl;
std::cout << GridLogMessage << "oSites : " << _osites << std::endl;
std::cout << GridLogMessage << "lSites : " << lSites() << std::endl;
std::cout << GridLogMessage << "gSites : " << gSites() << std::endl;
std::cout << GridLogMessage << "Nd : " << _ndimension << std::endl;
}
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
// Global addressing // Global addressing
//////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////
@ -209,15 +184,12 @@ public:
      assert(lidx<lSites());
      Lexicographic::CoorFromIndex(lcoor,lidx,_ldimensions);
    }
    void GlobalCoorToGlobalIndex(const std::vector<int> & gcoor,int & gidx){
      gidx=0;
      int mult=1;
      for(int mu=0;mu<_ndimension;mu++) {
        gidx+=mult*gcoor[mu];
        mult*=_gdimensions[mu];
      }
    }
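GlobalCoorToGlobalIndex above is plain lexicographic indexing with dimension 0 running fastest. A minimal standalone sketch of the same rule (not Grid code, just the arithmetic, with made-up dimensions in the worked example):

#include <cassert>
#include <vector>

// Same lexicographic rule as GlobalCoorToGlobalIndex: dimension 0 runs fastest.
int globalIndex(const std::vector<int> &gcoor, const std::vector<int> &gdims) {
  int gidx = 0, mult = 1;
  for (size_t mu = 0; mu < gdims.size(); mu++) {
    gidx += mult * gcoor[mu];
    mult *= gdims[mu];
  }
  return gidx;
}

int main() {
  // e.g. an 8x8x8x16 global lattice; coordinate (1,2,3,4)
  // maps to 1 + 2*8 + 3*64 + 4*512 = 2257
  assert(globalIndex({1,2,3,4}, {8,8,8,16}) == 2257);
  return 0;
}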
    void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
@ -225,9 +197,9 @@ public:
      pcoor.resize(_ndimension);
      lcoor.resize(_ndimension);
      for(int mu=0;mu<_ndimension;mu++){
        int _fld  = _fdimensions[mu]/_processors[mu];
        pcoor[mu] = gcoor[mu]/_fld;
        lcoor[mu] = gcoor[mu]%_fld;
      }
    }
    void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
@ -239,9 +211,9 @@ public:
      /*
      std::vector<int> cblcoor(lcoor);
      for(int d=0;d<cblcoor.size();d++){
        if( this->CheckerBoarded(d) ) {
          cblcoor[d] = lcoor[d]/2;
        }
      }
      */
      i_idx= iIndex(lcoor);
@ -267,7 +239,7 @@ public:
    {
      RankIndexToGlobalCoor(rank,o_idx,i_idx ,fcoor);
      if(CheckerBoarded(0)){
        fcoor[0] = fcoor[0]*2+cb;
      }
    }
    void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)


@ -76,8 +76,6 @@ public:
        _ldimensions.resize(_ndimension);
        _rdimensions.resize(_ndimension);
        _simd_layout.resize(_ndimension);
        _lstart.resize(_ndimension);
        _lend.resize(_ndimension);
        _ostride.resize(_ndimension);
        _istride.resize(_ndimension);
@ -96,10 +94,8 @@ public:
        // Use a reduced simd grid
        _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions
        _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition
        _lstart[d]  = _processor_coor[d]*_ldimensions[d];
        _lend[d]    = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;
        _osites *= _rdimensions[d];
        _isites *= _simd_layout[d];
        // Addressing support
        if ( d==0 ) {
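The _lstart/_lend bookkeeping in the hunk above records, per dimension, the closed range of global coordinates owned by this rank: lstart = pcoor*ldim and lend = lstart+ldim-1. A standalone sketch with made-up numbers (16 sites in one dimension split over 4 ranks):

#include <cstdio>

int main() {
  int gdim  = 16;                      // global extent in one dimension
  int procs = 4;                       // ranks along that dimension
  int ldim  = gdim / procs;            // local extent, as in _ldimensions[d]
  for (int pcoor = 0; pcoor < procs; pcoor++) {
    int lstart = pcoor * ldim;              // first global coordinate owned
    int lend   = pcoor * ldim + ldim - 1;   // last global coordinate owned
    printf("rank coord %d owns [%d,%d]\n", pcoor, lstart, lend);
  }
  return 0;  // rank coordinate 2, for example, owns [8,11]
}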


@ -151,8 +151,6 @@ public:
        _ldimensions.resize(_ndimension);
        _rdimensions.resize(_ndimension);
        _simd_layout.resize(_ndimension);
        _lstart.resize(_ndimension);
        _lend.resize(_ndimension);
        _ostride.resize(_ndimension);
        _istride.resize(_ndimension);
@ -171,8 +169,6 @@ public:
          _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard
        }
        _ldimensions[d] = _gdimensions[d]/_processors[d];
        _lstart[d]  = _processor_coor[d]*_ldimensions[d];
        _lend[d]    = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1;
        // Use a reduced simd grid
        _simd_layout[d] = simd_layout[d];


@ -60,7 +60,6 @@ void CartesianCommunicator::ShmBufferFreeAll(void) {
/////////////////////////////////
// Grid information queries
/////////////////////////////////
int CartesianCommunicator::Dimensions(void)   { return _ndimension; };
int CartesianCommunicator::IsBoss(void)       { return _processor==0; };
int CartesianCommunicator::BossRank(void)     { return 0; };
int CartesianCommunicator::ThisRank(void)     { return _processor; };
@ -92,7 +91,6 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
int CartesianCommunicator::NodeCount(void)    { return ProcessorCount();};
int CartesianCommunicator::RankCount(void)    { return ProcessorCount();};
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                          void *xmit,


@ -148,7 +148,6 @@ class CartesianCommunicator {
  int  RankFromProcessorCoor(std::vector<int> &coor);
  void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
  int  Dimensions(void) ;
  int  IsBoss(void) ;
  int  BossRank(void) ;
  int  ThisRank(void) ;
@ -156,7 +155,6 @@ class CartesianCommunicator {
  const std::vector<int> & ProcessorGrid(void) ;
  int  ProcessorCount(void) ;
  int  NodeCount(void) ;
  int  RankCount(void) ;
  ////////////////////////////////////////////////////////////////////////////////
  // very VERY rarely (Log, serial RNG) we need world without a grid
@ -177,8 +175,6 @@ class CartesianCommunicator {
  void GlobalSumVector(ComplexF *c,int N);
  void GlobalSum(ComplexD &c);
  void GlobalSumVector(ComplexD *c,int N);
  void GlobalXOR(uint32_t &);
  void GlobalXOR(uint64_t &);
  template<class obj> void GlobalSum(obj &o){
    typedef typename obj::scalar_type scalar_type;


@ -83,14 +83,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
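GlobalXOR is a bitwise-XOR all-reduce: every rank contributes one word and every rank gets back the XOR of all of them, via the same MPI_Allreduce/MPI_BXOR call shown above. A minimal standalone MPI sketch of that collective (the per-rank value is invented for illustration; how Grid feeds it, e.g. from I/O checksums, is not shown in this hunk):

// Compile with e.g.: mpicxx xor_reduce.cc && mpirun -n 4 ./a.out
#include <mpi.h>
#include <cstdint>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Some per-rank 32-bit word (a partial checksum, say).
  uint32_t u = 0x9e3779b9u * (uint32_t)(rank + 1);

  // Same call pattern as CartesianCommunicator::GlobalXOR(uint32_t &):
  MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT32_T, MPI_BXOR, MPI_COMM_WORLD);

  printf("rank %d sees combined word %08x\n", rank, (unsigned)u);
  MPI_Finalize();
  return 0;
}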
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);


@ -65,7 +65,6 @@ std::vector<int> CartesianCommunicator::MyGroup;
std::vector<void *> CartesianCommunicator::ShmCommBufs;
int CartesianCommunicator::NodeCount(void)    { return GroupSize;};
int CartesianCommunicator::RankCount(void)    { return WorldSize;};
#undef FORCE_COMMS
@ -510,14 +509,6 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
  assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalXOR(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
  assert(ierr==0);


@ -59,8 +59,6 @@ void CartesianCommunicator::GlobalSum(double &){}
void CartesianCommunicator::GlobalSum(uint32_t &){}
void CartesianCommunicator::GlobalSum(uint64_t &){}
void CartesianCommunicator::GlobalSumVector(double *,int N){}
void CartesianCommunicator::GlobalXOR(uint32_t &){}
void CartesianCommunicator::GlobalXOR(uint64_t &){}
void CartesianCommunicator::SendRecvPacket(void *xmit,
                                           void *recv,



@ -235,74 +235,64 @@ public:
      }
    };
    //////////////////////////////////////////////////////////////////
    // Constructor requires "grid" passed.
    // what about a default grid?
    //////////////////////////////////////////////////////////////////
    Lattice(GridBase *grid) : _odata(grid->oSites()) {
      _grid = grid;
      //   _odata.reserve(_grid->oSites());
      //   _odata.resize(_grid->oSites());
      // std::cout << "Constructing lattice object with Grid pointer "<<_grid<<std::endl;
      assert((((uint64_t)&_odata[0])&0xF) ==0);
      checkerboard=0;
    }
    Lattice(const Lattice& r){ // copy constructor
      _grid = r._grid;
      checkerboard = r.checkerboard;
      _odata.resize(_grid->oSites());// essential
      parallel_for(int ss=0;ss<_grid->oSites();ss++){
        _odata[ss]=r._odata[ss];
      }
    }
    virtual ~Lattice(void) = default;
    void reset(GridBase* grid) {
      if (_grid != grid) {
        _grid = grid;
        _odata.resize(grid->oSites());
        checkerboard = 0;
      }
    }
    template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
      parallel_for(int ss=0;ss<_grid->oSites();ss++){
        this->_odata[ss]=r;
      }
      return *this;
    }
    template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
      this->checkerboard = r.checkerboard;
      conformable(*this,r);
      parallel_for(int ss=0;ss<_grid->oSites();ss++){
        this->_odata[ss]=r._odata[ss];
      }
      return *this;
    }
    // *=,+=,-= operators inherit behvour from correspond */+/- operation
    template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
      *this = (*this)*r;
      return *this;
    }
    template<class T> strong_inline Lattice<vobj> &operator -=(const T &r) {
      *this = (*this)-r;
      return *this;
    }
    template<class T> strong_inline Lattice<vobj> &operator +=(const T &r) {
      *this = (*this)+r;
      return *this;
    }
  }; // class Lattice
  template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
    std::vector<int> gcoor;
    typedef typename vobj::scalar_object sobj;
@ -320,7 +310,7 @@ public:
      }
      return stream;
    }
  }


@ -1,37 +1,45 @@
/*************************************************************************************
    Grid physics library, www.github.com/paboyle/Grid
    Source file: ./lib/lattice/Lattice_reduction.h
    Copyright (C) 2015
    Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: paboyle <paboyle@ph.ed.ac.uk>
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#ifndef GRID_LATTICE_REDUCTION_H
#define GRID_LATTICE_REDUCTION_H
#include <Grid/Grid_Eigen_Dense.h> #include <Grid/Eigen/Dense>
namespace Grid {
#ifdef GRID_WARN_SUBOPTIMAL
#warning "Optimisation alert all these reduction loops are NOT threaded "
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////
// Deterministic Reduction operations
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
  ComplexD nrm = innerProduct(arg,arg);
  return std::real(nrm);
}
@ -328,8 +336,6 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
  typedef typename vobj::vector_type vector_type;
  typedef typename vobj::tensor_reduced tensor_reduced;
  scalar_type zscale(scale);
  GridBase *grid  = X._grid;
  int Nsimd  =grid->Nsimd();
@ -355,7 +361,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
      grid->iCoorFromIindex(icoor,l);
      int ldx =r+icoor[orthogdim]*rd;
      scalar_type *as =(scalar_type *)&av;
      as[l] = scalar_type(a[ldx])*zscale;    as[l] = scalar_type(a[ldx])*scale;
    }
    tensor_reduced at; at=av;
@ -369,6 +375,74 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice
  }
};
/*
template<class vobj>
static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y,
int Orthog,RealD scale=1.0)
{
// FIXME: Implementation is slow
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
// If we based this on Cshift it would work for spread out
// but it would be even slower
for(int i=0;i<Nblock;i++){
ExtractSlice(Rslice,Y,i,Orthog);
ExtractSlice(Xslice,X,i,Orthog);
Rslice = Rslice + Xslice*(scale*a[i]);
InsertSlice(Rslice,R,i,Orthog);
}
};
template<class vobj>
static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
// FIXME: Implementation is slow
// Look at localInnerProduct implementation,
// and do inside a site loop with block strided iterators
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
typedef typename vobj::tensor_reduced scalar;
typedef typename scalar::scalar_object scomplex;
int Nblock = lhs._grid->GlobalDimensions()[Orthog];
vec.resize(Nblock);
std::vector<scomplex> sip(Nblock);
Lattice<scalar> IP(lhs._grid);
IP=localInnerProduct(lhs,rhs);
sliceSum(IP,sip,Orthog);
for(int ss=0;ss<Nblock;ss++){
vec[ss] = TensorRemove(sip[ss]);
}
}
*/
//////////////////////////////////////////////////////////////////////////////////////////
// FIXME: Implementation is slow
// If we based this on Cshift it would work for spread out
// but it would be even slower
//
// Repeated extract slice is inefficient
//
// Best base the linear combination by constructing a
// set of vectors of size grid->_rdimensions[Orthog].
//////////////////////////////////////////////////////////////////////////////////////////
inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog)
{
  int NN    = BlockSolverGrid->_ndimension;
@ -388,6 +462,7 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or
  return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);
}

template<class vobj>
static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)
{
@ -396,103 +471,28 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice
  typedef typename vobj::vector_type vector_type;

  int Nblock = X._grid->GlobalDimensions()[Orthog];

  GridBase *FullGrid  = X._grid;
  GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);

  Lattice<vobj> Xslice(SliceGrid);
  Lattice<vobj> Rslice(SliceGrid);

  assert( FullGrid->_simd_layout[Orthog]==1);
  int nh =  FullGrid->_ndimension;
  int nl = SliceGrid->_ndimension;

  //FIXME package in a convenient iterator
  //Should loop over a plane orthogonal to direction "Orthog"
  int stride=FullGrid->_slice_stride[Orthog];
  int block =FullGrid->_slice_block [Orthog];
  int nblock=FullGrid->_slice_nblock[Orthog];
  int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
  {
    std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
    for(int n=0;n<nblock;n++){
    for(int b=0;b<block;b++){
      int o  = n*stride + b;
      for(int i=0;i<Nblock;i++){
        s_x[i] = X[o+i*ostride];
      }
      vobj dot;
      for(int i=0;i<Nblock;i++){
        dot = Y[o+i*ostride];
        for(int j=0;j<Nblock;j++){
          dot = dot + s_x[j]*(scale*aa(j,i));
        }
        R[o+i*ostride]=dot;
      }
    }}
  }

  for(int i=0;i<Nblock;i++){
    ExtractSlice(Rslice,Y,i,Orthog);
    for(int j=0;j<Nblock;j++){
      ExtractSlice(Xslice,X,j,Orthog);
      Rslice = Rslice + Xslice*(scale*aa(j,i));
    }
    InsertSlice(Rslice,R,i,Orthog);
  }
};
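Both bodies of sliceMaddMatrix in this hunk compute the same slice-wise linear combination: for each slice i along the Orthog direction, R_i = Y_i + scale * sum_j X_j * aa(j,i). A standalone scalar sketch of that recurrence, with complex numbers standing in for whole slices and a made-up 2x2 coefficient matrix:

#include <complex>
#include <cstdio>
#include <vector>

int main() {
  using C = std::complex<double>;
  int Nblock = 2;
  double scale = 1.0;
  // aa(j,i) stored row-major: column i of aa mixes the X "slices" into R_i.
  std::vector<C> aa = {C(1,0), C(0,1),
                       C(2,0), C(0,0)};
  std::vector<C> X = {C(1,0), C(0,2)};
  std::vector<C> Y = {C(5,0), C(7,0)};
  std::vector<C> R(Nblock);

  for (int i = 0; i < Nblock; i++) {
    C dot = Y[i];
    for (int j = 0; j < Nblock; j++) {
      dot += X[j] * (scale * aa[j * Nblock + i]);   // aa(j,i)
    }
    R[i] = dot;
  }
  for (int i = 0; i < Nblock; i++)
    printf("R[%d] = (%g,%g)\n", i, R[i].real(), R[i].imag());
  return 0;   // prints R[0]=(6,4), R[1]=(7,1)
}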
template<class vobj>
static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)
{
typedef typename vobj::scalar_object sobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
int Nblock = X._grid->GlobalDimensions()[Orthog];
GridBase *FullGrid = X._grid;
GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog);
Lattice<vobj> Xslice(SliceGrid);
Lattice<vobj> Rslice(SliceGrid);
assert( FullGrid->_simd_layout[Orthog]==1);
int nh = FullGrid->_ndimension;
int nl = SliceGrid->_ndimension;
//FIXME package in a convenient iterator
//Should loop over a plane orthogonal to direction "Orthog"
int stride=FullGrid->_slice_stride[Orthog];
int block =FullGrid->_slice_block [Orthog];
int nblock=FullGrid->_slice_nblock[Orthog];
int ostride=FullGrid->_ostride[Orthog];
#pragma omp parallel
{
std::vector<vobj> s_x(Nblock);
#pragma omp for collapse(2)
for(int n=0;n<nblock;n++){
for(int b=0;b<block;b++){
int o = n*stride + b;
for(int i=0;i<Nblock;i++){
s_x[i] = X[o+i*ostride];
}
vobj dot;
for(int i=0;i<Nblock;i++){
dot = s_x[0]*(scale*aa(0,i));
for(int j=1;j<Nblock;j++){
dot = dot + s_x[j]*(scale*aa(j,i));
}
R[o+i*ostride]=dot;
}
}}
}
};
template<class vobj>
static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)
{
  // FIXME: Implementation is slow
  // Not sure of best solution.. think about it
  typedef typename vobj::scalar_object sobj;
  typedef typename vobj::scalar_type scalar_type;
  typedef typename vobj::vector_type vector_type;
@ -506,54 +506,25 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice<vobj>
  Lattice<vobj> Rslice(SliceGrid);

  mat = Eigen::MatrixXcd::Zero(Nblock,Nblock);

  assert( FullGrid->_simd_layout[Orthog]==1);
  int nh =  FullGrid->_ndimension;
  int nl = SliceGrid->_ndimension;

  //FIXME package in a convenient iterator
  //Should loop over a plane orthogonal to direction "Orthog"
  int stride=FullGrid->_slice_stride[Orthog];
  int block =FullGrid->_slice_block [Orthog];
  int nblock=FullGrid->_slice_nblock[Orthog];
  int ostride=FullGrid->_ostride[Orthog];

  typedef typename vobj::vector_typeD vector_typeD;

#pragma omp parallel
  {
    std::vector<vobj> Left(Nblock);
    std::vector<vobj> Right(Nblock);
    Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock);

#pragma omp for collapse(2)
    for(int n=0;n<nblock;n++){
    for(int b=0;b<block;b++){
      int o  = n*stride + b;
      for(int i=0;i<Nblock;i++){
        Left [i] = lhs[o+i*ostride];
        Right[i] = rhs[o+i*ostride];
      }
      for(int i=0;i<Nblock;i++){
      for(int j=0;j<Nblock;j++){
        auto tmp = innerProduct(Left[i],Right[j]);
        vector_typeD rtmp = TensorRemove(tmp);
        mat_thread(i,j) += Reduce(rtmp);
      }}
    }}
#pragma omp critical
    {
      mat += mat_thread;
    }
  }
#undef FORCE_DIAG
#ifdef FORCE_DIAG
  for(int i=0;i<Nblock;i++){
    for(int j=0;j<Nblock;j++){
      if ( i != j ) mat(i,j)=0.0;
    }
  }
#endif

  for(int i=0;i<Nblock;i++){
    ExtractSlice(Lslice,lhs,i,Orthog);
    for(int j=0;j<Nblock;j++){
      ExtractSlice(Rslice,rhs,j,Orthog);
      mat(i,j) = innerProduct(Lslice,Rslice);
    }
  }

  return;
}
} /*END NAMESPACE GRID*/
#endif


@ -6,8 +6,8 @@
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>    Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -75,55 +75,6 @@ namespace Grid {
      return multiplicity;
    }
// merge of April 11 2017
//<<<<<<< HEAD
// this function is necessary for the LS vectorised field
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
{
int rngdims = coarse->_ndimension;
// trivially extended in higher dims, with locality guaranteeing RNG state is local to node
int lowerdims = fine->_ndimension - coarse->_ndimension; assert(lowerdims >= 0);
// assumes that the higher dimensions are not using more processors
// all further divisions are local
for(int d=0;d<lowerdims;d++) assert(fine->_processors[d]==1);
for(int d=0;d<rngdims;d++) assert(coarse->_processors[d] == fine->_processors[d+lowerdims]);
// then divide the number of local sites
// check that the total number of sims agree, meanse the iSites are the same
assert(fine->Nsimd() == coarse->Nsimd());
// check that the two grids divide cleanly
assert( (fine->lSites() / coarse->lSites() ) * coarse->lSites() == fine->lSites() );
return fine->lSites() / coarse->lSites();
}
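RNGfillable_general boils down to multiplicity = fine->lSites() / coarse->lSites(), after checking that the extra low dimensions are not split over processors and that the SIMD layouts agree. A standalone arithmetic sketch with made-up extents (a 5D Ls=16 field over a 4D RNG grid on a single rank):

#include <cassert>
#include <vector>

static long localSites(const std::vector<int> &ldims) {
  long n = 1;
  for (int d : ldims) n *= d;
  return n;
}

int main() {
  std::vector<int> coarse = {8, 8, 8, 8};       // 4D RNG grid, local dims
  std::vector<int> fine   = {16, 8, 8, 8, 8};   // Ls=16 added as the lowest dimension
  long multiplicity = localSites(fine) / localSites(coarse);
  assert(localSites(fine) == multiplicity * localSites(coarse)); // divides cleanly
  assert(multiplicity == 16);   // each coarse generator fills 16 fine sites
  return 0;
}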
/*
// Wrap seed_seq to give common interface with random_device
class fixedSeed {
public:
typedef std::seed_seq::result_type result_type;
std::seed_seq src;
fixedSeed(const std::vector<int> &seeds) : src(seeds.begin(),seeds.end()) {};
result_type operator () (void){
std::vector<result_type> list(1);
src.generate(list.begin(),list.end());
return list[0];
}
};
=======
>>>>>>> develop
*/
  // real scalars are one component
  template<class scalar,class distribution,class generator>
  void fillScalar(scalar &s,distribution &dist,generator & gen)
@ -158,7 +109,7 @@ namespace Grid {
#ifdef RNG_SITMO
    typedef sitmo::prng_engine RngEngine;
    typedef uint64_t RngStateType;
    static const int RngStateCount = 13;    static const int RngStateCount = 4;
#endif
    std::vector<RngEngine> _generators;
@ -213,7 +164,7 @@ namespace Grid {
      ss<<eng;
      ss.seekg(0,ss.beg);
      for(int i=0;i<RngStateCount;i++){
        ss>>saved[i];
      }
    }
    void GetState(std::vector<RngStateType> & saved,int gen) {
@ -223,7 +174,7 @@ namespace Grid {
      assert(saved.size()==RngStateCount);
      std::stringstream ss;
      for(int i=0;i<RngStateCount;i++){
        ss<< saved[i]<<" ";
      }
      ss.seekg(0,ss.beg);
      ss>>eng;
@ -264,7 +215,7 @@ namespace Grid {
      dist[0].reset();
      for(int idx=0;idx<words;idx++){
        fillScalar(buf[idx],dist[0],_generators[0]);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
@ -296,7 +247,7 @@ namespace Grid {
      RealF *pointer=(RealF *)&l;
      dist[0].reset();
      for(int i=0;i<2*vComplexF::Nsimd();i++){
        fillScalar(pointer[i],dist[0],_generators[0]);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
@ -304,7 +255,7 @@ namespace Grid {
      RealD *pointer=(RealD *)&l;
      dist[0].reset();
      for(int i=0;i<2*vComplexD::Nsimd();i++){
        fillScalar(pointer[i],dist[0],_generators[0]);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
@ -312,7 +263,7 @@ namespace Grid {
      RealF *pointer=(RealF *)&l;
      dist[0].reset();
      for(int i=0;i<vRealF::Nsimd();i++){
        fillScalar(pointer[i],dist[0],_generators[0]);
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
@ -324,7 +275,7 @@ namespace Grid {
      }
      CartesianCommunicator::BroadcastWorld(0,(void *)&l,sizeof(l));
    }
    void SeedFixedIntegers(const std::vector<int> &seeds){
      CartesianCommunicator::BroadcastWorld(0,(void *)&seeds[0],sizeof(int)*seeds.size());
      std::seed_seq src(seeds.begin(),seeds.end());
@ -333,20 +284,18 @@ namespace Grid {
  };

  class GridParallelRNG : public GridRNGbase {
    double _time_counter;
  public:
    GridBase *_grid;
    unsigned int _vol;    int _vol;
  public:
    int generator_idx(int os,int is) {
      return is*_grid->oSites()+os;
    }
    GridParallelRNG(GridBase *grid) : GridRNGbase() {
      _grid = grid;
      _vol  =_grid->iSites()*_grid->oSites();
      _generators.resize(_vol);
      _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
@ -360,34 +309,33 @@ namespace Grid {
      typedef typename vobj::scalar_object scalar_object;
      typedef typename vobj::scalar_type scalar_type;
      typedef typename vobj::vector_type vector_type;

      double inner_time_counter = usecond();

      int multiplicity = RNGfillable_general(_grid, l._grid); // l has finer or same grid    int multiplicity = RNGfillable(_grid,l._grid);
      int Nsimd  = _grid->Nsimd();  // guaranteed to be the same for l._grid too
      int osites = _grid->oSites(); // guaranteed to be <= l._grid->oSites() by a factor multiplicity
      int words  = sizeof(scalar_object) / sizeof(scalar_type);

      parallel_for(int ss=0;ss<osites;ss++){
        std::vector<scalar_object> buf(Nsimd);
        for (int m = 0; m < multiplicity; m++) {  // Draw from same generator multiplicity times
          int sm = multiplicity * ss + m;         // Maps the generator site to the fine site
          for (int si = 0; si < Nsimd; si++) {
            int gdx = generator_idx(ss, si);      // index of generator state
            scalar_type *pointer = (scalar_type *)&buf[si];
            dist[gdx].reset();
            for (int idx = 0; idx < words; idx++)
              fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
          }
          // merge into SIMD lanes, FIXME suboptimal implementation
          merge(l._odata[sm], buf);
        }
      }
      _time_counter += usecond()- inner_time_counter;
    };
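The indexing in fill() is worth spelling out: each outer site ss owns Nsimd generator states at gdx = is*oSites()+os, and fills multiplicity consecutive fine-grid vectors at sm = multiplicity*ss+m. A standalone sketch of those two maps with small made-up sizes:

#include <cassert>

int main() {
  int osites = 4, Nsimd = 2, multiplicity = 3;

  // generator_idx(os,is) = is*osites + os : one state per (outer site, SIMD lane)
  int gdx_last = (Nsimd - 1) * osites + (osites - 1);
  assert(gdx_last == 7);                 // osites*Nsimd = 8 states in total

  // fine-grid target index for generator site ss, draw m: sm = multiplicity*ss + m
  int ss = 2, m = 1;
  assert(multiplicity * ss + m == 7);    // third generator site, second draw
  return 0;
}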
void SeedFixedIntegers(const std::vector<int> &seeds){ void SeedFixedIntegers(const std::vector<int> &seeds){
@ -464,12 +412,6 @@ namespace Grid {
} }
#endif #endif
} }
void Report(){
std::cout << GridLogMessage << "Time spent in the fill() routine by GridParallelRNG: "<< _time_counter/1e3 << " ms" << std::endl;
}
    ////////////////////////////////////////////////////////////////////////
    // Support for rigorous test of RNG's
    // Return uniform random uint32_t from requested site generator
@ -477,6 +419,7 @@ namespace Grid {
    uint32_t GlobalU01(int gsite){
      uint32_t the_number;
      // who
      std::vector<int> gcoor;
      int rank,o_idx,i_idx;


@ -551,10 +551,7 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type
unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
{
  typedef typename vobj::vector_type vtype;
  GridBase* in_grid = in._grid;
@ -593,54 +590,6 @@ unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in)
    extract1(in_vobj, out_ptrs, 0);
  }
}
//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order
template<typename vobj, typename sobj>
typename std::enable_if<isSIMDvectorized<vobj>::value
&& !isSIMDvectorized<sobj>::value, void>::type
vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out)
{
typedef typename vobj::vector_type vtype;
GridBase* grid = out._grid;
assert(in.size()==grid->lSites());
int ndim = grid->Nd();
int nsimd = vtype::Nsimd();
std::vector<std::vector<int> > icoor(nsimd);
for(int lane=0; lane < nsimd; lane++){
icoor[lane].resize(ndim);
grid->iCoorFromIindex(icoor[lane],lane);
}
parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index
//Assemble vector of pointers to output elements
std::vector<sobj*> ptrs(nsimd);
std::vector<int> ocoor(ndim);
grid->oCoorFromOindex(ocoor, oidx);
std::vector<int> lcoor(grid->Nd());
for(int lane=0; lane < nsimd; lane++){
for(int mu=0;mu<ndim;mu++){
lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu];
}
int lex;
Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions);
ptrs[lane] = &in[lex];
}
//pack from those ptrs
vobj vecobj;
merge1(vecobj, ptrs, 0);
out._odata[oidx] = vecobj;
}
}
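vectorizeFromLexOrdArray recovers the SIMD layout by computing, per lane, lcoor[mu] = ocoor[mu] + _rdimensions[mu]*icoor[lane][mu] and then a lexicographic index into the scalar array. A standalone sketch of just that index arithmetic, assuming the dimension-0-fastest convention used elsewhere in this diff, with small made-up dimensions:

#include <cassert>
#include <vector>

// lexicographic index with dimension 0 fastest
static int lexIndex(const std::vector<int> &coor, const std::vector<int> &dims) {
  int idx = 0, mult = 1;
  for (size_t mu = 0; mu < dims.size(); mu++) { idx += mult * coor[mu]; mult *= dims[mu]; }
  return idx;
}

int main() {
  // a 4x4 local volume split into 2x1 SIMD lanes: rdims = {2,4}
  std::vector<int> ldims = {4, 4}, rdims = {2, 4};
  std::vector<int> ocoor = {1, 3};   // outer (per-vector) coordinate
  std::vector<int> icoor = {1, 0};   // this lane's inner coordinate
  std::vector<int> lcoor(2);
  for (int mu = 0; mu < 2; mu++) lcoor[mu] = ocoor[mu] + rdims[mu] * icoor[mu];
  // the lane sits at local site (3,3) -> lexicographic index 3 + 3*4 = 15
  assert(lexIndex(lcoor, ldims) == 15);
  return 0;
}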
//Convert a Lattice from one precision to another
template<class VobjOut, class VobjIn>
@ -666,7 +615,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
  std::vector<SobjOut> in_slex_conv(in_grid->lSites());
  unvectorizeToLexOrdArray(in_slex_conv, in);
  parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){    parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){
    std::vector<int> out_ocoor(ndim);
    out_grid->oCoorFromOindex(out_ocoor, out_oidx);


@ -62,20 +62,14 @@ namespace Grid {
    return ret;
  }
  template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){    template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){
    Lattice<obj> ret(rhs._grid);
    ret.checkerboard = rhs.checkerboard;
    conformable(ret,rhs);
    parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){
      ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp);
    }
    return ret;
  }


@ -30,7 +30,6 @@ directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/util/CompilerCompatible.h>

#include <cxxabi.h>
#include <memory>



@ -1,716 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDG_IO_H
#define GRID_ILDG_IO_H
#ifdef HAVE_LIME
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <pwd.h>
#include <sys/utsname.h>
#include <unistd.h>
//C-Lime is a must have for this functionality
extern "C" {
#include "lime.h"
}
namespace Grid {
namespace QCD {
/////////////////////////////////
// Encode word types as strings
/////////////////////////////////
template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); }
template<> inline std::string ScidacWordMnemonic<double> (void){ return std::string("D"); }
template<> inline std::string ScidacWordMnemonic<float> (void){ return std::string("F"); }
template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); }
template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); }
template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); }
template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); }
/////////////////////////////////////////
// Encode a generic tensor as a string
/////////////////////////////////////////
template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {
typedef typename getPrecision<vobj>::real_scalar_type stype;
int _ColourN = indexRank<ColourIndex,vobj>();
int _ColourScalar = isScalar<ColourIndex,vobj>();
int _ColourVector = isVector<ColourIndex,vobj>();
int _ColourMatrix = isMatrix<ColourIndex,vobj>();
int _SpinN = indexRank<SpinIndex,vobj>();
int _SpinScalar = isScalar<SpinIndex,vobj>();
int _SpinVector = isVector<SpinIndex,vobj>();
int _SpinMatrix = isMatrix<SpinIndex,vobj>();
int _LorentzN = indexRank<LorentzIndex,vobj>();
int _LorentzScalar = isScalar<LorentzIndex,vobj>();
int _LorentzVector = isVector<LorentzIndex,vobj>();
int _LorentzMatrix = isMatrix<LorentzIndex,vobj>();
std::stringstream stream;
stream << "GRID_";
stream << ScidacWordMnemonic<stype>();
// std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl;
// std::cout << " Spin N/S/V/M : " << _SpinN <<" "<<_SpinScalar <<"/"<<_SpinVector <<"/"<<_SpinMatrix<<std::endl;
// std::cout << " Colour N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl;
if ( _LorentzVector ) stream << "_LorentzVector"<<_LorentzN;
if ( _LorentzMatrix ) stream << "_LorentzMatrix"<<_LorentzN;
if ( _SpinVector ) stream << "_SpinVector"<<_SpinN;
if ( _SpinMatrix ) stream << "_SpinMatrix"<<_SpinN;
if ( _ColourVector ) stream << "_ColourVector"<<_ColourN;
if ( _ColourMatrix ) stream << "_ColourMatrix"<<_ColourN;
if ( _ColourScalar && _LorentzScalar && _SpinScalar ) stream << "_Complex";
typesize = sizeof(typename vobj::scalar_type);
if ( _ColourMatrix ) typesize*= _ColourN*_ColourN;
else typesize*= _ColourN;
if ( _SpinMatrix ) typesize*= _SpinN*_SpinN;
else typesize*= _SpinN;
colors = _ColourN;
spins = _SpinN;
datacount = _LorentzN;
return stream.str();
}
template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {
return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount);
};
////////////////////////////////////////////////////////////
// Helper to fill out metadata
////////////////////////////////////////////////////////////
template<class vobj> void ScidacMetaData(Lattice<vobj> & field,
FieldMetaData &header,
scidacRecord & _scidacRecord,
scidacFile & _scidacFile)
{
typedef typename getPrecision<vobj>::real_scalar_type stype;
/////////////////////////////////////
// Pull Grid's metadata
/////////////////////////////////////
PrepareMetaData(field,header);
/////////////////////////////////////
// Scidac Private File structure
/////////////////////////////////////
_scidacFile = scidacFile(field._grid);
/////////////////////////////////////
// Scidac Private Record structure
/////////////////////////////////////
scidacRecord sr;
sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount);
sr.date = header.creation_date;
sr.precision = ScidacWordMnemonic<stype>();
sr.recordtype = GRID_IO_FIELD;
_scidacRecord = sr;
std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl;
}
///////////////////////////////////////////////////////
// Scidac checksum
///////////////////////////////////////////////////////
static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb)
{
uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
if ( scidac_csuma !=scidac_checksuma) return 0;
if ( scidac_csumb !=scidac_checksumb) return 0;
return 1;
}
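scidacChecksumVerify parses the two hex strings carried in the scidacChecksum record with stoull(...,0,16) and compares them against the checksums recomputed from the binary payload. A standalone sketch of that comparison, with made-up checksum values:

#include <cassert>
#include <cstdint>
#include <string>

// mirrors scidacChecksumVerify: hex strings from the XML record vs recomputed words
static int checksumsAgree(const std::string &suma, const std::string &sumb,
                          uint32_t csuma, uint32_t csumb) {
  uint32_t a = std::stoull(suma, 0, 16);
  uint32_t b = std::stoull(sumb, 0, 16);
  if (csuma != a) return 0;
  if (csumb != b) return 0;
  return 1;
}

int main() {
  assert(checksumsAgree("deadbeef", "0badcafe", 0xdeadbeefu, 0x0badcafeu) == 1);
  assert(checksumsAgree("deadbeef", "0badcafe", 0xdeadbeefu, 0x00000000u) == 0);
  return 0;
}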
////////////////////////////////////////////////////////////////////////////////////
// Lime, ILDG and Scidac I/O classes
////////////////////////////////////////////////////////////////////////////////////
class GridLimeReader : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
///////////////////////////////////////////////////
FILE *File;
LimeReader *LimeR;
std::string filename;
/////////////////////////////////////////////
// Open the file
/////////////////////////////////////////////
void open(std::string &_filename)
{
filename= _filename;
File = fopen(filename.c_str(), "r");
LimeR = limeCreateReader(File);
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void){
fclose(File);
// limeDestroyReader(LimeR);
}
////////////////////////////////////////////
// Read a generic lattice field and verify checksum
////////////////////////////////////////////
template<class vobj>
void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
typedef typename vobj::scalar_object sobj;
scidacChecksum scidacChecksum_;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
std::string format = getFormatString<vobj>();
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl;
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
off_t offset= ftell(File);
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
/////////////////////////////////////////////
// Insist checksum is next record
/////////////////////////////////////////////
readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name);
/////////////////////////////////////////////
// Verify checksums
/////////////////////////////////////////////
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
return;
}
}
}
////////////////////////////////////////////
// Read a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
// should this be a do while; can we miss a first record??
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) {
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
XmlReader RD(&xmlc[0],"");
read(RD,object_name,object);
return;
}
}
assert(0);
}
};
class GridLimeWriter : public BinaryIO {
public:
///////////////////////////////////////////////////
// FIXME: format for RNG? Now just binary out instead
///////////////////////////////////////////////////
FILE *File;
LimeWriter *LimeW;
std::string filename;
void open(std::string &_filename) {
filename= _filename;
File = fopen(filename.c_str(), "w");
LimeW = limeCreateWriter(File); assert(LimeW != NULL );
}
/////////////////////////////////////////////
// Close the file
/////////////////////////////////////////////
void close(void) {
fclose(File);
// limeDestroyWriter(LimeW);
}
///////////////////////////////////////////////////////
// Lime utility functions
///////////////////////////////////////////////////////
int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize)
{
LimeRecordHeader *h;
h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize);
assert(limeWriteRecordHeader(h, LimeW) >= 0);
limeDestroyHeader(h);
return LIME_SUCCESS;
}
////////////////////////////////////////////
// Write a generic serialisable object
////////////////////////////////////////////
template<class serialisable_object>
void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name)
{
std::string xmlstring;
{
XmlWriter WR("","");
write(WR,object_name,object);
xmlstring = WR.XmlString();
}
uint64_t nbytes = xmlstring.size();
int err;
LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL);
err=limeWriteRecordHeader(h, LimeW); assert(err>=0);
err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
limeDestroyHeader(h);
}
////////////////////////////////////////////
// Write a generic lattice field and csum
////////////////////////////////////////////
template<class vobj>
void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name)
{
////////////////////////////////////////////
// Create record header
////////////////////////////////////////////
typedef typename vobj::scalar_object sobj;
int err;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites;
createLimeRecordHeader(record_name, 0, 0, PayloadSize);
////////////////////////////////////////////////////////////////////
// NB: FILE and iostream are jointly writing disjoint sequences in the
// the same file through different file handles (integer units).
//
// These are both buffered, so why I think this code is right is as follows.
//
// i) write record header to FILE *File, telegraphing the size.
// ii) ftell reads the offset from FILE *File .
// iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk.
// Closes iostream and flushes.
// iv) fseek on FILE * to end of this disjoint section.
// v) Continue writing scidac record.
////////////////////////////////////////////////////////////////////
off_t offset = ftell(File);
std::string format = getFormatString<vobj>();
BinarySimpleMunger<sobj,sobj> munge;
BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
////////////////////////////////////////
// Write checksum element, propagaing forward from the BinaryIO
// Always pair a checksum with a binary object, and close message
////////////////////////////////////////
scidacChecksum checksum;
std::stringstream streama; streama << std::hex << scidac_csuma;
std::stringstream streamb; streamb << std::hex << scidac_csumb;
checksum.suma= streama.str();
checksum.sumb= streamb.str();
std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl;
writeLimeObject(0,1,checksum,std::string("scidacChecksum" ),std::string(SCIDAC_CHECKSUM));
}
};
class ScidacWriter : public GridLimeWriter {
public:
template<class SerialisableUserFile>
void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile)
{
scidacFile _scidacFile(grid);
writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
}
////////////////////////////////////////////////
// Write generic lattice field in scidac format
////////////////////////////////////////////////
template <class vobj, class userRecord>
void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)
{
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
GridBase * grid = field._grid;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
ScidacMetaData(field,header,_scidacRecord,_scidacFile);
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
}
};
class IldgWriter : public ScidacWriter {
public:
///////////////////////////////////
// A little helper
///////////////////////////////////
void writeLimeIldgLFN(std::string &LFN)
{
uint64_t PayloadSize = LFN.size();
int err;
createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize);
err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0);
err=limeWriterCloseRecord(LimeW); assert(err>=0);
}
////////////////////////////////////////////////////////////////
// Special ILDG operations ; gauge configs only.
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
////////////////////////////////////////////////////////////////
template <class vsimd>
void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)
{
GridBase * grid = Umu._grid;
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
uint64_t nbytes;
////////////////////////////////////////
// fill the Grid header
////////////////////////////////////////
FieldMetaData header;
scidacRecord _scidacRecord;
scidacFile _scidacFile;
ScidacMetaData(Umu,header,_scidacRecord,_scidacFile);
std::string format = header.floating_point;
header.ensemble_id = description;
header.ensemble_label = description;
header.sequence_number = sequence;
header.ildg_lfn = LFN;
assert ( (format == std::string("IEEE32BIG"))
||(format == std::string("IEEE64BIG")) );
//////////////////////////////////////////////////////
// Fill ILDG header data struct
//////////////////////////////////////////////////////
ildgFormat ildgfmt ;
ildgfmt.field = std::string("su3gauge");
if ( format == std::string("IEEE32BIG") ) {
ildgfmt.precision = 32;
} else {
ildgfmt.precision = 64;
}
ildgfmt.version = 1.0;
ildgfmt.lx = header.dimension[0];
ildgfmt.ly = header.dimension[1];
ildgfmt.lz = header.dimension[2];
ildgfmt.lt = header.dimension[3];
assert(header.nd==4);
assert(header.nd==header.dimension.size());
//////////////////////////////////////////////////////////////////////////////
// Fill the USQCD info field
//////////////////////////////////////////////////////////////////////////////
usqcdInfo info;
info.version=1.0;
info.plaq = header.plaquette;
info.linktr = header.link_trace;
std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl;
//////////////////////////////////////////////
// Fill the Lime file record by record
//////////////////////////////////////////////
writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message
writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML));
writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML));
writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML));
writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML));
writeLimeObject(0,0,ildgfmt,std::string("ildgFormat") ,std::string(ILDG_FORMAT)); // rec
writeLimeIldgLFN(header.ildg_lfn); // rec
writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA)); // Closes message with checksum
// limeDestroyWriter(LimeW);
fclose(File);
}
};
class IldgReader : public GridLimeReader {
public:
////////////////////////////////////////////////////////////////
// Read either Grid/SciDAC/ILDG configuration
// Don't require scidac records EXCEPT checksum
// Use Grid MetaData object if present.
// Else use ILDG MetaData object if present.
// Else use SciDAC MetaData object if present.
////////////////////////////////////////////////////////////////
template <class vsimd>
void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) {
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef typename GaugeField::vector_object vobj;
typedef typename vobj::scalar_object sobj;
typedef LorentzColourMatrixF fobj;
typedef LorentzColourMatrixD dobj;
GridBase *grid = Umu._grid;
std::vector<int> dims = Umu._grid->FullDimensions();
assert(dims.size()==4);
// Metadata holders
ildgFormat ildgFormat_ ;
std::string ildgLFN_ ;
scidacChecksum scidacChecksum_;
usqcdInfo usqcdInfo_ ;
// track what we read from file
int found_ildgFormat =0;
int found_ildgLFN =0;
int found_scidacChecksum=0;
int found_usqcdInfo =0;
int found_ildgBinary =0;
int found_FieldMetaData =0;
uint32_t nersc_csum;
uint32_t scidac_csuma;
uint32_t scidac_csumb;
// Binary format
std::string format;
//////////////////////////////////////////////////////////////////////////
// Loop over all records
// -- Order is poorly guaranteed except that the ILDG header precedes the binary section.
// -- Run like an event loop.
// -- Impose a trust hierarchy: Grid metadata takes precedence, then ILDG, and failing
//    that, SciDAC.
// -- Insist on Scidac checksum record.
//////////////////////////////////////////////////////////////////////////
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration)
//////////////////////////////////////////////////////////////////
// If not BINARY_DATA read a string and parse
//////////////////////////////////////////////////////////////////
if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) {
// Copy out the string
std::vector<char> xmlc(nbytes+1,'\0');
limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);
std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl;
//////////////////////////////////
// ILDG format record
if ( !strncmp(limeReaderType(LimeR), ILDG_FORMAT,strlen(ILDG_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"ildgFormat",ildgFormat_);
if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG");
if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG");
assert( ildgFormat_.lx == dims[0]);
assert( ildgFormat_.ly == dims[1]);
assert( ildgFormat_.lz == dims[2]);
assert( ildgFormat_.lt == dims[3]);
found_ildgFormat = 1;
}
if ( !strncmp(limeReaderType(LimeR), ILDG_DATA_LFN,strlen(ILDG_DATA_LFN)) ) {
FieldMetaData_.ildg_lfn = std::string(&xmlc[0]);
found_ildgLFN = 1;
}
if ( !strncmp(limeReaderType(LimeR), GRID_FORMAT,strlen(GRID_FORMAT)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"FieldMetaData",FieldMetaData_);
format = FieldMetaData_.floating_point;
assert(FieldMetaData_.dimension[0] == dims[0]);
assert(FieldMetaData_.dimension[1] == dims[1]);
assert(FieldMetaData_.dimension[2] == dims[2]);
assert(FieldMetaData_.dimension[3] == dims[3]);
found_FieldMetaData = 1;
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_RECORD_XML,strlen(SCIDAC_RECORD_XML)) ) {
std::string xmls(&xmlc[0]);
// is it a USQCD info field
if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {
std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl;
XmlReader RD(&xmlc[0],"");
read(RD,"usqcdInfo",usqcdInfo_);
found_usqcdInfo = 1;
}
}
if ( !strncmp(limeReaderType(LimeR), SCIDAC_CHECKSUM,strlen(SCIDAC_CHECKSUM)) ) {
XmlReader RD(&xmlc[0],"");
read(RD,"scidacChecksum",scidacChecksum_);
found_scidacChecksum = 1;
}
} else {
/////////////////////////////////
// Binary data
/////////////////////////////////
std::cout << GridLogMessage << "ILDG Binary record found : " ILDG_BINARY_DATA << std::endl;
off_t offset= ftell(File);
if ( format == std::string("IEEE64BIG") ) {
GaugeSimpleMunger<dobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
} else {
GaugeSimpleMunger<fobj, sobj> munge;
BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb);
}
found_ildgBinary = 1;
}
}
//////////////////////////////////////////////////////
// Minimally must find binary segment and checksum
// Since this is an ILDG reader require ILDG format
//////////////////////////////////////////////////////
assert(found_ildgBinary);
assert(found_ildgFormat);
assert(found_scidacChecksum);
// Must find something with the lattice dimensions
assert(found_FieldMetaData||found_ildgFormat);
if ( found_FieldMetaData ) {
std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl;
} else {
assert(found_ildgFormat);
assert ( ildgFormat_.field == std::string("su3gauge") );
///////////////////////////////////////////////////////////////////////////////////////
// Populate our Grid metadata as best we can
///////////////////////////////////////////////////////////////////////////////////////
std::ostringstream vers; vers << ildgFormat_.version;
FieldMetaData_.hdr_version = vers.str();
FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3");
FieldMetaData_.nd=4;
FieldMetaData_.dimension.resize(4);
FieldMetaData_.dimension[0] = ildgFormat_.lx ;
FieldMetaData_.dimension[1] = ildgFormat_.ly ;
FieldMetaData_.dimension[2] = ildgFormat_.lz ;
FieldMetaData_.dimension[3] = ildgFormat_.lt ;
if ( found_usqcdInfo ) {
FieldMetaData_.plaquette = usqcdInfo_.plaq;
FieldMetaData_.link_trace= usqcdInfo_.linktr;
std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl;
std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl;
} else {
FieldMetaData_.plaquette = 0.0;
FieldMetaData_.link_trace= 0.0;
std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl;
}
}
////////////////////////////////////////////////////////////
// Really really want to mandate a scidac checksum
////////////////////////////////////////////////////////////
if ( found_scidacChecksum ) {
FieldMetaData_.scidac_checksuma = stoull(scidacChecksum_.suma,0,16);
FieldMetaData_.scidac_checksumb = stoull(scidacChecksum_.sumb,0,16);
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb);
assert( scidac_csuma ==FieldMetaData_.scidac_checksuma);
assert( scidac_csumb ==FieldMetaData_.scidac_checksumb);
std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl;
} else {
std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl;
assert(0); // Should we always insist on a checksum ?
}
if ( found_FieldMetaData || found_usqcdInfo ) {
FieldMetaData checker;
GaugeStatistics(Umu,checker);
assert(fabs(checker.plaquette - FieldMetaData_.plaquette )<1.0e-5);
assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5);
std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl;
}
}
};
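////////////////////////////////////////////////////////////////
// Hedged usage sketch (not part of the original source): how the
// writer above and this reader might be driven from user code.
// Only writeConfiguration/readConfiguration are taken from this
// file; the writer class name and the open()/close() members of
// the Lime base classes are assumptions.
////////////////////////////////////////////////////////////////
// IldgWriter writer;                                  // assumed class name
// writer.open("ckpoint_ildg.1000");                   // assumed base-class member
// writer.writeConfiguration(Umu, 1000, "lfn://example/ensemble/1000", "test ensemble");
// writer.close();                                     // assumed base-class member
//
// IldgReader reader;
// FieldMetaData md;
// reader.open("ckpoint_ildg.1000");                   // assumed base-class member
// reader.readConfiguration(Umu, md);
// reader.close();                                     // assumed base-class member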
}}
//HAVE_LIME
#endif
#endif

View File

@ -1,231 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/IldgIO.h
Copyright (C) 2015
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_ILDGTYPES_IO_H
#define GRID_ILDGTYPES_IO_H
#ifdef HAVE_LIME
extern "C" { // for linkage
#include "lime.h"
}
namespace Grid {
/////////////////////////////////////////////////////////////////////////////////
// Data representation of records that enter ILDG and SciDac formats
/////////////////////////////////////////////////////////////////////////////////
#define GRID_FORMAT "grid-format"
#define ILDG_FORMAT "ildg-format"
#define ILDG_BINARY_DATA "ildg-binary-data"
#define ILDG_DATA_LFN "ildg-data-lfn"
#define SCIDAC_CHECKSUM "scidac-checksum"
#define SCIDAC_PRIVATE_FILE_XML "scidac-private-file-xml"
#define SCIDAC_FILE_XML "scidac-file-xml"
#define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml"
#define SCIDAC_RECORD_XML "scidac-record-xml"
#define SCIDAC_BINARY_DATA "scidac-binary-data"
// Unused SciDAC record names; support for these could be added later
#define SCIDAC_SITELIST "scidac-sitelist"
////////////////////////////////////////////////////////////
const int GRID_IO_SINGLEFILE = 0; // hard-coded value lifted from QIO for compatibility
const int GRID_IO_MULTIFILE = 1; // hard-coded value lifted from QIO for compatibility
const int GRID_IO_FIELD = 0; // hard-coded value lifted from QIO for compatibility
const int GRID_IO_GLOBAL = 1; // hard-coded value lifted from QIO for compatibility
////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////
// QIO uses mandatory "private" records fixed format
// "Private" is in principle opaque; however, it cannot be changed now without breaking existing
// file compatibility, so it should be safe to assume the undocumented but de facto file structure.
/////////////////////////////////////////////////////////////////////////////////
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
////////////////////////
struct scidacFile : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile,
double, version,
int, spacetime,
std::string, dims, // must convert to int
int, volfmt);
std::vector<int> getDimensions(void) {
std::stringstream stream(dims);
std::vector<int> dimensions;
int n;
while(stream >> n){
dimensions.push_back(n);
}
return dimensions;
}
void setDimensions(std::vector<int> dimensions) {
char delimiter = ' ';
std::stringstream stream;
for(int i=0;i<dimensions.size();i++){
stream << dimensions[i];
if ( i != dimensions.size()-1) {
stream << delimiter <<std::endl;
}
}
dims = stream.str();
}
// Constructor from a GridBase fills in the dimensions
scidacFile() =default; // default constructor
scidacFile(GridBase * grid){
version = 1.0;
spacetime = grid->_ndimension;
setDimensions(grid->FullDimensions());
volfmt = GRID_IO_SINGLEFILE;
}
};
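// --------------------------------------------------------------------
// Hedged illustration (not in the original file): round-tripping the
// space-separated "dims" string used by scidacFile above. Pure std::
// code; <sstream>/<vector> are assumed to be in scope already, as the
// struct itself uses them. The function name is hypothetical.
// --------------------------------------------------------------------
inline std::vector<int> exampleDimsRoundTrip(void)
{
  scidacFile sf;
  sf.setDimensions(std::vector<int>({16,16,16,32})); // encodes "16 16 16 32"
  return sf.getDimensions();                          // parses back {16,16,16,32}
}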
///////////////////////////////////////////////////////////////////////
// scidac-private-record-xml : example
// <scidacRecord>
// <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype>
// <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins>
// <typesize>144</typesize><datacount>4</datacount>
// </scidacRecord>
///////////////////////////////////////////////////////////////////////
struct scidacRecord : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord,
double, version,
std::string, date,
int, recordtype,
std::string, datatype,
std::string, precision,
int, colors,
int, spins,
int, typesize,
int, datacount);
scidacRecord() { version =1.0; }
};
////////////////////////
// ILDG format
////////////////////////
struct ildgFormat : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat,
double, version,
std::string, field,
int, precision,
int, lx,
int, ly,
int, lz,
int, lt);
ildgFormat() { version=1.0; };
};
////////////////////////
// USQCD info
////////////////////////
struct usqcdInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo,
double, version,
double, plaq,
double, linktr,
std::string, info);
usqcdInfo() {
version=1.0;
};
};
////////////////////////
// Scidac Checksum
////////////////////////
struct scidacChecksum : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum,
double, version,
std::string, suma,
std::string, sumb);
scidacChecksum() {
version=1.0;
};
};
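// --------------------------------------------------------------------
// Hedged illustration (not in the original file): suma/sumb hold hex
// digit strings; the reader converts them with std::stoull, base 16.
// The function name is hypothetical; <cstdint> and <string> are assumed
// to be in scope via the Grid headers.
// --------------------------------------------------------------------
inline uint32_t exampleChecksumFromHex(const scidacChecksum &csum)
{
  return (uint32_t) std::stoull(csum.suma, 0, 16);
}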
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type: scidac-file-xml <title>MILC ILDG archival gauge configuration</title>
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Type:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////
// Scidac private file xml
// <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>
////////////////////////
#if 0
////////////////////////////////////////////////////////////////////////////////////////
// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf
////////////////////////////////////////////////////////////////////////////////////////
struct usqcdPropFile : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile,
double, version,
std::string, type,
std::string, info);
usqcdPropFile() {
version=1.0;
};
};
struct usqcdSourceInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo,
double, version,
std::string, info);
usqcdSourceInfo() {
version=1.0;
};
};
struct usqcdPropInfo : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo,
double, version,
int, spin,
int, color,
std::string, info);
usqcdPropInfo() {
version=1.0;
};
};
#endif
}
#endif
#endif

View File

@ -1,325 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/parallelIO/NerscIO.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <map>
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid {
///////////////////////////////////////////////////////
// Precision mapping
///////////////////////////////////////////////////////
template<class vobj> static std::string getFormatString (void)
{
std::string format;
typedef typename getPrecision<vobj>::real_scalar_type stype;
if ( sizeof(stype) == sizeof(float) ) {
format = std::string("IEEE32BIG");
}
if ( sizeof(stype) == sizeof(double) ) {
format = std::string("IEEE64BIG");
}
return format;
}
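// Hedged example (not in the original): for a double-precision field such as
// vLorentzColourMatrixD the scalar type is double, so
// getFormatString<vLorentzColourMatrixD>() returns "IEEE64BIG"; a single
// precision field returns "IEEE32BIG".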
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class FieldMetaData : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData,
int, nd,
std::vector<int>, dimension,
std::vector<std::string>, boundary,
int, data_start,
std::string, hdr_version,
std::string, storage_format,
double, link_trace,
double, plaquette,
uint32_t, checksum,
uint32_t, scidac_checksuma,
uint32_t, scidac_checksumb,
unsigned int, sequence_number,
std::string, data_type,
std::string, ensemble_id,
std::string, ensemble_label,
std::string, ildg_lfn,
std::string, creator,
std::string, creator_hardware,
std::string, creation_date,
std::string, archive_date,
std::string, floating_point);
FieldMetaData(void) {
nd=4;
dimension.resize(4);
boundary.resize(4);
}
};
namespace QCD {
using namespace Grid;
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void GridMetaData(GridBase *grid,FieldMetaData &header)
{
int nd = grid->_ndimension;
header.nd = nd;
header.dimension.resize(nd);
header.boundary.resize(nd);
for(int d=0;d<nd;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<nd;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
inline void MachineCharacteristics(FieldMetaData &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm_ = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm_, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
#define dump_meta_data(field, s) \
s << "BEGIN_HEADER" << std::endl; \
s << "HDR_VERSION = " << field.hdr_version << std::endl; \
s << "DATATYPE = " << field.data_type << std::endl; \
s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \
for(int i=0;i<4;i++){ \
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \
} \
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \
for(int i=0;i<4;i++){ \
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl; \
} \
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \
s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl; \
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl; \
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl; \
s << "CREATOR = " << field.creator << std::endl; \
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl; \
s << "CREATION_DATE = " << field.creation_date << std::endl; \
s << "ARCHIVE_DATE = " << field.archive_date << std::endl; \
s << "FLOATING_POINT = " << field.floating_point << std::endl; \
s << "END_HEADER" << std::endl;
template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vobj>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
MachineCharacteristics(header);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header)
{
// Compute link trace and average plaquette for QA of the data
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplF>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplF>::avgPlaquette(data);
}
inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header)
{
// Compute link trace and average plaquette for QA of the data
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplD>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplD>::avgPlaquette(data);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixF>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header)
{
GridBase *grid = field._grid;
std::string format = getFormatString<vLorentzColourMatrixD>();
header.floating_point = format;
header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac
GridMetaData(grid,header);
GaugeStatistics(field,header);
MachineCharacteristics(header);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<Nd;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
}
}
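// --------------------------------------------------------------------
// Hedged illustration (not in the original file): the same third-row
// reconstruction written for one plain 3x3 std::complex matrix,
// row2 = conj(row0 x row1), which is what reconstruct3 applies for each mu.
// <complex> is assumed to be in scope via the Grid headers; the function
// name is hypothetical.
// --------------------------------------------------------------------
inline void exampleReconstructThirdRow(std::complex<double> u[3][3])
{
  u[2][0] = std::conj(u[0][1]*u[1][2] - u[0][2]*u[1][1]); // x = y z - z y
  u[2][1] = std::conj(u[0][2]*u[1][0] - u[0][0]*u[1][2]); // y = z x - x z
  u[2][2] = std::conj(u[0][0]*u[1][1] - u[0][1]*u[1][0]); // z = x y - y x
}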
////////////////////////////////////////////////////////////////////////////////
// Some data types for intermediate storage
////////////////////////////////////////////////////////////////////////////////
template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >;
typedef iLorentzColour2x3<Complex> LorentzColour2x3;
typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
/////////////////////////////////////////////////////////////////////////////////
// Simple classes for precision conversion
/////////////////////////////////////////////////////////////////////////////////
template <class fobj, class sobj>
struct BinarySimpleUnmunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(sobj &in, fobj &out) {
// take it word by word and convert according to the target precision
fobj_stype *out_buffer = (fobj_stype *)&out;
sobj_stype *in_buffer = (sobj_stype *)&in;
size_t fobj_words = sizeof(out) / sizeof(fobj_stype);
size_t sobj_words = sizeof(in) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
template <class fobj, class sobj>
struct BinarySimpleMunger {
typedef typename getPrecision<fobj>::real_scalar_type fobj_stype;
typedef typename getPrecision<sobj>::real_scalar_type sobj_stype;
void operator()(fobj &in, sobj &out) {
// take it word by word and convert according to the target precision
fobj_stype *in_buffer = (fobj_stype *)&in;
sobj_stype *out_buffer = (sobj_stype *)&out;
size_t fobj_words = sizeof(in) / sizeof(fobj_stype);
size_t sobj_words = sizeof(out) / sizeof(sobj_stype);
assert(fobj_words == sobj_words);
for (unsigned int word = 0; word < sobj_words; word++)
out_buffer[word] = in_buffer[word]; // type conversion on the fly
}
};
template<class fobj,class sobj>
struct GaugeSimpleMunger{
void operator()(fobj &in, sobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template <class fobj, class sobj>
struct GaugeSimpleUnmunger {
void operator()(sobj &in, fobj &out) {
for (int mu = 0; mu < Nd; mu++) {
for (int i = 0; i < Nc; i++) {
for (int j = 0; j < Nc; j++) {
out(mu)()(i, j) = in(mu)()(i, j);
}}
}
};
};
template<class fobj,class sobj>
struct Gauge3x2munger{
void operator() (fobj &in,sobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
template<class fobj,class sobj>
struct Gauge3x2unmunger{
void operator() (sobj &in,fobj &out){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
}
};
}
}

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -6,9 +6,9 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Matt Spraggs <matthew.spraggs@gmail.com> Author: Matt Spraggs <matthew.spraggs@gmail.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -25,59 +25,253 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_NERSC_IO_H #ifndef GRID_NERSC_IO_H
#define GRID_NERSC_IO_H #define GRID_NERSC_IO_H
#include <algorithm>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <map>
#include <unistd.h>
#include <sys/utsname.h>
#include <pwd.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
using namespace Grid; using namespace Grid;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; compute header offset for payload // Some data types for intermediate storage
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO { template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >;
public:
static inline void truncate(std::string file){ typedef iLorentzColour2x3<Complex> LorentzColour2x3;
std::ofstream fout(file,std::ios::out); typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F;
typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D;
////////////////////////////////////////////////////////////////////////////////
// header specification/interpretation
////////////////////////////////////////////////////////////////////////////////
class NerscField {
public:
// header strings (not in order)
int dimension[4];
std::string boundary[4];
int data_start;
std::string hdr_version;
std::string storage_format;
// Checks on data
double link_trace;
double plaquette;
uint32_t checksum;
unsigned int sequence_number;
std::string data_type;
std::string ensemble_id ;
std::string ensemble_label ;
std::string creator ;
std::string creator_hardware ;
std::string creation_date ;
std::string archive_date ;
std::string floating_point;
};
//////////////////////////////////////////////////////////////////////
// Bit and Physical Checksumming and QA of data
//////////////////////////////////////////////////////////////////////
inline void NerscGrid(GridBase *grid,NerscField &header)
{
assert(grid->_ndimension==4);
for(int d=0;d<4;d++) {
header.dimension[d] = grid->_fdimensions[d];
}
for(int d=0;d<4;d++) {
header.boundary[d] = std::string("PERIODIC");
}
}
template<class GaugeField>
inline void NerscStatistics(GaugeField & data,NerscField &header)
{
// How to convert data precision etc...
header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data);
header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data);
}
inline void NerscMachineCharacteristics(NerscField &header)
{
// Who
struct passwd *pw = getpwuid (getuid());
if (pw) header.creator = std::string(pw->pw_name);
// When
std::time_t t = std::time(nullptr);
std::tm tm = *std::localtime(&t);
std::ostringstream oss;
// oss << std::put_time(&tm, "%c %Z");
header.creation_date = oss.str();
header.archive_date = header.creation_date;
// What
struct utsname name; uname(&name);
header.creator_hardware = std::string(name.nodename)+"-";
header.creator_hardware+= std::string(name.machine)+"-";
header.creator_hardware+= std::string(name.sysname)+"-";
header.creator_hardware+= std::string(name.release);
}
//////////////////////////////////////////////////////////////////////
// Utilities ; these are QCD aware
//////////////////////////////////////////////////////////////////////
inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum)
{
BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum);
}
inline void reconstruct3(LorentzColourMatrix & cm)
{
const int x=0;
const int y=1;
const int z=2;
for(int mu=0;mu<4;mu++){
cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy
cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz
cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx
} }
static inline unsigned int writeHeader(FieldMetaData &field,std::string file)
{
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
dump_meta_data(field, fout);
field.data_start = fout.tellp();
return field.data_start;
} }
// for the header-reader template<class fobj,class sobj>
static inline int readHeader(std::string file,GridBase *grid, FieldMetaData &field) struct NerscSimpleMunger{
{
int offset=0;
std::map<std::string,std::string> header;
std::string line;
////////////////////////////////////////////////// void operator() (fobj &in,sobj &out,uint32_t &csum){
// read the header
//////////////////////////////////////////////////
std::ifstream fin(file);
getline(fin,line); // read one line and insist is for(int mu=0;mu<4;mu++){
for(int i=0;i<3;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
};
};
removeWhitespace(line); template<class fobj,class sobj>
std::cout << GridLogMessage << "* " << line << std::endl; struct NerscSimpleUnmunger{
void operator() (sobj &in,fobj &out,uint32_t &csum){
for(int mu=0;mu<Nd;mu++){
for(int i=0;i<Nc;i++){
for(int j=0;j<Nc;j++){
out(mu)()(i,j) = in(mu)()(i,j);
}}}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
};
};
template<class fobj,class sobj>
struct Nersc3x2munger{
void operator() (fobj &in,sobj &out,uint32_t &csum){
NerscChecksum((uint32_t *)&in,sizeof(in),csum);
assert(line==std::string("BEGIN_HEADER")); for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)()(i,j) = in(mu)(i)(j);
}}
}
reconstruct3(out);
}
};
do { template<class fobj,class sobj>
getline(fin,line); // read one line struct Nersc3x2unmunger{
std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("="); void operator() (sobj &in,fobj &out,uint32_t &csum){
if(eq >0) {
for(int mu=0;mu<4;mu++){
for(int i=0;i<2;i++){
for(int j=0;j<3;j++){
out(mu)(i)(j) = in(mu)()(i,j);
}}
}
NerscChecksum((uint32_t *)&out,sizeof(out),csum);
}
};
////////////////////////////////////////////////////////////////////////////////
// Write and read from fstream; compute header offset for payload
////////////////////////////////////////////////////////////////////////////////
class NerscIO : public BinaryIO {
public:
static inline void truncate(std::string file){
std::ofstream fout(file,std::ios::out);
}
#define dump_nersc_header(field, s)\
s << "BEGIN_HEADER" << std::endl;\
s << "HDR_VERSION = " << field.hdr_version << std::endl;\
s << "DATATYPE = " << field.data_type << std::endl;\
s << "STORAGE_FORMAT = " << field.storage_format << std::endl;\
for(int i=0;i<4;i++){\
s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ;\
}\
s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl;\
s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl;\
for(int i=0;i<4;i++){\
s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;\
}\
\
s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl;\
s << "ENSEMBLE_ID = " << field.ensemble_id << std::endl;\
s << "ENSEMBLE_LABEL = " << field.ensemble_label << std::endl;\
s << "SEQUENCE_NUMBER = " << field.sequence_number << std::endl;\
s << "CREATOR = " << field.creator << std::endl;\
s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;\
s << "CREATION_DATE = " << field.creation_date << std::endl;\
s << "ARCHIVE_DATE = " << field.archive_date << std::endl;\
s << "FLOATING_POINT = " << field.floating_point << std::endl;\
s << "END_HEADER" << std::endl;
static inline unsigned int writeHeader(NerscField &field,std::string file)
{
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
dump_nersc_header(field, fout);
field.data_start = fout.tellp();
return field.data_start;
}
// for the header-reader
static inline int readHeader(std::string file,GridBase *grid, NerscField &field)
{
int offset=0;
std::map<std::string,std::string> header;
std::string line;
//////////////////////////////////////////////////
// read the header
//////////////////////////////////////////////////
std::ifstream fin(file);
getline(fin,line); // read one line and insist it is BEGIN_HEADER
removeWhitespace(line);
std::cout << GridLogMessage << "* " << line << std::endl;
assert(line==std::string("BEGIN_HEADER"));
do {
getline(fin,line); // read one line
std::cout << GridLogMessage << "* "<<line<< std::endl;
int eq = line.find("=");
if(eq >0) {
std::string key=line.substr(0,eq); std::string key=line.substr(0,eq);
std::string val=line.substr(eq+1); std::string val=line.substr(eq+1);
removeWhitespace(key); removeWhitespace(key);
@ -85,269 +279,275 @@ namespace Grid {
header[key] = val; header[key] = val;
} }
} while( line.find("END_HEADER") == std::string::npos ); } while( line.find("END_HEADER") == std::string::npos );
field.data_start = fin.tellg(); field.data_start = fin.tellg();
////////////////////////////////////////////////// //////////////////////////////////////////////////
// chomp the values // chomp the values
////////////////////////////////////////////////// //////////////////////////////////////////////////
field.hdr_version = header["HDR_VERSION"]; field.hdr_version = header["HDR_VERSION"];
field.data_type = header["DATATYPE"]; field.data_type = header["DATATYPE"];
field.storage_format = header["STORAGE_FORMAT"]; field.storage_format = header["STORAGE_FORMAT"];
field.dimension[0] = std::stol(header["DIMENSION_1"]); field.dimension[0] = std::stol(header["DIMENSION_1"]);
field.dimension[1] = std::stol(header["DIMENSION_2"]); field.dimension[1] = std::stol(header["DIMENSION_2"]);
field.dimension[2] = std::stol(header["DIMENSION_3"]); field.dimension[2] = std::stol(header["DIMENSION_3"]);
field.dimension[3] = std::stol(header["DIMENSION_4"]); field.dimension[3] = std::stol(header["DIMENSION_4"]);
assert(grid->_ndimension == 4); assert(grid->_ndimension == 4);
for(int d=0;d<4;d++){ for(int d=0;d<4;d++){
assert(grid->_fdimensions[d]==field.dimension[d]); assert(grid->_fdimensions[d]==field.dimension[d]);
}
field.link_trace = std::stod(header["LINK_TRACE"]);
field.plaquette = std::stod(header["PLAQUETTE"]);
field.boundary[0] = header["BOUNDARY_1"];
field.boundary[1] = header["BOUNDARY_2"];
field.boundary[2] = header["BOUNDARY_3"];
field.boundary[3] = header["BOUNDARY_4"];
field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"];
return field.data_start;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Now the meat: the object readers
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define PARALLEL_READ
#define PARALLEL_WRITE
template<class vsimd>
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
NerscField clone(header);
std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
uint32_t csum;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format);
#endif
} }
if ( ieee64 || ieee64big ) {
field.link_trace = std::stod(header["LINK_TRACE"]); #ifdef PARALLEL_READ
field.plaquette = std::stod(header["PLAQUETTE"]); csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
field.boundary[0] = header["BOUNDARY_1"]; #else
field.boundary[1] = header["BOUNDARY_2"]; csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
field.boundary[2] = header["BOUNDARY_3"]; (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format);
field.boundary[3] = header["BOUNDARY_4"]; #endif
field.checksum = std::stoul(header["CHECKSUM"],0,16);
field.ensemble_id = header["ENSEMBLE_ID"];
field.ensemble_label = header["ENSEMBLE_LABEL"];
field.sequence_number = std::stol(header["SEQUENCE_NUMBER"]);
field.creator = header["CREATOR"];
field.creator_hardware = header["CREATOR_HARDWARE"];
field.creation_date = header["CREATION_DATE"];
field.archive_date = header["ARCHIVE_DATE"];
field.floating_point = header["FLOATING_POINT"];
return field.data_start;
} }
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// if ( ieee32 || ieee32big ) {
// Now the meat: the object readers #ifdef PARALLEL_READ
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
template<class vsimd> #else
static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
FieldMetaData& header, (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format);
std::string file) #endif
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
GridBase *grid = Umu._grid;
int offset = readHeader(file,Umu._grid,header);
FieldMetaData clone(header);
std::string format(header.floating_point);
int ieee32big = (format == std::string("IEEE32BIG"));
int ieee32 = (format == std::string("IEEE32"));
int ieee64big = (format == std::string("IEEE64BIG"));
int ieee64 = (format == std::string("IEEE64"));
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
// depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type>
if ( header.data_type == std::string("4D_SU3_GAUGE") ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>
(Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
if ( ieee64 || ieee64big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>
(Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
} else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) {
if ( ieee32 || ieee32big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
if ( ieee64 || ieee64big ) {
BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
}
} else {
assert(0);
}
GaugeStatistics(Umu,clone);
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec
<<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
if ( fabs(clone.plaquette -header.plaquette ) >= 1.0e-5 ) {
std::cout << " Plaquette mismatch "<<std::endl;
std::cout << Umu[0]<<std::endl;
std::cout << Umu[1]<<std::endl;
}
if ( nersc_csum != header.checksum ) {
std::cerr << " checksum mismatch " << std::endl;
std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl;
std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl;
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
} }
if ( ieee64 || ieee64big ) {
#ifdef PARALLEL_READ
csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#else
csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD>
(Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format);
#endif
}
} else {
assert(0);
}
template<class vsimd> NerscStatistics<GaugeField>(Umu,clone);
static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,
std::string file,
int two_row,
int bits32)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj; std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<< csum<< std::dec
typedef typename vobj::scalar_object sobj; <<" header "<<std::hex<<header.checksum<<std::dec <<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette
<<" header "<<header.plaquette<<std::endl;
std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace
<<" header "<<header.link_trace<<std::endl;
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(csum == header.checksum );
FieldMetaData header; std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl;
/////////////////////////////////////////// }
// Following should become arguments
///////////////////////////////////////////
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D; template<class vsimd>
typedef LorentzColour2x3D fobj2D; static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32)
{
typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField;
typedef iLorentzColourMatrix<vsimd> vobj;
typedef typename vobj::scalar_object sobj;
// Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
typedef LorentzColourMatrixD fobj3D;
typedef LorentzColour2x3D fobj2D;
typedef LorentzColourMatrixF fobj3f;
typedef LorentzColour2x3F fobj2f;
GridBase *grid = Umu._grid;
NerscGrid(grid,header);
NerscStatistics<GaugeField>(Umu,header);
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
GridBase *grid = Umu._grid; truncate(file);
GridMetaData(grid,header); if ( two_row ) {
assert(header.nd==4);
GaugeStatistics(Umu,header);
MachineCharacteristics(header);
int offset; header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE");
truncate(file); Nersc3x2unmunger<fobj2D,sobj> munge;
BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#else
csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point);
#endif
// Sod it -- always write 3x3 double } else {
header.floating_point = std::string("IEEE64BIG"); header.floating_point = std::string("IEEE64BIG");
header.data_type = std::string("4D_SU3_GAUGE_3x3"); header.data_type = std::string("4D_SU3_GAUGE_3x3");
GaugeSimpleUnmunger<fobj3D,sobj> munge; NerscSimpleUnmunger<fobj3D,sobj> munge;
offset = writeHeader(header,file); BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum);
offset = writeHeader(header,file);
#ifdef PARALLEL_WRITE
csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#else
csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point);
#endif
}
uint32_t nersc_csum,scidac_csuma,scidac_csumb; std::cout<<GridLogMessage <<"Written NERSC Configuration "<<file<< " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl;
BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point,
nersc_csum,scidac_csuma,scidac_csumb);
header.checksum = nersc_csum;
writeHeader(header,file);
std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum " }
<<std::hex<<header.checksum
<<std::dec<<" plaq "<< header.plaquette <<std::endl;
}
///////////////////////////////
// RNG state
///////////////////////////////
static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
{
typedef typename GridParallelRNG::RngStateType RngStateType;
// Following should become arguments ///////////////////////////////
FieldMetaData header; // RNG state
header.sequence_number = 1; ///////////////////////////////
header.ensemble_id = "UKQCD"; static inline void writeRNGState(GridSerialRNG &serial,GridParallelRNG &parallel,std::string file)
header.ensemble_label = "DWF"; {
typedef typename GridParallelRNG::RngStateType RngStateType;
GridBase *grid = parallel._grid; // Following should become arguments
NerscField header;
header.sequence_number = 1;
header.ensemble_id = "UKQCD";
header.ensemble_label = "DWF";
GridMetaData(grid,header); GridBase *grid = parallel._grid;
assert(header.nd==4);
header.link_trace=0.0;
header.plaquette=0.0;
MachineCharacteristics(header);
int offset; NerscGrid(grid,header);
header.link_trace=0.0;
header.plaquette=0.0;
NerscMachineCharacteristics(header);
uint32_t csum;
int offset;
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("RANLUX48"); header.data_type = std::string("RANLUX48");
#endif #endif
#ifdef RNG_MT19937 #ifdef RNG_MT19937
header.floating_point = std::string("UINT32"); header.floating_point = std::string("UINT32");
header.data_type = std::string("MT19937"); header.data_type = std::string("MT19937");
#endif #endif
#ifdef RNG_SITMO #ifdef RNG_SITMO
header.floating_point = std::string("UINT64"); header.floating_point = std::string("UINT64");
header.data_type = std::string("SITMO"); header.data_type = std::string("SITMO");
#endif #endif
truncate(file); truncate(file);
offset = writeHeader(header,file); offset = writeHeader(header,file);
uint32_t nersc_csum,scidac_csuma,scidac_csumb; csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset);
BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); header.checksum = csum;
header.checksum = nersc_csum; offset = writeHeader(header,file);
offset = writeHeader(header,file);
std::cout<<GridLogMessage std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl;
<<"Written NERSC RNG STATE "<<file<< " checksum "
<<std::hex<<header.checksum
<<std::dec<<std::endl;
} }
static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file) static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file)
{ {
typedef typename GridParallelRNG::RngStateType RngStateType; typedef typename GridParallelRNG::RngStateType RngStateType;
GridBase *grid = parallel._grid; GridBase *grid = parallel._grid;
int offset = readHeader(file,grid,header); int offset = readHeader(file,grid,header);
FieldMetaData clone(header); NerscField clone(header);
std::string format(header.floating_point); std::string format(header.floating_point);
std::string data_type(header.data_type); std::string data_type(header.data_type);
#ifdef RNG_RANLUX #ifdef RNG_RANLUX
assert(format == std::string("UINT64")); assert(format == std::string("UINT64"));
assert(data_type == std::string("RANLUX48")); assert(data_type == std::string("RANLUX48"));
#endif #endif
#ifdef RNG_MT19937 #ifdef RNG_MT19937
assert(format == std::string("UINT32")); assert(format == std::string("UINT32"));
assert(data_type == std::string("MT19937")); assert(data_type == std::string("MT19937"));
#endif #endif
#ifdef RNG_SITMO #ifdef RNG_SITMO
assert(format == std::string("UINT64")); assert(format == std::string("UINT64"));
assert(data_type == std::string("SITMO")); assert(data_type == std::string("SITMO"));
#endif #endif
// depending on datatype, set up munger; // depending on datatype, set up munger;
// munger is a function of <floating point, Real, data_type> // munger is a function of <floating point, Real, data_type>
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset);
BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb);
if ( nersc_csum != header.checksum ) { assert(csum == header.checksum );
std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl;
exit(0);
}
assert(nersc_csum == header.checksum );
std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl;
} }
}; };
}}
}}
#endif #endif

View File

@ -205,14 +205,13 @@ public:
void Stop(void) { void Stop(void) {
count=0; count=0;
cycles=0; cycles=0;
size_t ign;
#ifdef __linux__ #ifdef __linux__
ssize_t ign;
if ( fd!= -1) { if ( fd!= -1) {
::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0); ::ioctl(cyclefd, PERF_EVENT_IOC_DISABLE, 0);
ign=::read(fd, &count, sizeof(long long)); ign=::read(fd, &count, sizeof(long long));
ign+=::read(cyclefd, &cycles, sizeof(long long)); ign=::read(cyclefd, &cycles, sizeof(long long));
assert(ign=2*sizeof(long long));
} }
elapsed = cyclecount() - begin; elapsed = cyclecount() - begin;
#else #else

View File

@ -1,124 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/QCD.h
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_LT_H
#define GRID_LT_H
namespace Grid{
// First steps in the complete generalization of the Physics part
// Design not final
namespace LatticeTheories {
template <int Dimensions>
struct LatticeTheory {
static const int Nd = Dimensions;
static const int Nds = Dimensions * 2; // double stored field
template <typename vtype>
using iSinglet = iScalar<iScalar<iScalar<vtype> > >;
};
template <int Dimensions, int Colours>
struct LatticeGaugeTheory : public LatticeTheory<Dimensions> {
static const int Nds = Dimensions * 2;
static const int Nd = Dimensions;
static const int Nc = Colours;
template <typename vtype>
using iColourMatrix = iScalar<iScalar<iMatrix<vtype, Nc> > >;
template <typename vtype>
using iLorentzColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nd>;
template <typename vtype>
using iDoubleStoredColourMatrix = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
template <typename vtype>
using iColourVector = iScalar<iScalar<iVector<vtype, Nc> > >;
};
template <int Dimensions, int Colours, int Spin>
struct FermionicLatticeGaugeTheory
: public LatticeGaugeTheory<Dimensions, Colours> {
static const int Nd = Dimensions;
static const int Nds = Dimensions * 2;
static const int Nc = Colours;
static const int Ns = Spin;
template <typename vtype>
using iSpinMatrix = iScalar<iMatrix<iScalar<vtype>, Ns> >;
template <typename vtype>
using iSpinColourMatrix = iScalar<iMatrix<iMatrix<vtype, Nc>, Ns> >;
template <typename vtype>
using iSpinVector = iScalar<iVector<iScalar<vtype>, Ns> >;
template <typename vtype>
using iSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Ns> >;
// These 2 only if Spin is a multiple of 2
static const int Nhs = Spin / 2;
template <typename vtype>
using iHalfSpinVector = iScalar<iVector<iScalar<vtype>, Nhs> >;
template <typename vtype>
using iHalfSpinColourVector = iScalar<iVector<iVector<vtype, Nc>, Nhs> >;
//tests
typedef iColourMatrix<Complex> ColourMatrix;
typedef iColourMatrix<ComplexF> ColourMatrixF;
typedef iColourMatrix<ComplexD> ColourMatrixD;
};
// Examples, not complete now.
struct QCD : public FermionicLatticeGaugeTheory<4, 3, 4> {
static const int Xp = 0;
static const int Yp = 1;
static const int Zp = 2;
static const int Tp = 3;
static const int Xm = 4;
static const int Ym = 5;
static const int Zm = 6;
static const int Tm = 7;
typedef FermionicLatticeGaugeTheory FLGT;
typedef FLGT::iSpinMatrix<Complex > SpinMatrix;
typedef FLGT::iSpinMatrix<ComplexF > SpinMatrixF;
typedef FLGT::iSpinMatrix<ComplexD > SpinMatrixD;
};
struct QED : public FermionicLatticeGaugeTheory<4, 1, 4> {//fill
};
template <int Dimensions>
struct Scalar : public LatticeTheory<Dimensions> {};
}; // LatticeTheories
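// --------------------------------------------------------------------
// Hedged sketch (not in the original file): the traits above are meant
// to be consumed at compile time, e.g.
//
// static_assert(LatticeTheories::QCD::Nc == 3, "three colours");
// static_assert(LatticeTheories::QCD::Ns == 4, "four spin components");
// typedef LatticeTheories::QCD::SpinMatrixD ExampleSpinMatrix;
// --------------------------------------------------------------------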
} // Grid
#endif
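The header above is a compile-time hierarchy: LatticeTheory fixes the dimension, LatticeGaugeTheory adds the colour group, and FermionicLatticeGaugeTheory adds the spin structure, each level exposing the tensor-nest aliases it needs. A minimal compile-time sketch (illustrative only, not part of either branch; it assumes this header is reachable through the usual Grid include):

    using LT = Grid::LatticeTheories::QCD;   // the struct above, not the Grid::QCD namespace
    static_assert(LT::Nd == 4 && LT::Nc == 3 && LT::Ns == 4, "4d SU(3) theory with 4 spin components");
    static_assert(LT::Nhs == 2, "half spinors carry Ns/2 components");
    LT::SpinMatrixD sm;   // resolves to iScalar<iMatrix<iScalar<ComplexD>, 4> >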


@ -32,12 +32,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_BASE_H #ifndef GRID_QCD_BASE_H
#define GRID_QCD_BASE_H #define GRID_QCD_BASE_H
namespace Grid{ namespace Grid{
namespace QCD { namespace QCD {
static const int Xdir = 0;
static const int Ydir = 1;
static const int Zdir = 2;
static const int Tdir = 3;
static const int Xp = 0; static const int Xp = 0;
static const int Yp = 1; static const int Yp = 1;
@ -357,36 +354,36 @@ namespace QCD {
////////////////////////////////////////////// //////////////////////////////////////////////
template<class vobj> template<class vobj>
void pokeColour(Lattice<vobj> &lhs, void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<ColourIndex>(lhs,rhs,i); PokeIndex<ColourIndex>(lhs,rhs,i);
} }
template<class vobj> template<class vobj>
void pokeColour(Lattice<vobj> &lhs, void pokeColour(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs, const Lattice<decltype(peekIndex<ColourIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j) int i,int j)
{ {
PokeIndex<ColourIndex>(lhs,rhs,i,j); PokeIndex<ColourIndex>(lhs,rhs,i,j);
} }
template<class vobj> template<class vobj>
void pokeSpin(Lattice<vobj> &lhs, void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<SpinIndex>(lhs,rhs,i); PokeIndex<SpinIndex>(lhs,rhs,i);
} }
template<class vobj> template<class vobj>
void pokeSpin(Lattice<vobj> &lhs, void pokeSpin(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs, const Lattice<decltype(peekIndex<SpinIndex>(lhs._odata[0],0,0))> & rhs,
int i,int j) int i,int j)
{ {
PokeIndex<SpinIndex>(lhs,rhs,i,j); PokeIndex<SpinIndex>(lhs,rhs,i,j);
} }
template<class vobj> template<class vobj>
void pokeLorentz(Lattice<vobj> &lhs, void pokeLorentz(Lattice<vobj> &lhs,
const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs, const Lattice<decltype(peekIndex<LorentzIndex>(lhs._odata[0],0))> & rhs,
int i) int i)
{ {
PokeIndex<LorentzIndex>(lhs,rhs,i); PokeIndex<LorentzIndex>(lhs,rhs,i);
} }
@ -495,38 +492,6 @@ namespace QCD {
} //namespace QCD } //namespace QCD
} // Grid } // Grid
/*
<<<<<<< HEAD
#include <Grid/qcd/utils/SpaceTimeGrid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/spin/TwoSpinor.h>
#include <Grid/qcd/utils/LinalgUtils.h>
#include <Grid/qcd/utils/CovariantCshift.h>
// Include representations
#include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/hmc_types.h>
// Scalar field
#include <Grid/qcd/utils/ScalarObjs.h>
#include <Grid/qcd/action/Actions.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/observables/hmc_observable.h>
#include <Grid/qcd/hmc/HMC.h>
//#include <Grid/qcd/modules/mods.h>
=======
>>>>>>> develop
*/
#endif #endif
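The pokeColour/pokeSpin/pokeLorentz wrappers in the hunk above simply forward to PokeIndex on the named tensor index, with the rhs type deduced from peekIndex so that only conforming components can be inserted. A hypothetical usage sketch (the names grid and pRNG are illustrative; it assumes a GridCartesian and a GridParallelRNG set up in the usual way):

    LatticeColourVector v(&grid);  random(pRNG, v);   // colour-vector field
    LatticeComplex      c(&grid);  random(pRNG, c);   // one complex component
    pokeColour(v, c, 0);                              // same as PokeIndex<ColourIndex>(v, c, 0)
    LatticeColourMatrix m(&grid);
    pokeColour(m, c, 0, 1);                           // insert the (0,1) colour entry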


@ -4,11 +4,10 @@ Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionBase.h Source file: ./lib/qcd/action/ActionBase.h
Copyright (C) 2015-2016 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -28,29 +27,128 @@ See the full license in the file "LICENSE" in the top level distribution
directory directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef QCD_ACTION_BASE
#ifndef ACTION_BASE_H #define QCD_ACTION_BASE
#define ACTION_BASE_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
template <class GaugeField > template <class GaugeField>
class Action class Action {
{
public: public:
bool is_smeared = false; bool is_smeared = false;
// Heatbath? // Boundary conditions? // Heatbath?
virtual void refresh(const GaugeField& U, GridParallelRNG& pRNG) = 0; // refresh pseudofermions virtual void refresh(const GaugeField& U,
virtual RealD S(const GaugeField& U) = 0; // evaluate the action GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual std::string action_name() = 0; // return the action name virtual void deriv(const GaugeField& U,
virtual std::string LogParameters() = 0; // prints action parameters GaugeField& dSdU) = 0; // evaluate the action derivative
virtual ~Action(){} virtual ~Action(){};
}; };
// Indexing of tuple types
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
/*
template <class GaugeField>
struct ActionLevel {
public:
typedef Action<GaugeField>*
ActPtr; // now force the same colours as the rest of the code
//Add supported representations here
unsigned int multiplier;
std::vector<ActPtr> actions;
ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
assert(mul >= 1);
};
void push_back(ActPtr ptr) { actions.push_back(ptr); }
};
*/
template <class GaugeField, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<GaugeField>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
action_collection actions_hirep;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
std::vector<ActPtr>& actions;
// Temporary conversion between ActionLevel and ActionLevelHirep
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
//apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
};
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
template < class Field >
void push_back(Action<Field>* ptr) {
// insert only in the correct vector
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template < class ActPtr>
static void resize(ActPtr ap, unsigned int n){
ap->resize(n);
}
//template <std::size_t I>
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
//template <class GaugeField>
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
} }
} }
#endif // ACTION_BASE_H #endif
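The Index<> metafunction introduced above yields the position of the first occurrence of a type inside a std::tuple at compile time; ActionLevel::push_back uses it to route an Action<Field>* into the actions_hirep vector that matches that Field's representation. A small illustrative check (not part of the commit), assuming the Grid::QCD namespace is open and <tuple> is included:

    using Tup = std::tuple<int, double, char>;
    static_assert(Index<int,    Tup>::value == 0, "first element");
    static_assert(Index<double, Tup>::value == 1, "second element");
    static_assert(Index<char,   Tup>::value == 2, "third element");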


@ -31,31 +31,15 @@ directory
#define QCD_ACTION_CORE #define QCD_ACTION_CORE
#include <Grid/qcd/action/ActionBase.h> #include <Grid/qcd/action/ActionBase.h>
#include <Grid/qcd/action/ActionSet.h>
#include <Grid/qcd/action/ActionParams.h> #include <Grid/qcd/action/ActionParams.h>
//////////////////////////////////////////// ////////////////////////////////////////////
// Gauge Actions // Gauge Actions
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/gauge/Gauge.h> #include <Grid/qcd/action/gauge/Gauge.h>
//////////////////////////////////////////// ////////////////////////////////////////////
// Fermion prereqs // Fermion prereqs
//////////////////////////////////////////// ////////////////////////////////////////////
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
////////////////////////////////////////////
// Scalar Actions
////////////////////////////////////////////
#include <Grid/qcd/action/scalar/Scalar.h>
////////////////////////////////////////////
// Utility functions
////////////////////////////////////////////
#include <Grid/qcd/utils/Metric.h>
#include <Grid/qcd/utils/CovariantLaplacian.h>
#endif #endif


@ -1,92 +1,67 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionParams.h Source file: ./lib/qcd/action/ActionParams.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ACTION_PARAMS_H #ifndef GRID_QCD_ACTION_PARAMS_H
#define GRID_QCD_ACTION_PARAMS_H #define GRID_QCD_ACTION_PARAMS_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
// These can move into a params header and be given MacroMagic serialisation // These can move into a params header and be given MacroMagic serialisation
struct GparityWilsonImplParams { struct GparityWilsonImplParams {
bool overlapCommsCompute; bool overlapCommsCompute;
std::vector<int> twists; std::vector<int> twists;
GparityWilsonImplParams() : twists(Nd, 0), overlapCommsCompute(false){}; GparityWilsonImplParams () : twists(Nd,0), overlapCommsCompute(false) {};
};
struct WilsonImplParams {
bool overlapCommsCompute;
std::vector<Complex> boundary_phases;
WilsonImplParams() : overlapCommsCompute(false) {
boundary_phases.resize(Nd, 1.0);
}; };
WilsonImplParams(const std::vector<Complex> phi)
: boundary_phases(phi), overlapCommsCompute(false) {}
};
struct StaggeredImplParams { struct WilsonImplParams {
StaggeredImplParams() {}; bool overlapCommsCompute;
}; WilsonImplParams() : overlapCommsCompute(false) {};
};
struct OneFlavourRationalParams : Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(OneFlavourRationalParams,
RealD, lo,
RealD, hi,
int, MaxIter,
RealD, tolerance,
int, degree,
int, precision);
// MaxIter and tolerance, vectors??
// constructor
OneFlavourRationalParams( RealD _lo = 0.0,
RealD _hi = 1.0,
int _maxit = 1000,
RealD tol = 1.0e-8,
int _degree = 10,
int _precision = 64)
: lo(_lo),
hi(_hi),
MaxIter(_maxit),
tolerance(tol),
degree(_degree),
precision(_precision){};
};
}
}
struct StaggeredImplParams {
StaggeredImplParams() {};
};
struct OneFlavourRationalParams {
RealD lo;
RealD hi;
int MaxIter; // Vector?
RealD tolerance; // Vector?
int degree=10;
int precision=64;
OneFlavourRationalParams (RealD _lo,RealD _hi,int _maxit,RealD tol=1.0e-8,int _degree = 10,int _precision=64) :
lo(_lo), hi(_hi), MaxIter(_maxit), tolerance(tol), degree(_degree), precision(_precision)
{};
};
}}
#endif #endif
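Both sides of the diff keep the same parameter structs; the develop side (left column) adds boundary_phases to WilsonImplParams and makes OneFlavourRationalParams serialisable with default arguments. A hypothetical construction sketch, assuming the usual Grid typedefs:

    std::vector<Complex> phases = {1., 1., 1., -1.};               // antiperiodic in time
    WilsonImplParams wilson_params(phases);                        // develop-side constructor
    OneFlavourRationalParams rat_params(0.1, 64.0, 1000, 1.0e-8);  // lo, hi, MaxIter, tolerance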


@ -1,116 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionSet.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef ACTION_SET_H
#define ACTION_SET_H
namespace Grid {
// Should drop this namespace here
namespace QCD {
//////////////////////////////////
// Indexing of tuple types
//////////////////////////////////
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
};
////////////////////////////////////////////
// Action Level
// Action collection
// in a integration level
// (for multilevel integration schemes)
////////////////////////////////////////////
template <class Field, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<Field>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
action_collection actions_hirep;
std::vector<ActPtr>& actions;
explicit ActionLevel(unsigned int mul = 1) :
actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
// apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
}
template < class GenField >
void push_back(Action<GenField>* ptr) {
// insert only in the correct vector
std::get< Index < GenField, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template <class ActPtr>
static void resize(ActPtr ap, unsigned int n) {
ap->resize(n);
}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
};
// Define the ActionSet
template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
} // QCD
} // Grid
#endif // ACTION_SET_H
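The removed ActionSet.h above (its contents sit inside ActionBase.h on the old-commit side) defines the per-integration-level container for multilevel integrators. A typical assembly, sketched here for the fundamental representation only (WilsonGaugeActionR and its beta-only constructor are assumed from the gauge action headers; names are illustrative):

    WilsonGaugeActionR gauge_action(5.6);                  // beta = 5.6
    ActionLevel<LatticeGaugeField> level1(1);              // multiplier 1
    level1.push_back(&gauge_action);                       // routed via Index<> into the fundamental slot
    ActionSet<LatticeGaugeField, NoHirep> full_set;
    full_set.push_back(level1);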


@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h> #include <Grid/Eigen/Dense>
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> #include <Grid/qcd/action/fermion/CayleyFermion5D.h>
@ -320,7 +320,7 @@ void CayleyFermion5D<Impl>::MDeriv (GaugeField &mat,const FermionField &U,const
this->DhopDeriv(mat,U,Din,dag); this->DhopDeriv(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din); MeooeDag5D(U,Din);
this->DhopDeriv(mat,Din,V,dag); this->DhopDeriv(mat,Din,V,dag);
} }
}; };
@ -335,8 +335,8 @@ void CayleyFermion5D<Impl>::MoeDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivOE(mat,U,Din,dag); this->DhopDerivOE(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din); MeooeDag5D(U,Din);
this->DhopDerivOE(mat,Din,V,dag); this->DhopDerivOE(mat,Din,V,dag);
} }
}; };
template<class Impl> template<class Impl>
@ -350,7 +350,7 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
this->DhopDerivEO(mat,U,Din,dag); this->DhopDerivEO(mat,U,Din,dag);
} else { } else {
// U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call // U d/du [D_w D5]^dag V = U D5^dag d/du DW^dag Y // implicit adj on U in call
Meooe5D(U,Din); MeooeDag5D(U,Din);
this->DhopDerivEO(mat,Din,V,dag); this->DhopDerivEO(mat,Din,V,dag);
} }
}; };
@ -380,8 +380,6 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// The Cayley coeffs (unprec) // The Cayley coeffs (unprec)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
assert(gamma.size()==Ls);
omega.resize(Ls); omega.resize(Ls);
bs.resize(Ls); bs.resize(Ls);
cs.resize(Ls); cs.resize(Ls);
@ -414,11 +412,10 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
for(int i=0; i < Ls; i++){ for(int i=0; i < Ls; i++){
as[i] = 1.0; as[i] = 1.0;
omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
// assert(fabs(omega[i])>0.0);
bs[i] = 0.5*(bpc/omega[i] + bmc); bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc); cs[i] = 0.5*(bpc/omega[i] - bmc);
} }
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Constants for the preconditioned matrix Cayley form // Constants for the preconditioned matrix Cayley form
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
@ -428,12 +425,12 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
ceo.resize(Ls); ceo.resize(Ls);
for(int i=0;i<Ls;i++){ for(int i=0;i<Ls;i++){
bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0); bee[i]=as[i]*(bs[i]*(4.0-this->M5) +1.0);
// assert(fabs(bee[i])>0.0);
cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5)); cee[i]=as[i]*(1.0-cs[i]*(4.0-this->M5));
beo[i]=as[i]*bs[i]; beo[i]=as[i]*bs[i];
ceo[i]=-as[i]*cs[i]; ceo[i]=-as[i]*cs[i];
} }
aee.resize(Ls); aee.resize(Ls);
aeo.resize(Ls); aeo.resize(Ls);
for(int i=0;i<Ls;i++){ for(int i=0;i<Ls;i++){
@ -477,16 +474,14 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
{ {
Coeff_t delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) { for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
// assert(fabs(bee[j])>0.0);
delta_d *= cee[j]/bee[j];
}
dee[Ls-1] += delta_d; dee[Ls-1] += delta_d;
} }
int inv=1; int inv=1;
this->MooeeInternalCompute(0,inv,MatpInv,MatmInv); this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag); this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);
} }
@ -500,9 +495,7 @@ void CayleyFermion5D<Impl>::MooeeInternalCompute(int dag, int inv,
GridBase *grid = this->FermionRedBlackGrid(); GridBase *grid = this->FermionRedBlackGrid();
int LLs = grid->_rdimensions[0]; int LLs = grid->_rdimensions[0];
if ( LLs == Ls ) { if ( LLs == Ls ) return; // Not vectorised in 5th direction
return; // Not vectorised in 5th direction
}
Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
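Written out, the coefficient setup in SetCoefficientsInternal above implements (reading bpc as b+c and bmc as b-c, with M5 the domain-wall height):

    \[ \omega_i = \gamma_i\, z_{\rm hi}, \qquad
       b_i = \tfrac{1}{2}\Bigl(\tfrac{b+c}{\omega_i} + (b-c)\Bigr), \qquad
       c_i = \tfrac{1}{2}\Bigl(\tfrac{b+c}{\omega_i} - (b-c)\Bigr), \]
    \[ b^{ee}_i = a_i\bigl(b_i(4-M_5)+1\bigr), \quad
       c^{ee}_i = a_i\bigl(1 - c_i(4-M_5)\bigr), \quad
       b^{eo}_i = a_i b_i, \quad c^{eo}_i = -a_i c_i, \]
    \[ d^{ee}_{L_s-1} \;\mathrel{+}=\; m\, c^{ee}_{L_s-1} \prod_{j=0}^{L_s-2} \frac{c^{ee}_j}{b^{ee}_j}, \]

with the develop side additionally asserting gamma.size()==Ls before using the coefficients.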


@ -29,7 +29,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid_Eigen_Dense.h> #include <Grid/Eigen/Dense>
#include <Grid/qcd/action/fermion/FermionCore.h> #include <Grid/qcd/action/fermion/FermionCore.h>
#include <Grid/qcd/action/fermion/CayleyFermion5D.h> #include <Grid/qcd/action/fermion/CayleyFermion5D.h>


@ -68,7 +68,7 @@ namespace Grid {
Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham Approx::zolotarev_data *zdata = Approx::higham(eps,this->Ls);// eps is ignored for higham
assert(zdata->n==this->Ls); assert(zdata->n==this->Ls);
std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl; // std::cout<<GridLogMessage << "DomainWallFermion with Ls="<<this->Ls<<std::endl;
// Call base setter // Call base setter
this->SetCoefficientsTanh(zdata,1.0,0.0); this->SetCoefficientsTanh(zdata,1.0,0.0);


@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid


@ -43,7 +43,7 @@ namespace QCD {
// Ultimately need Impl to always define types where XXX is opaque // Ultimately need Impl to always define types where XXX is opaque
// //
// typedef typename XXX Simd; // typedef typename XXX Simd;
// typedef typename XXX GaugeLinkField; // typedef typename XXX GaugeLinkField;
// typedef typename XXX GaugeField; // typedef typename XXX GaugeField;
// typedef typename XXX GaugeActField; // typedef typename XXX GaugeActField;
// typedef typename XXX FermionField; // typedef typename XXX FermionField;
@ -153,7 +153,7 @@ namespace QCD {
typedef typename Impl::Coeff_t Coeff_t; \ typedef typename Impl::Coeff_t Coeff_t; \
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base) \ INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base) INHERIT_FIMPL_TYPES(Base)
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
@ -198,18 +198,16 @@ namespace QCD {
ImplParams Params; ImplParams Params;
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){ WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
assert(Params.boundary_phases.size() == Nd);
};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
inline void multLink(SiteHalfSpinor &phi, inline void multLink(SiteHalfSpinor &phi,
const SiteDoubledGaugeField &U, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, const SiteHalfSpinor &chi,
int mu, int mu,
StencilEntry *SE, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
mult(&phi(), &U(mu), &chi()); mult(&phi(), &U(mu), &chi());
} }
@ -219,34 +217,16 @@ namespace QCD {
} }
inline void DoubleStore(GridBase *GaugeGrid, inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds, DoubledGaugeField &Uds,
const GaugeField &Umu) const GaugeField &Umu) {
{
typedef typename Simd::scalar_type scalar_type;
conformable(Uds._grid, GaugeGrid); conformable(Uds._grid, GaugeGrid);
conformable(Umu._grid, GaugeGrid); conformable(Umu._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid); GaugeLinkField U(GaugeGrid);
GaugeLinkField tmp(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
auto pha = Params.boundary_phases[mu]; PokeIndex<LorentzIndex>(Uds, U, mu);
scalar_type phase( real(pha),imag(pha) ); U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;
LatticeCoordinate(coor, mu);
U = PeekIndex<LorentzIndex>(Umu, mu);
tmp = where(coor == Lmu, phase * U, U);
PokeIndex<LorentzIndex>(Uds, tmp, mu);
U = adj(Cshift(U, mu, -1));
U = where(coor == 0, conjugate(phase) * U, U);
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
} }
} }
@ -263,11 +243,11 @@ namespace QCD {
tmp = zero; tmp = zero;
parallel_for(int sss=0;sss<tmp._grid->oSites();sss++){ parallel_for(int sss=0;sss<tmp._grid->oSites();sss++){
int sU=sss; int sU=sss;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
int sF = s+Ls*sU; int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
} }
} }
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
@ -330,12 +310,12 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
} }
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE, const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
SiteGaugeLink UU; SiteGaugeLink UU;
for (int i = 0; i < Nrepresentation; i++) { for (int i = 0; i < Nrepresentation; i++) {
for (int j = 0; j < Nrepresentation; j++) { for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i, j), U(mu)()(i, j)); vsplat(UU()()(i, j), U(mu)()(i, j));
} }
} }
mult(&phi(), &UU(), &chi()); mult(&phi(), &UU(), &chi());
@ -372,53 +352,10 @@ class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepres
{ {
assert(0); assert(0);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) { inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField &Atilde, int mu)
{
assert(0); assert(0);
// Following lines to be revised after Peter's addition of half prec
// missing put lane...
/*
typedef decltype(traceIndex<SpinIndex>(outerProduct(Btilde[0], Atilde[0]))) result_type;
unsigned int LLs = Btilde._grid->_rdimensions[0];
conformable(Atilde._grid,Btilde._grid);
GridBase* grid = mat._grid;
GridBase* Bgrid = Btilde._grid;
unsigned int dimU = grid->Nd();
unsigned int dimF = Bgrid->Nd();
GaugeLinkField tmp(grid);
tmp = zero;
// FIXME
// Current implementation works, thread safe, probably suboptimal
// Passing through the local coordinate for grid transformation
// the force grid is in general very different from the Ls vectorized grid
PARALLEL_FOR_LOOP
for (int so = 0; so < grid->oSites(); so++) {
std::vector<typename result_type::scalar_object> vres(Bgrid->Nsimd());
std::vector<int> ocoor; grid->oCoorFromOindex(ocoor,so);
for (int si = 0; si < tmp._grid->iSites(); si++){
typename result_type::scalar_object scalar_object; scalar_object = zero;
std::vector<int> local_coor;
std::vector<int> icoor; grid->iCoorFromIindex(icoor,si);
grid->InOutCoorToLocalCoor(ocoor, icoor, local_coor);
for (int s = 0; s < LLs; s++) {
std::vector<int> slocal_coor(dimF);
slocal_coor[0] = s;
for (int s4d = 1; s4d< dimF; s4d++) slocal_coor[s4d] = local_coor[s4d-1];
int sF = Bgrid->oIndexReduced(slocal_coor);
assert(sF < Bgrid->oSites());
extract(traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF])), vres);
// sum across the 5d dimension
for (auto v : vres) scalar_object += v;
}
tmp._odata[so].putlane(scalar_object, si);
}
}
PokeIndex<LorentzIndex>(mat, tmp, mu);
*/
} }
}; };
@ -469,19 +406,19 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// provide the multiply by link that is differentiated between Gparity (with // provide the multiply by link that is differentiated between Gparity (with
// flavour index) and non-Gparity // flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE, const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) { StencilImpl &St) {
typedef SiteHalfSpinor vobj; typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj; typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp; vobj vtmp;
sobj stmp; sobj stmp;
GridBase *grid = St._grid; GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
@ -489,13 +426,13 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// Fixme X.Y.Z.T hardcode in stencil // Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd; int mmu = mu % Nd;
// assert our assumptions // assert our assumptions
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl == 1) || (sl == 2)); assert((sl == 1) || (sl == 2));
std::vector<int> icoor; std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) { if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) { if ( sl == 2 ) {
@ -505,25 +442,25 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
extract(chi,vals); extract(chi,vals);
for(int s=0;s<Nsimd;s++){ for(int s=0;s<Nsimd;s++){
grid->iCoorFromIindex(icoor,s); grid->iCoorFromIindex(icoor,s);
assert((icoor[direction]==0)||(icoor[direction]==1)); assert((icoor[direction]==0)||(icoor[direction]==1));
int permute_lane; int permute_lane;
if ( distance == 1) { if ( distance == 1) {
permute_lane = icoor[direction]?1:0; permute_lane = icoor[direction]?1:0;
} else { } else {
permute_lane = icoor[direction]?0:1; permute_lane = icoor[direction]?0:1;
} }
if ( permute_lane ) { if ( permute_lane ) {
stmp(0) = vals[s](1); stmp(0) = vals[s](1);
stmp(1) = vals[s](0); stmp(1) = vals[s](0);
vals[s] = stmp; vals[s] = stmp;
} }
} }
merge(vtmp,vals); merge(vtmp,vals);
} else { } else {
vtmp(0) = chi(1); vtmp(0) = chi(1);
vtmp(1) = chi(0); vtmp(1) = chi(0);
@ -548,11 +485,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
GaugeLinkField Uconj(GaugeGrid); GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid); Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
LatticeCoordinate(coor,mu); LatticeCoordinate(coor,mu);
U = PeekIndex<LorentzIndex>(Umu,mu); U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U); Uconj = conjugate(U);
@ -566,7 +503,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
Uds[ss](0)(mu) = U[ss](); Uds[ss](0)(mu) = U[ss]();
Uds[ss](1)(mu) = Uconj[ss](); Uds[ss](1)(mu) = Uconj[ss]();
} }
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1)); Uconj = adj(Cshift(Uconj,mu,-1));
@ -574,12 +511,11 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,Uconj,Utmp); Utmp = where(coor==0,Uconj,Utmp);
} }
parallel_for(auto ss=U.begin();ss<U.end();ss++){ parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss](); Uds[ss](0)(mu+4) = Utmp[ss]();
} }
Utmp = Uconj; Utmp = Uconj;
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp); Utmp = where(coor==0,U,Utmp);
@ -588,7 +524,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
parallel_for(auto ss=U.begin();ss<U.end();ss++){ parallel_for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss](); Uds[ss](1)(mu+4) = Utmp[ss]();
} }
} }
} }
@ -599,7 +535,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A)); auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) { parallel_for(auto ss = tmp.begin(); ss < tmp.end(); ss++) {
link[ss]() = tmp[ss](0, 0) + conjugate(tmp[ss](1, 1)); link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
} }
PokeIndex<LorentzIndex>(mat, link, mu); PokeIndex<LorentzIndex>(mat, link, mu);
return; return;
@ -608,7 +544,7 @@ class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresent
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) { inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0]; int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) { parallel_for(int ss = 0; ss < tmp._grid->oSites(); ss++) {
@ -644,16 +580,19 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >; template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >; template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >; template <typename vtype> using iImplPropagator = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iImplScalar<Simd> SiteComplex;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef iImplPropagator<Simd> SitePropagator; typedef iImplPropagator<Simd> SitePropagator;
typedef Lattice<SiteComplex> ComplexField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField; typedef Lattice<SitePropagator> PropagatorField;
@ -772,6 +711,7 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >; template <typename vtype> using iImplSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >; template <typename vtype> using iImplHalfSpinor = iScalar<iScalar<iVector<vtype, Dimension> > >;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
@ -788,10 +728,12 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation:
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef Lattice<SitePropagator> PropagatorField; typedef Lattice<SitePropagator> PropagatorField;
typedef iImplScalar<Simd> SiteComplex;
typedef iImplSpinor<Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteComplex> ComplexField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef SimpleCompressor<SiteSpinor> Compressor; typedef SimpleCompressor<SiteSpinor> Compressor;
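The largest change in this file is the develop-side DoubleStore, which applies the configurable boundary phases while building the doubled gauge field; the old commit keeps plain periodic copies. Restated with comments (the same code as the left column above, inside the mu loop, with U, tmp and coor living on the gauge grid and phase = Params.boundary_phases[mu]):

    int Lmu = GaugeGrid->GlobalDimensions()[mu] - 1;   // last global slice in direction mu
    LatticeCoordinate(coor, mu);                       // integer coordinate field
    U   = PeekIndex<LorentzIndex>(Umu, mu);
    tmp = where(coor == Lmu, phase * U, U);            // forward hop: phase on the wrapping link
    PokeIndex<LorentzIndex>(Uds, tmp, mu);
    U = adj(Cshift(U, mu, -1));
    U = where(coor == 0, conjugate(phase) * U, U);     // backward hop: conjugate phase
    PokeIndex<LorentzIndex>(Uds, U, mu + 4);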


@ -230,7 +230,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
} }
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _grid); conformable(U._grid, _grid);
conformable(U._grid, V._grid); conformable(U._grid, V._grid);
conformable(U._grid, mat._grid); conformable(U._grid, mat._grid);
@ -241,12 +242,12 @@ void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U, cons
} }
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid); conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid); conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid); not general, leaving as a comment (Guido) conformable(U._grid, mat._grid);
// Motivation: look at the SchurDiff operator
assert(V.checkerboard == Even); assert(V.checkerboard == Even);
assert(U.checkerboard == Odd); assert(U.checkerboard == Odd);
mat.checkerboard = Odd; mat.checkerboard = Odd;
@ -255,10 +256,11 @@ void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U, co
} }
template <class Impl> template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, const FermionField &V, int dag) { void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid); conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid); conformable(U._grid, V._grid);
//conformable(U._grid, mat._grid); conformable(U._grid, mat._grid);
assert(V.checkerboard == Odd); assert(V.checkerboard == Odd);
assert(U.checkerboard == Even); assert(U.checkerboard == Even);


@ -11,7 +11,6 @@ Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -118,6 +117,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
// Allocate the required comms buffer // Allocate the required comms buffer
ImportGauge(_Umu); ImportGauge(_Umu);
// Build lists of exterior only nodes // Build lists of exterior only nodes
int LLs = FiveDimGrid._rdimensions[0]; int LLs = FiveDimGrid._rdimensions[0];
int vol4; int vol4;
@ -267,8 +267,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DerivCommTime+=usecond(); DerivCommTime+=usecond();
Atilde=A; Atilde=A;
int LLs = B._grid->_rdimensions[0];
DerivComputeTime-=usecond(); DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
@ -298,9 +296,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
//////////////////////////// ////////////////////////////
} }
} }
////////////////////////////
// spin trace outer product
////////////////////////////
DerivDhopComputeTime += usecond(); DerivDhopComputeTime += usecond();
Impl::InsertForce5D(mat, Btilde, Atilde, mu); Impl::InsertForce5D(mat, Btilde, Atilde, mu);
} }
@ -309,14 +304,13 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionGrid()); conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
conformable(GaugeGrid(),mat._grid);
//conformable(GaugeGrid(),mat._grid);// this is not general! leaving as a comment
mat.checkerboard = A.checkerboard; mat.checkerboard = A.checkerboard;
@ -325,11 +319,12 @@ void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid);
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
assert(B.checkerboard==Odd); assert(B.checkerboard==Odd);
@ -342,11 +337,12 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid);
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
assert(B.checkerboard==Even); assert(B.checkerboard==Even);
@ -358,8 +354,8 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) const FermionField &in, FermionField &out,int dag)
{ {
DhopTotalTime-=usecond(); DhopTotalTime-=usecond();
#ifdef GRID_OMP #ifdef GRID_OMP
@ -392,12 +388,8 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
std::vector<std::vector<CommsRequest_t> > reqs; std::vector<std::vector<CommsRequest_t> > reqs;
// Rely on async comms; start comms before merge of local data // Rely on async comms; start comms before merge of local data
DhopCommTime-=usecond();
st.CommunicateBegin(reqs); st.CommunicateBegin(reqs);
DhopFaceTime-=usecond();
st.CommsMergeSHM(compressor); st.CommsMergeSHM(compressor);
DhopFaceTime+=usecond();
// Perhaps use omp task and region // Perhaps use omp task and region
#pragma omp parallel #pragma omp parallel
@ -410,6 +402,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
int sF = LLs * myoff; int sF = LLs * myoff;
if ( me == 0 ) { if ( me == 0 ) {
DhopCommTime-=usecond();
st.CommunicateComplete(reqs); st.CommunicateComplete(reqs);
DhopCommTime+=usecond(); DhopCommTime+=usecond();
} else { } else {
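The hunks above rearrange the comms/compute overlap in DhopInternalOverlappedComms and move the DhopCommTime accounting. The structure common to both sides, in outline (restated from the diff, not new code):

    st.CommunicateBegin(reqs);      // post the asynchronous halo exchange
    st.CommsMergeSHM(compressor);   // merge neighbours already reachable through shared memory
    #pragma omp parallel
    {
      // thread 0 completes the MPI requests (CommunicateComplete) while the other
      // threads run the interior stencil kernel; the exterior sites are applied
      // once the halo has arrived.
    }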


@ -29,7 +29,7 @@ directory
#ifndef GRID_QCD_GAUGE_H #ifndef GRID_QCD_GAUGE_H
#define GRID_QCD_GAUGE_H #define GRID_QCD_GAUGE_H
#include <Grid/qcd/action/gauge/GaugeImplementations.h> #include <Grid/qcd/action/gauge/GaugeImpl.h>
#include <Grid/qcd/utils/WilsonLoops.h> #include <Grid/qcd/utils/WilsonLoops.h>
#include <Grid/qcd/action/gauge/WilsonGaugeAction.h> #include <Grid/qcd/action/gauge/WilsonGaugeAction.h>
#include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h> #include <Grid/qcd/action/gauge/PlaqPlusRectangleAction.h>


@ -2,7 +2,7 @@
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/GaugeImplementations.h Source file: ./lib/qcd/action/gauge/GaugeImpl.h
Copyright (C) 2015 Copyright (C) 2015
@ -26,14 +26,53 @@ See the full license in the file "LICENSE" in the top level distribution
directory directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef GRID_QCD_GAUGE_IMPLEMENTATIONS_H #ifndef GRID_QCD_GAUGE_IMPL_H
#define GRID_QCD_GAUGE_IMPLEMENTATIONS_H #define GRID_QCD_GAUGE_IMPL_H
#include "GaugeImplTypes.h"
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
////////////////////////////////////////////////////////////////////////
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
template <class Gimpl> class WilsonLoops;
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd; \
typedef typename GImpl::GaugeLinkField GaugeLinkField; \
typedef typename GImpl::GaugeField GaugeField; \
typedef typename GImpl::SiteGaugeField SiteGaugeField; \
typedef typename GImpl::SiteGaugeLink SiteGaugeLink;
//
template <class S, int Nrepresentation = Nc> class GaugeImplTypes {
public:
typedef S Simd;
template <typename vtype>
using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>;
template <typename vtype>
using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>;
typedef iImplGaugeLink<Simd> SiteGaugeLink;
typedef iImplGaugeField<Simd> SiteGaugeField;
typedef Lattice<SiteGaugeLink> GaugeLinkField; // bit ugly naming; polarised
// gauge field, lorentz... all
// ugly
typedef Lattice<SiteGaugeField> GaugeField;
// Move this elsewhere? FIXME
static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W,
int mu) { // U[mu] += W
parallel_for (auto ss = 0; ss < U._grid->oSites(); ss++) {
U._odata[ss]._internal[mu] =
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
}
}
};
// Composition with smeared link, bc's etc.. probably need multiple inheritance // Composition with smeared link, bc's etc.. probably need multiple inheritance
// Variable precision "S" and variable Nc // Variable precision "S" and variable Nc
template <class GimplTypes> class PeriodicGaugeImpl : public GimplTypes { template <class GimplTypes> class PeriodicGaugeImpl : public GimplTypes {
@ -129,6 +168,14 @@ public:
static inline bool isPeriodicGaugeField(void) { return false; } static inline bool isPeriodicGaugeField(void) { return false; }
}; };
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
@ -140,8 +187,6 @@ typedef PeriodicGaugeImpl<GimplAdjointTypesD> PeriodicGimplAdjD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
} }
} }
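On the old-commit side (right column) GaugeImpl.h carries the INHERIT_GIMPL_TYPES macro and GaugeImplTypes directly; on the develop side they live in a separate header (the file that follows) and this one becomes GaugeImplementations.h. A hypothetical sketch of how the macro is consumed (MyObservable is an illustrative name, not a Grid class):

    template <class Gimpl>
    class MyObservable {
    public:
      INHERIT_GIMPL_TYPES(Gimpl);                 // pulls in Simd, GaugeField, GaugeLinkField, ...
      RealD eval(const GaugeField &U) { return norm2(U); }
    };
    // MyObservable<PeriodicGimplR> obs;          // instantiate for the periodic real implementation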


@ -1,153 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/GaugeImpl.h
Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_GAUGE_IMPL_TYPES_H
#define GRID_GAUGE_IMPL_TYPES_H
namespace Grid {
namespace QCD {
////////////////////////////////////////////////////////////////////////
// Implementation dependent gauge types
////////////////////////////////////////////////////////////////////////
#define INHERIT_GIMPL_TYPES(GImpl) \
typedef typename GImpl::Simd Simd; \
typedef typename GImpl::LinkField GaugeLinkField; \
typedef typename GImpl::Field GaugeField; \
typedef typename GImpl::ComplexField ComplexField;\
typedef typename GImpl::SiteField SiteGaugeField; \
typedef typename GImpl::SiteComplex SiteComplex; \
typedef typename GImpl::SiteLink SiteGaugeLink;
#define INHERIT_FIELD_TYPES(Impl) \
typedef typename Impl::Simd Simd; \
typedef typename Impl::ComplexField ComplexField; \
typedef typename Impl::SiteField SiteField; \
typedef typename Impl::Field Field;
// hardcodes the exponential approximation in the template
template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplTypes {
public:
typedef S Simd;
template <typename vtype> using iImplScalar = iScalar<iScalar<iScalar<vtype> > >;
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
typedef iImplScalar<Simd> SiteComplex;
typedef iImplGaugeLink<Simd> SiteLink;
typedef iImplGaugeField<Simd> SiteField;
typedef Lattice<SiteComplex> ComplexField;
typedef Lattice<SiteLink> LinkField;
typedef Lattice<SiteField> Field;
// Guido: we can probably separate the types from the HMC functions
// this would create two kinds of implementations
// probably confusing the users
// Now keeping only one class
// Move this elsewhere? FIXME
static inline void AddLink(Field &U, LinkField &W,
int mu) { // U[mu] += W
PARALLEL_FOR_LOOP
for (auto ss = 0; ss < U._grid->oSites(); ss++) {
U._odata[ss]._internal[mu] =
U._odata[ss]._internal[mu] + W._odata[ss]._internal;
}
}
///////////////////////////////////////////////////////////
// Move these to another class
// HMC auxiliary functions
static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) {
// specific for SU gauge fields
LinkField Pmu(P._grid);
Pmu = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Nrepresentation>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}
static inline Field projectForce(Field &P) { return Ta(P); }
static inline void update_field(Field& P, Field& U, double ep){
//static std::chrono::duration<double> diff;
//auto start = std::chrono::high_resolution_clock::now();
parallel_for(int ss=0;ss<P._grid->oSites();ss++){
for (int mu = 0; mu < Nd; mu++)
U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]);
}
//auto end = std::chrono::high_resolution_clock::now();
// diff += end - start;
// std::cout << "Time to exponentiate matrix " << diff.count() << " s\n";
}
static inline RealD FieldSquareNorm(Field& U){
LatticeComplex Hloc(U._grid);
Hloc = zero;
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
Hloc += trace(Umu * Umu);
}
Complex Hsum = sum(Hloc);
return Hsum.real();
}
static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::HotConfiguration(pRNG, U);
}
static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::TepidConfiguration(pRNG, U);
}
static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) {
SU<Nc>::ColdConfiguration(pRNG, U);
}
};
typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
} // QCD
} // Grid
#endif // GRID_GAUGE_IMPL_TYPES_H
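This header (present only on the develop side) also carries the HMC helpers. In formulas, update_field applies the truncated exponential of the momentum to each link and FieldSquareNorm returns the real trace square of the field:

    \[ U_\mu(x) \;\leftarrow\; \mathrm{ProjectOnGroup}\!\Bigl(\exp_{N_{\rm exp}}\!\bigl(\epsilon\,P_\mu(x)\bigr)\,U_\mu(x)\Bigr),
       \qquad
       \mathrm{FieldSquareNorm}(U) \;=\; \mathrm{Re}\sum_{x,\mu} \mathrm{tr}\bigl(U_\mu(x)\,U_\mu(x)\bigr), \]

where exp_{N_exp} is the order-N_exp truncated matrix exponential used by Exponentiate (N_exp = 12 by default) and \epsilon is the integrator step.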


@ -47,19 +47,9 @@ namespace Grid{
public: public:
PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){}; PlaqPlusRectangleAction(RealD b,RealD c): c_plaq(b),c_rect(c){};
virtual std::string action_name(){return "PlaqPlusRectangleAction";}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name() <<"] c_plaq: " << c_plaq << std::endl;
sstream << GridLogMessage << "["<<action_name() <<"] c_rect: " << c_rect << std::endl;
return sstream.str();
}
virtual RealD S(const GaugeField &U) { virtual RealD S(const GaugeField &U) {
RealD vol = U._grid->gSites(); RealD vol = U._grid->gSites();
@ -118,32 +108,32 @@ namespace Grid{
class RBCGaugeAction : public PlaqPlusRectangleAction<Gimpl> { class RBCGaugeAction : public PlaqPlusRectangleAction<Gimpl> {
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {}; RBCGaugeAction(RealD beta,RealD c1) : PlaqPlusRectangleAction<Gimpl>(beta*(1.0-8.0*c1), beta*c1) {
virtual std::string action_name(){return "RBCGaugeAction";} };
}; };
template<class Gimpl> template<class Gimpl>
class IwasakiGaugeAction : public RBCGaugeAction<Gimpl> { class IwasakiGaugeAction : public RBCGaugeAction<Gimpl> {
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
IwasakiGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-0.331) {}; IwasakiGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-0.331) {
virtual std::string action_name(){return "IwasakiGaugeAction";} };
}; };
template<class Gimpl> template<class Gimpl>
class SymanzikGaugeAction : public RBCGaugeAction<Gimpl> { class SymanzikGaugeAction : public RBCGaugeAction<Gimpl> {
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
SymanzikGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.0/12.0) {}; SymanzikGaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.0/12.0) {
virtual std::string action_name(){return "SymanzikGaugeAction";} };
}; };
template<class Gimpl> template<class Gimpl>
class DBW2GaugeAction : public RBCGaugeAction<Gimpl> { class DBW2GaugeAction : public RBCGaugeAction<Gimpl> {
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
DBW2GaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.4067) {}; DBW2GaugeAction(RealD beta) : RBCGaugeAction<Gimpl>(beta,-1.4067) {
virtual std::string action_name(){return "DBW2GaugeAction";} };
}; };
} }
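For reference, every constructor above reduces to the same two-coupling parametrisation, read directly from PlaqPlusRectangleAction:

  c_{\mathrm{plaq}} = \beta\,(1 - 8 c_1), \qquad c_{\mathrm{rect}} = \beta\, c_1,

with c_1 = -0.331 (Iwasaki), c_1 = -1/12 (Symanzik) and c_1 = -1.4067 (DBW2), while RBCGaugeAction leaves c_1 as a free parameter.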

View File

@ -1,99 +1,86 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h Source file: ./lib/qcd/action/gauge/WilsonGaugeAction.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution See the full license in the file "LICENSE" in the top level distribution directory
directory *************************************************************************************/
*************************************************************************************/ /* END LEGAL */
/* END LEGAL */
#ifndef QCD_WILSON_GAUGE_ACTION_H #ifndef QCD_WILSON_GAUGE_ACTION_H
#define QCD_WILSON_GAUGE_ACTION_H #define QCD_WILSON_GAUGE_ACTION_H
namespace Grid { namespace Grid{
namespace QCD { namespace QCD{
////////////////////////////////////////////////////////////////////////
// Wilson Gauge Action .. should I template the Nc etc..
////////////////////////////////////////////////////////////////////////
template<class Gimpl>
class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> {
public:
//////////////////////////////////////////////////////////////////////// INHERIT_GIMPL_TYPES(Gimpl);
// Wilson Gauge Action .. should I template the Nc etc..
////////////////////////////////////////////////////////////////////////
template <class Gimpl>
class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> {
public:
INHERIT_GIMPL_TYPES(Gimpl);
/////////////////////////// constructors // typedef LorentzScalar<GaugeField> GaugeLinkField;
explicit WilsonGaugeAction(RealD beta_):beta(beta_){};
virtual std::string action_name() {return "WilsonGaugeAction";} private:
RealD beta;
public:
WilsonGaugeAction(RealD b):beta(b){};
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {}; // noop as no pseudoferms
virtual RealD S(const GaugeField &U) {
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
RealD vol = U._grid->gSites();
RealD action=beta*(1.0 -plaq)*(Nd*(Nd-1.0))*vol*0.5;
return action;
};
virtual std::string LogParameters(){ virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
std::stringstream sstream; //not optimal implementation FIXME
sstream << GridLogMessage << "[WilsonGaugeAction] Beta: " << beta << std::endl; //extend Ta to include Lorentz indexes
return sstream.str();
//RealD factor = 0.5*beta/RealD(Nc);
RealD factor = 0.5*beta/RealD(Nc);
GaugeLinkField Umu(U._grid);
GaugeLinkField dSdU_mu(U._grid);
for (int mu=0; mu < Nd; mu++){
Umu = PeekIndex<LorentzIndex>(U,mu);
// Staple in direction mu
WilsonLoops<Gimpl>::Staple(dSdU_mu,U,mu);
dSdU_mu = Ta(Umu*dSdU_mu)*factor;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}
};
};
} }
virtual void refresh(const GaugeField &U,
GridParallelRNG &pRNG){}; // noop as no pseudoferms
virtual RealD S(const GaugeField &U) {
RealD plaq = WilsonLoops<Gimpl>::avgPlaquette(U);
RealD vol = U._grid->gSites();
RealD action = beta * (1.0 - plaq) * (Nd * (Nd - 1.0)) * vol * 0.5;
return action;
};
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
// not optimal implementation FIXME
// extend Ta to include Lorentz indexes
RealD factor = 0.5 * beta / RealD(Nc);
//GaugeLinkField Umu(U._grid);
GaugeLinkField dSdU_mu(U._grid);
for (int mu = 0; mu < Nd; mu++) {
//Umu = PeekIndex<LorentzIndex>(U, mu);
// Staple in direction mu
//WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu);
//dSdU_mu = Ta(Umu * dSdU_mu) * factor;
WilsonLoops<Gimpl>::StapleMult(dSdU_mu, U, mu);
dSdU_mu = Ta(dSdU_mu) * factor;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
}
}
private:
RealD beta;
};
}
} }
#endif #endif
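As a cross-check of the expression in S() above (assuming avgPlaquette returns the plaquette trace averaged over sites and over the Nd(Nd-1)/2 planes, normalised so that the unit gauge gives 1), the Wilson action reads

  S[U] = \beta \sum_x \sum_{\mu<\nu} \left( 1 - \frac{1}{N_c}\,\mathrm{Re}\,\mathrm{tr}\, U_{\mu\nu}(x) \right)
       = \beta \left( 1 - \langle P \rangle \right) \frac{N_d (N_d - 1)}{2}\, V,

which is exactly beta*(1.0-plaq)*(Nd*(Nd-1.0))*vol*0.5.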

View File

@ -7,7 +7,6 @@
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -46,97 +45,92 @@ namespace Grid{
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Matrix; typedef FermionOperator<Impl> Matrix;
SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator<Matrix,FermionField>(Mat) {}; SchurDifferentiableOperator (Matrix &Mat) : SchurDiagMooeeOperator<Matrix,FermionField>(Mat) {};
void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { void MpcDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
GridBase *fgrid = this->_Mat.FermionGrid(); GridBase *fgrid = this->_Mat.FermionGrid();
GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
GridBase *ugrid = this->_Mat.GaugeGrid();
GridBase *ucbgrid = this->_Mat.GaugeRedBlackGrid();
FermionField tmp1(fcbgrid); Real coeff = 1.0;
FermionField tmp2(fcbgrid);
conformable(fcbgrid,U._grid); FermionField tmp1(fcbgrid);
conformable(fcbgrid,V._grid); FermionField tmp2(fcbgrid);
// Assert the checkerboard?? or code for either conformable(fcbgrid,U._grid);
assert(U.checkerboard==Odd); conformable(fcbgrid,V._grid);
assert(V.checkerboard==U.checkerboard);
// NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE // Assert the checkerboard?? or code for either
// it is not conformable with the HMC force field assert(U.checkerboard==Odd);
// Case: Ls vectorised fields assert(V.checkerboard==U.checkerboard);
// INHERIT FROM THE Force field instead
GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
GaugeField ForceO(forcecb);
GaugeField ForceE(forcecb);
GaugeField ForceO(ucbgrid);
GaugeField ForceE(ucbgrid);
// X^dag Der_oe MeeInv Meo Y // X^dag Der_oe MeeInv Meo Y
// Use Mooee as nontrivial but gauge field indept // Use Mooee as nontrivial but gauge field indept
this->_Mat.Meooe (V,tmp1); // odd->even -- implicit -0.5 factor to be applied this->_Mat.Meooe (V,tmp1); // odd->even -- implicit -0.5 factor to be applied
this->_Mat.MooeeInv(tmp1,tmp2); // even->even this->_Mat.MooeeInv(tmp1,tmp2); // even->even
this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo); this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerNo);
// Accumulate X^dag M_oe MeeInv Der_eo Y
this->_Mat.MeooeDag (U,tmp1); // even->odd -- implicit -0.5 factor to be applied // Accumulate X^dag M_oe MeeInv Der_eo Y
this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even this->_Mat.MeooeDag (U,tmp1); // even->odd -- implicit -0.5 factor to be applied
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo); this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerNo);
assert(ForceE.checkerboard==Even);
assert(ForceO.checkerboard==Odd); assert(ForceE.checkerboard==Even);
assert(ForceO.checkerboard==Odd);
setCheckerboard(Force,ForceE); setCheckerboard(Force,ForceE);
setCheckerboard(Force,ForceO); setCheckerboard(Force,ForceO);
Force=-Force; Force=-Force;
}
delete forcecb;
}
void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) { void MpcDagDeriv(GaugeField &Force,const FermionField &U,const FermionField &V) {
GridBase *fgrid = this->_Mat.FermionGrid(); GridBase *fgrid = this->_Mat.FermionGrid();
GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid(); GridBase *fcbgrid = this->_Mat.FermionRedBlackGrid();
GridBase *ugrid = this->_Mat.GaugeGrid();
GridBase *ucbgrid = this->_Mat.GaugeRedBlackGrid();
FermionField tmp1(fcbgrid); Real coeff = 1.0;
FermionField tmp2(fcbgrid);
conformable(fcbgrid,U._grid); FermionField tmp1(fcbgrid);
conformable(fcbgrid,V._grid); FermionField tmp2(fcbgrid);
// Assert the checkerboard?? or code for either conformable(fcbgrid,U._grid);
assert(V.checkerboard==Odd); conformable(fcbgrid,V._grid);
assert(V.checkerboard==V.checkerboard);
// NOTE Guido: WE DO NOT WANT TO USE THE ucbgrid GRID FOR THE FORCE // Assert the checkerboard?? or code for either
// it is not conformable with the HMC force field assert(V.checkerboard==Odd);
// INHERIT FROM THE Force field instead assert(V.checkerboard==V.checkerboard);
GridRedBlackCartesian* forcecb = new GridRedBlackCartesian(Force._grid);
GaugeField ForceO(forcecb);
GaugeField ForceE(forcecb);
// X^dag Der_oe MeeInv Meo Y GaugeField ForceO(ucbgrid);
// Use Mooee as nontrivial but gauge field indept GaugeField ForceE(ucbgrid);
this->_Mat.MeooeDag (V,tmp1); // odd->even -- implicit -0.5 factor to be applied
this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even
this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes);
// Accumulate X^dag M_oe MeeInv Der_eo Y
this->_Mat.Meooe (U,tmp1); // even->odd -- implicit -0.5 factor to be applied
this->_Mat.MooeeInv(tmp1,tmp2); // even->even
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes);
assert(ForceE.checkerboard==Even); // X^dag Der_oe MeeInv Meo Y
assert(ForceO.checkerboard==Odd); // Use Mooee as nontrivial but gauge field indept
this->_Mat.MeooeDag (V,tmp1); // odd->even -- implicit -0.5 factor to be applied
this->_Mat.MooeeInvDag(tmp1,tmp2); // even->even
this->_Mat.MoeDeriv(ForceO,U,tmp2,DaggerYes);
// Accumulate X^dag M_oe MeeInv Der_eo Y
this->_Mat.Meooe (U,tmp1); // even->odd -- implicit -0.5 factor to be applied
this->_Mat.MooeeInv(tmp1,tmp2); // even->even
this->_Mat.MeoDeriv(ForceE,tmp2,V,DaggerYes);
setCheckerboard(Force,ForceE); assert(ForceE.checkerboard==Even);
setCheckerboard(Force,ForceO); assert(ForceO.checkerboard==Odd);
Force=-Force;
delete forcecb; setCheckerboard(Force,ForceE);
} setCheckerboard(Force,ForceO);
Force=-Force;
}
}; };

View File

@ -1,4 +1,3 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -91,19 +90,6 @@ class OneFlavourEvenOddRationalPseudoFermionAction
PowerNegQuarter.Init(remez, param.tolerance, true); PowerNegQuarter.Init(remez, param.tolerance, true);
}; };
virtual std::string action_name(){return "OneFlavourEvenOddRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi} // P(phi) = e^{- phi^dag (MpcdagMpc)^-1/2 phi}
// = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi} // = e^{- phi^dag (MpcdagMpc)^-1/4 (MpcdagMpc)^-1/4 phi}

View File

@ -87,20 +87,6 @@ namespace Grid{
PowerQuarter.Init(remez,param.tolerance,false); PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true); PowerNegQuarter.Init(remez,param.tolerance,true);
}; };
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {

View File

@ -83,25 +83,9 @@ namespace Grid{
PowerQuarter.Init(remez,param.tolerance,false); PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true); PowerNegQuarter.Init(remez,param.tolerance,true);
}; };
virtual std::string action_name(){return "OneFlavourRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag (MdagM)^-1/2 phi} // P(phi) = e^{- phi^dag (MdagM)^-1/2 phi}
// = e^{- phi^dag (MdagM)^-1/4 (MdagM)^-1/4 phi} // = e^{- phi^dag (MdagM)^-1/4 (MdagM)^-1/4 phi}
// Phi = Mdag^{1/4} eta // Phi = Mdag^{1/4} eta

View File

@ -81,21 +81,7 @@ namespace Grid{
PowerQuarter.Init(remez,param.tolerance,false); PowerQuarter.Init(remez,param.tolerance,false);
PowerNegQuarter.Init(remez,param.tolerance,true); PowerNegQuarter.Init(remez,param.tolerance,true);
}; };
virtual std::string action_name(){return "OneFlavourRatioRationalPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] Low :" << param.lo << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] High :" << param.hi << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Max iterations :" << param.MaxIter << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Tolerance :" << param.tolerance << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Degree :" << param.degree << std::endl;
sstream << GridLogMessage << "["<<action_name()<<"] Precision :" << param.precision << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi // S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi

View File

@ -62,15 +62,6 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
ActionSolver(AS), ActionSolver(AS),
Phi(Op.FermionGrid()){}; Phi(Op.FermionGrid()){};
virtual std::string action_name(){return "TwoFlavourPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Push the gauge field in to the dops. Assume any BC's and smearing already applied // Push the gauge field in to the dops. Assume any BC's and smearing already applied
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
@ -89,9 +80,7 @@ class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
// in the Phi integral, and thus is only an irrelevant prefactor for // in the Phi integral, and thus is only an irrelevant prefactor for
// the partition function. // the partition function.
// //
RealD scale = std::sqrt(0.5); RealD scale = std::sqrt(0.5);
FermionField eta(FermOp.FermionGrid()); FermionField eta(FermOp.FermionGrid());
gaussian(pRNG, eta); gaussian(pRNG, eta);

View File

@ -31,89 +31,80 @@ directory
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Two flavour pseudofermion action for any EO prec dop // Two flavour pseudofermion action for any EO prec dop
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
class TwoFlavourEvenOddPseudoFermionAction class TwoFlavourEvenOddPseudoFermionAction
: public Action<typename Impl::GaugeField> { : public Action<typename Impl::GaugeField> {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
private: private:
FermionOperator<Impl> &FermOp; // the basic operator FermionOperator<Impl> &FermOp; // the basic operator
OperatorFunction<FermionField> &DerivativeSolver; OperatorFunction<FermionField> &DerivativeSolver;
OperatorFunction<FermionField> &ActionSolver; OperatorFunction<FermionField> &ActionSolver;
FermionField PhiOdd; // the pseudo fermion field for this trajectory FermionField PhiOdd; // the pseudo fermion field for this trajectory
FermionField PhiEven; // the pseudo fermion field for this trajectory FermionField PhiEven; // the pseudo fermion field for this trajectory
public: public:
///////////////////////////////////////////////// /////////////////////////////////////////////////
// Pass in required objects. // Pass in required objects.
///////////////////////////////////////////////// /////////////////////////////////////////////////
TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op, TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> &DS, OperatorFunction<FermionField> &DS,
OperatorFunction<FermionField> &AS) OperatorFunction<FermionField> &AS)
: FermOp(Op), : FermOp(Op),
DerivativeSolver(DS), DerivativeSolver(DS),
ActionSolver(AS), ActionSolver(AS),
PhiEven(Op.FermionRedBlackGrid()), PhiEven(Op.FermionRedBlackGrid()),
PhiOdd(Op.FermionRedBlackGrid()) PhiOdd(Op.FermionRedBlackGrid())
{}; {};
virtual std::string action_name(){return "TwoFlavourEvenOddPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
// Push the gauge field in to the dops. Assume any BC's and smearing already applied // Push the gauge field in to the dops. Assume any BC's and smearing already applied
////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi} // P(phi) = e^{- phi^dag (MpcdagMpc)^-1 phi}
// Phi = McpDag eta // Phi = McpDag eta
// P(eta) = e^{- eta^dag eta} // P(eta) = e^{- eta^dag eta}
// //
// e^{x^2/2 sig^2} => sig^2 = 0.5. // e^{x^2/2 sig^2} => sig^2 = 0.5.
RealD scale = std::sqrt(0.5); RealD scale = std::sqrt(0.5);
FermionField eta (FermOp.FermionGrid()); FermionField eta (FermOp.FermionGrid());
FermionField etaOdd (FermOp.FermionRedBlackGrid()); FermionField etaOdd (FermOp.FermionRedBlackGrid());
FermionField etaEven(FermOp.FermionRedBlackGrid()); FermionField etaEven(FermOp.FermionRedBlackGrid());
gaussian(pRNG,eta); gaussian(pRNG,eta);
pickCheckerboard(Even,etaEven,eta); pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta); pickCheckerboard(Odd,etaOdd,eta);
FermOp.ImportGauge(U); FermOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> PCop(FermOp); SchurDifferentiableOperator<Impl> PCop(FermOp);
PCop.MpcDag(etaOdd,PhiOdd); PCop.MpcDag(etaOdd,PhiOdd);
FermOp.MooeeDag(etaEven,PhiEven); FermOp.MooeeDag(etaEven,PhiEven);
PhiOdd =PhiOdd*scale; PhiOdd =PhiOdd*scale;
PhiEven=PhiEven*scale; PhiEven=PhiEven*scale;
}; };
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// S = phi^dag (Mdag M)^-1 phi (odd) // S = phi^dag (Mdag M)^-1 phi (odd)
// + phi^dag (Mdag M)^-1 phi (even) // + phi^dag (Mdag M)^-1 phi (even)
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) { virtual RealD S(const GaugeField &U) {
FermOp.ImportGauge(U); FermOp.ImportGauge(U);
FermionField X(FermOp.FermionRedBlackGrid()); FermionField X(FermOp.FermionRedBlackGrid());
@ -144,6 +135,7 @@ namespace Grid {
// //
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) { virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
FermOp.ImportGauge(U); FermOp.ImportGauge(U);
FermionField X(FermOp.FermionRedBlackGrid()); FermionField X(FermOp.FermionRedBlackGrid());
@ -158,8 +150,8 @@ namespace Grid {
X=zero; X=zero;
DerivativeSolver(Mpc,PhiOdd,X); DerivativeSolver(Mpc,PhiOdd,X);
Mpc.Mpc(X,Y); Mpc.Mpc(X,Y);
Mpc.MpcDeriv(tmp , Y, X ); dSdU=tmp; Mpc.MpcDeriv(tmp , Y, X ); dSdU=tmp;
Mpc.MpcDagDeriv(tmp , X, Y); dSdU=dSdU+tmp; Mpc.MpcDagDeriv(tmp , X, Y); dSdU=dSdU+tmp;
// Treat the EE case. (MdagM)^-1 = Minv Minvdag // Treat the EE case. (MdagM)^-1 = Minv Minvdag
// Deriv defaults to zero. // Deriv defaults to zero.
@ -171,10 +163,10 @@ namespace Grid {
assert(FermOp.ConstEE() == 1); assert(FermOp.ConstEE() == 1);
/* /*
FermOp.MooeeInvDag(PhiOdd,Y); FermOp.MooeeInvDag(PhiOdd,Y);
FermOp.MooeeInv(Y,X); FermOp.MooeeInv(Y,X);
FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; FermOp.MeeDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp;
FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; FermOp.MeeDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp;
*/ */
//dSdU = Ta(dSdU); //dSdU = Ta(dSdU);
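A short check of the normalisation used in refresh() above (assuming gaussian() fills eta with unit-variance components): after the sqrt(0.5) rescaling P(\eta) \propto e^{-\eta^\dagger \eta}, and with \Phi = M_{pc}^\dagger \eta,

  \eta^\dagger \eta = \Phi^\dagger M_{pc}^{-1} (M_{pc}^\dagger)^{-1} \Phi = \Phi^\dagger (M_{pc}^\dagger M_{pc})^{-1} \Phi,

so P(\Phi) \propto e^{-\Phi^\dagger (M_{pc}^\dagger M_{pc})^{-1} \Phi}, as quoted in the comments.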

View File

@ -52,75 +52,66 @@ namespace Grid{
public: public:
TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl> &_NumOp, TwoFlavourEvenOddRatioPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp, FermionOperator<Impl> &_DenOp,
OperatorFunction<FermionField> & DS, OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS) : OperatorFunction<FermionField> & AS) :
NumOp(_NumOp), NumOp(_NumOp),
DenOp(_DenOp), DenOp(_DenOp),
DerivativeSolver(DS), DerivativeSolver(DS),
ActionSolver(AS), ActionSolver(AS),
PhiEven(_NumOp.FermionRedBlackGrid()), PhiEven(_NumOp.FermionRedBlackGrid()),
PhiOdd(_NumOp.FermionRedBlackGrid()) PhiOdd(_NumOp.FermionRedBlackGrid())
{ {
conformable(_NumOp.FermionGrid(), _DenOp.FermionGrid()); conformable(_NumOp.FermionGrid(), _DenOp.FermionGrid());
conformable(_NumOp.FermionRedBlackGrid(), _DenOp.FermionRedBlackGrid()); conformable(_NumOp.FermionRedBlackGrid(), _DenOp.FermionRedBlackGrid());
conformable(_NumOp.GaugeGrid(), _DenOp.GaugeGrid()); conformable(_NumOp.GaugeGrid(), _DenOp.GaugeGrid());
conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid()); conformable(_NumOp.GaugeRedBlackGrid(), _DenOp.GaugeRedBlackGrid());
}; };
virtual std::string action_name(){return "TwoFlavourEvenOddRatioPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi} // P(phi) = e^{- phi^dag Vpc (MpcdagMpc)^-1 Vpcdag phi}
// //
// NumOp == V // NumOp == V
// DenOp == M // DenOp == M
// //
// Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi // Take phi_o = Vpcdag^{-1} Mpcdag eta_o ; eta_o = Mpcdag^{-1} Vpcdag Phi
// //
// P(eta_o) = e^{- eta_o^dag eta_o} // P(eta_o) = e^{- eta_o^dag eta_o}
// //
// e^{x^2/2 sig^2} => sig^2 = 0.5. // e^{x^2/2 sig^2} => sig^2 = 0.5.
// //
RealD scale = std::sqrt(0.5); RealD scale = std::sqrt(0.5);
FermionField eta (NumOp.FermionGrid()); FermionField eta (NumOp.FermionGrid());
FermionField etaOdd (NumOp.FermionRedBlackGrid()); FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid()); FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp (NumOp.FermionRedBlackGrid()); FermionField tmp (NumOp.FermionRedBlackGrid());
gaussian(pRNG,eta); gaussian(pRNG,eta);
pickCheckerboard(Even,etaEven,eta); pickCheckerboard(Even,etaEven,eta);
pickCheckerboard(Odd,etaOdd,eta); pickCheckerboard(Odd,etaOdd,eta);
NumOp.ImportGauge(U); NumOp.ImportGauge(U);
DenOp.ImportGauge(U); DenOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> Mpc(DenOp); SchurDifferentiableOperator<Impl> Mpc(DenOp);
SchurDifferentiableOperator<Impl> Vpc(NumOp); SchurDifferentiableOperator<Impl> Vpc(NumOp);
// Odd det factors // Odd det factors
Mpc.MpcDag(etaOdd,PhiOdd); Mpc.MpcDag(etaOdd,PhiOdd);
tmp=zero; tmp=zero;
ActionSolver(Vpc,PhiOdd,tmp); ActionSolver(Vpc,PhiOdd,tmp);
Vpc.Mpc(tmp,PhiOdd); Vpc.Mpc(tmp,PhiOdd);
// Even det factors // Even det factors
DenOp.MooeeDag(etaEven,tmp); DenOp.MooeeDag(etaEven,tmp);
NumOp.MooeeInvDag(tmp,PhiEven); NumOp.MooeeInvDag(tmp,PhiEven);
PhiOdd =PhiOdd*scale; PhiOdd =PhiOdd*scale;
PhiEven=PhiEven*scale; PhiEven=PhiEven*scale;
}; };
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
@ -128,33 +119,33 @@ namespace Grid{
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) { virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U); NumOp.ImportGauge(U);
DenOp.ImportGauge(U); DenOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> Mpc(DenOp); SchurDifferentiableOperator<Impl> Mpc(DenOp);
SchurDifferentiableOperator<Impl> Vpc(NumOp); SchurDifferentiableOperator<Impl> Vpc(NumOp);
FermionField X(NumOp.FermionRedBlackGrid()); FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid()); FermionField Y(NumOp.FermionRedBlackGrid());
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
X=zero; X=zero;
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
//Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi //Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
// Multiply by Ydag // Multiply by Ydag
RealD action = real(innerProduct(Y,X)); RealD action = real(innerProduct(Y,X));
//RealD action = norm2(Y); //RealD action = norm2(Y);
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept) // The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
// Only really clover term that creates this. Leave the EE portion as a future to do to make most // Only really clover term that creates this. Leave the EE portion as a future to do to make most
// rapid progresss on DWF for now. // rapid progresss on DWF for now.
// //
NumOp.MooeeDag(PhiEven,X); NumOp.MooeeDag(PhiEven,X);
DenOp.MooeeInvDag(X,Y); DenOp.MooeeInvDag(X,Y);
action = action + norm2(Y); action = action + norm2(Y);
return action; return action;
}; };
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
@ -164,44 +155,44 @@ namespace Grid{
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U,GaugeField & dSdU) { virtual void deriv(const GaugeField &U,GaugeField & dSdU) {
NumOp.ImportGauge(U); NumOp.ImportGauge(U);
DenOp.ImportGauge(U); DenOp.ImportGauge(U);
SchurDifferentiableOperator<Impl> Mpc(DenOp); SchurDifferentiableOperator<Impl> Mpc(DenOp);
SchurDifferentiableOperator<Impl> Vpc(NumOp); SchurDifferentiableOperator<Impl> Vpc(NumOp);
FermionField X(NumOp.FermionRedBlackGrid()); FermionField X(NumOp.FermionRedBlackGrid());
FermionField Y(NumOp.FermionRedBlackGrid()); FermionField Y(NumOp.FermionRedBlackGrid());
// This assignment is necessary to be compliant with the HMC grids GaugeField force(NumOp.GaugeGrid());
GaugeField force(dSdU._grid);
//Y=Vdag phi //Y=Vdag phi
//X = (Mdag M)^-1 V^dag phi //X = (Mdag M)^-1 V^dag phi
//Y = (Mdag)^-1 V^dag phi //Y = (Mdag)^-1 V^dag phi
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
X=zero; X=zero;
DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi DerivativeSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
// phi^dag V (Mdag M)^-1 dV^dag phi // phi^dag V (Mdag M)^-1 dV^dag phi
Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU = force; Vpc.MpcDagDeriv(force , X, PhiOdd ); dSdU=force;
// phi^dag dV (Mdag M)^-1 V^dag phi // phi^dag dV (Mdag M)^-1 V^dag phi
Vpc.MpcDeriv(force , PhiOdd, X ); dSdU = dSdU+force; Vpc.MpcDeriv(force , PhiOdd, X ); dSdU=dSdU+force;
// - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi // - phi^dag V (Mdag M)^-1 Mdag dM (Mdag M)^-1 V^dag phi
// - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi // - phi^dag V (Mdag M)^-1 dMdag M (Mdag M)^-1 V^dag phi
Mpc.MpcDeriv(force,Y,X); dSdU = dSdU-force; Mpc.MpcDeriv(force,Y,X); dSdU=dSdU-force;
Mpc.MpcDagDeriv(force,X,Y); dSdU = dSdU-force; Mpc.MpcDagDeriv(force,X,Y); dSdU=dSdU-force;
// FIXME No force contribution from EvenEven assumed here // FIXME No force contribution from EvenEven assumed here
// Needs a fix for clover. // Needs a fix for clover.
assert(NumOp.ConstEE() == 1); assert(NumOp.ConstEE() == 1);
assert(DenOp.ConstEE() == 1); assert(DenOp.ConstEE() == 1);
dSdU = -dSdU; //dSdU = -Ta(dSdU);
dSdU = -dSdU;
}; };
}; };
} }
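The same bookkeeping applies to the ratio action above (a sketch of the intent stated in the comments, not new code): with \Phi_o = (V_{pc}^\dagger)^{-1} M_{pc}^\dagger \eta_o and P(\eta_o) \propto e^{-\eta_o^\dagger \eta_o},

  \eta_o^\dagger \eta_o = \Phi_o^\dagger\, V_{pc}\, (M_{pc}^\dagger M_{pc})^{-1}\, V_{pc}^\dagger\, \Phi_o,

reproducing the weight e^{-\phi^\dagger V_{pc} (M_{pc}^\dagger M_{pc})^{-1} V_{pc}^\dagger \phi}.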

View File

@ -57,14 +57,6 @@ namespace Grid{
OperatorFunction<FermionField> & AS OperatorFunction<FermionField> & AS
) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {}; ) : NumOp(_NumOp), DenOp(_DenOp), DerivativeSolver(DS), ActionSolver(AS), Phi(_NumOp.FermionGrid()) {};
virtual std::string action_name(){return "TwoFlavourRatioPseudoFermionAction";}
virtual std::string LogParameters(){
std::stringstream sstream;
sstream << GridLogMessage << "["<<action_name()<<"] has no parameters" << std::endl;
return sstream.str();
}
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi} // P(phi) = e^{- phi^dag V (MdagM)^-1 Vdag phi}

Some files were not shown because too many files have changed in this diff.