diff --git a/README.md b/README.md index 9432abe1..1e0988f3 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,41 @@ License: GPL v2. -Last update Nov 2016. +Last update June 2017. _Please do not send pull requests to the `master` branch which is reserved for releases._ + + +### Description +This library provides data parallel C++ container classes with internal memory layout +that is transformed to map efficiently to SIMD architectures. CSHIFT facilities +are provided, similar to HPF and cmfortran, and user control is given over the mapping of +array indices to both MPI tasks and SIMD processing elements. + +* Identically shaped arrays can then be processed with perfect data parallelisation. +* Such identically shaped arrays are called conformable arrays. + +The transformation is based on the observation that Cartesian array processing involves +identical processing to be performed on different regions of the Cartesian array. + +The library will geometrically decompose data both across MPI tasks and across SIMD lanes. +Local vector loops are parallelised with OpenMP pragmas. + +Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but +optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification +for most programmers. + +The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. +Presently SSE4, ARM NEON (128 bits), AVX, AVX2, QPX (256 bits), IMCI and AVX512 (512 bits) targets are supported. + +These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. +The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. + +MPI, OpenMP, and SIMD parallelism are present in the library. +Please see [this paper](https://arxiv.org/abs/1512.03487) for more detail. 
+ + ### Compilers Intel ICPC v16.0.3 and later @@ -56,35 +87,25 @@ When you file an issue, please go though the following checklist: 6. Attach the output of `make V=1`. 7. Describe the issue and any previous attempt to solve it. If relevant, show how to reproduce the issue using a minimal working example. +### Required libraries +Grid requires: +[GMP](https://gmplib.org/), -### Description -This library provides data parallel C++ container classes with internal memory layout -that is transformed to map efficiently to SIMD architectures. CSHIFT facilities -are provided, similar to HPF and cmfortran, and user control is given over the mapping of -array indices to both MPI tasks and SIMD processing elements. +[MPFR](http://www.mpfr.org/) -* Identically shaped arrays then be processed with perfect data parallelisation. -* Such identically shaped arrays are called conformable arrays. +Bootstrapping grid downloads and uses for internal dense matrix (non-QCD operations) the Eigen library. -The transformation is based on the observation that Cartesian array processing involves -identical processing to be performed on different regions of the Cartesian array. +Grid optionally uses: -The library will both geometrically decompose into MPI tasks and across SIMD lanes. -Local vector loops are parallelised with OpenMP pragmas. +[HDF5](https://support.hdfgroup.org/HDF5/) -Data parallel array operations can then be specified with a SINGLE data parallel paradigm, but -optimally use MPI, OpenMP and SIMD parallelism under the hood. This is a significant simplification -for most programmers. +[LIME](http://usqcd-software.github.io/c-lime/) for ILDG and SciDAC file format support. -The layout transformations are parametrised by the SIMD vector length. This adapts according to the architecture. -Presently SSE4 (128 bit) AVX, AVX2, QPX (256 bit), IMCI, and AVX512 (512 bit) targets are supported (ARM NEON on the way). 
+[FFTW](http://www.fftw.org) either generic version or via the Intel MKL library. -These are presented as `vRealF`, `vRealD`, `vComplexF`, and `vComplexD` internal vector data types. These may be useful in themselves for other programmers. -The corresponding scalar types are named `RealF`, `RealD`, `ComplexF` and `ComplexD`. +LAPACK either generic version or Intel MKL library. -MPI, OpenMP, and SIMD parallelism are present in the library. -Please see https://arxiv.org/abs/1512.03487 for more detail. ### Quick start First, start by cloning the repository: @@ -155,7 +176,6 @@ The following options can be use with the `--enable-comms=` option to target dif | `none` | no communications | | `mpi[-auto]` | MPI communications | | `mpi3[-auto]` | MPI communications using MPI 3 shared memory | -| `mpi3l[-auto]` | MPI communications using MPI 3 shared memory and leader model | | `shmem ` | Cray SHMEM communications | For the MPI interfaces the optional `-auto` suffix instructs the `configure` scripts to determine all the necessary compilation and linking flags. This is done by extracting the informations from the MPI wrapper specified in the environment variable `MPICXX` (if not specified `configure` will scan though a list of default names). The `-auto` suffix is not supported by the Cray environment wrapper scripts. Use the standard versions instead. 
@@ -173,7 +193,8 @@ The following options can be use with the `--enable-simd=` option to target diff | `AVXFMA4` | AVX (256 bit) + FMA4 | | `AVX2` | AVX 2 (256 bit) | | `AVX512` | AVX 512 bit | -| `QPX` | QPX (256 bit) | +| `NEONv8` | [ARM NEON](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch07s03.html) (128 bit) | +| `QPX` | IBM QPX (256 bit) | Alternatively, some CPU codenames can be directly used: @@ -195,21 +216,205 @@ The following configuration is recommended for the Intel Knights Landing platfor ``` bash ../configure --enable-precision=double\ --enable-simd=KNL \ - --enable-comms=mpi-auto \ - --with-gmp= \ - --with-mpfr= \ + --enable-comms=mpi-auto \ --enable-mkl \ CXX=icpc MPICXX=mpiicpc ``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. -where `` is the UNIX prefix where GMP and MPFR are installed. If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: ``` bash ../configure --enable-precision=double\ --enable-simd=KNL \ --enable-comms=mpi \ - --with-gmp= \ - --with-mpfr= \ --enable-mkl \ CXX=CC CC=cc -``` \ No newline at end of file +``` + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` bash + --with-gmp=<path> \ + --with-mpfr=<path> \ +``` +where `<path>` is the UNIX prefix where GMP and MPFR are installed. + +Knights Landing with Intel Omnipath adapters with two adapters per node +presently performs better with use of more than one rank per node, using shared memory +for interior communication. This is the mpi3 communications implementation. +We recommend four ranks per node for best performance, but the optimum depends on the local volume. 
+ +``` bash +../configure --enable-precision=double\ + --enable-simd=KNL \ + --enable-comms=mpi3-auto \ + --enable-mkl \ + CXX=icpc MPICXX=mpiicpc +``` + +### Build setup for Intel Haswell Xeon platform + +The following configuration is recommended for the Intel Haswell platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3-auto \ + --enable-mkl \ + CXX=icpc MPICXX=mpiicpc +``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` bash + --with-gmp=<path> \ + --with-mpfr=<path> \ +``` +where `<path>` is the UNIX prefix where GMP and MPFR are installed. + +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=CC CC=cc +``` +Since dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of +one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using +``` + export I_MPI_PIN=1 +``` +This is the default. + +### Build setup for Intel Skylake Xeon platform + +The following configuration is recommended for the Intel Skylake platform: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX512 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=mpiicpc +``` +The MKL flag enables use of BLAS and FFTW from the Intel Math Kernel Library. + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` bash + --with-gmp=<path> \ + --with-mpfr=<path> \ +``` +where `<path>` is the UNIX prefix where GMP and MPFR are installed. 
+ +If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use: + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX512 \ + --enable-comms=mpi3 \ + --enable-mkl \ + CXX=CC CC=cc +``` +Since dual socket nodes are commonplace, we recommend MPI-3 as the default with the use of +one rank per socket. If using the Intel MPI library, threads should be pinned to NUMA domains using +``` + export I_MPI_PIN=1 +``` +This is the default. + +#### Expected Skylake Gold 6148 dual socket (single prec, single node 20+20 cores) performance using NUMA MPI mapping: + +mpirun -n 2 benchmarks/Benchmark_dwf --grid 16.16.16.16 --mpi 2.1.1.1 --cacheblocking 2.2.2.2 --dslash-asm --shm 1024 --threads 18 + +TBA + + +### Build setup for AMD EPYC / RYZEN + +The AMD EPYC is a multichip module comprising 32 cores spread over four distinct chips each with 8 cores. +So, even with a single socket node there is a quad-chip module. Dual socket nodes with 64 cores total +are common. Each chip within the module exposes a separate NUMA domain. +There are four NUMA domains per socket and we recommend one MPI rank per NUMA domain. +MPI-3 is recommended with the use of four ranks per socket, +and 8 threads per rank. + +The following configuration is recommended for the AMD EPYC platform. + +``` bash +../configure --enable-precision=double\ + --enable-simd=AVX2 \ + --enable-comms=mpi3 \ + CXX=mpicxx +``` + +If gmp and mpfr are NOT in standard places (/usr/) these flags may be needed: +``` bash + --with-gmp=<path> \ + --with-mpfr=<path> \ +``` +where `<path>` is the UNIX prefix where GMP and MPFR are installed. + +Using MPICH and g++ v4.9.2, best performance can be obtained using explicit GOMP_CPU_AFFINITY flags for each MPI rank. +This can be done by having MPI launch each rank through a wrapper script, omp_bind.sh, which sets the affinity. 
+ +It is recommended to run 8 MPI ranks on a single dual socket AMD EPYC, with 8 threads per rank using MPI3 and +shared memory to communicate within this node: + +mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --mpi 2.2.2.1 --dslash-unroll --threads 8 --grid 16.16.16.16 --cacheblocking 4.4.4.4 + +Where omp_bind.sh does the following: +``` +#!/bin/bash + +numanode=` expr $PMI_RANK % 8 ` +basecore=`expr $numanode \* 16` +core0=`expr $basecore + 0 ` +core1=`expr $basecore + 2 ` +core2=`expr $basecore + 4 ` +core3=`expr $basecore + 6 ` +core4=`expr $basecore + 8 ` +core5=`expr $basecore + 10 ` +core6=`expr $basecore + 12 ` +core7=`expr $basecore + 14 ` + +export GOMP_CPU_AFFINITY="$core0 $core1 $core2 $core3 $core4 $core5 $core6 $core7" +echo GOMP_CPU_AFFINITY $GOMP_CPU_AFFINITY + +$@ +``` + +Performance: + +#### Expected AMD EPYC 7601 dual socket (single prec, single node 32+32 cores) performance using NUMA MPI mapping: + +mpirun -np 8 ./omp_bind.sh ./Benchmark_dwf --threads 8 --mpi 2.2.2.1 --dslash-unroll --grid 16.16.16.16 --cacheblocking 4.4.4.4 + +TBA + +### Build setup for BlueGene/Q + +To be written... + +### Build setup for ARM Neon + +To be written... + +### Build setup for laptops, other compilers, non-cluster builds + +Many versions of g++ and clang++ work with Grid; using them merely involves replacing CXX (and MPICXX) +and omitting the `--enable-mkl` flag. + +Single node builds are enabled with +``` + --enable-comms=none +``` + +FFTW support that is not in the default search path may then be enabled with +``` + --with-fftw=<path> +``` + +BLAS will not be compiled in by default, and Lanczos will default to Eigen diagonalisation. + diff --git a/TODO b/TODO index 672879cd..001c6c0c 100644 --- a/TODO +++ b/TODO @@ -1,23 +1,30 @@ TODO: --------------- -Peter's work list: -2)- Precision conversion and sort out localConvert <-- -3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started -4)- Binary I/O speed up & x-strips --- Profile CG, BlockCG, etc... 
Flop count/rate -- PARTIAL, time but no flop/s yet --- Physical propagator interface --- Conserved currents --- GaugeFix into central location --- Multigrid Wilson and DWF, compare to other Multigrid implementations --- HDCR resume +Large item work list: +1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O + +2)- Christoph's local basis expansion Lanczos +3)- BG/Q port and check +4)- Precision conversion and sort out localConvert <-- partial + - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet +5)- Physical propagator interface +6)- Conserved currents +7)- Multigrid Wilson and DWF, compare to other Multigrid implementations +8)- HDCR resume Recent DONE +-- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE +-- GaugeFix into central location <-- DONE +-- Scidac and Ildg metadata handling <-- DONE +-- Binary I/O MPI2 IO <-- DONE +-- Binary I/O speed up & x-strips <-- DONE -- Cut down the exterior overhead <-- DONE -- Interior legs from SHM comms <-- DONE -- Half-precision comms <-- DONE --- Merge high precision reduction into develop --- multiRHS DWF; benchmark on Cori/BNL for comms elimination +-- Merge high precision reduction into develop <-- DONE +-- BlockCG, BCGrQ <-- DONE +-- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE -- slice* linalg routines for multiRHS, BlockCG ----- diff --git a/benchmarks/Benchmark_comms.cc b/benchmarks/Benchmark_comms.cc index ce881ef6..532532f8 100644 --- a/benchmarks/Benchmark_comms.cc +++ b/benchmarks/Benchmark_comms.cc @@ -66,7 +66,7 @@ int main (int argc, char ** argv) int threads = GridThread::GetThreads(); std::cout<1) nmu++; @@ -88,6 +88,9 @@ int main (int argc, char ** argv) lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > 
rbuf(8,std::vector(lat*lat*lat*Ls)); @@ -132,13 +135,13 @@ int main (int argc, char ** argv) } Grid.SendToRecvFromComplete(requests); Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + double stop=usecond(); + t_time[i] = stop-start; // microseconds } timestat.statistics(t_time); - double dbytes = bytes; + double dbytes = bytes*ppn; double xbytes = dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; @@ -165,6 +168,9 @@ int main (int argc, char ** argv) std::vector latt_size ({lat,lat,lat,lat}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector > xbuf(8,std::vector(lat*lat*lat*Ls)); std::vector > rbuf(8,std::vector(lat*lat*lat*Ls)); @@ -213,14 +219,14 @@ int main (int argc, char ** argv) } } Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds + double stop=usecond(); + t_time[i] = stop-start; // microseconds } timestat.statistics(t_time); - double dbytes = bytes; + double dbytes = bytes*ppn; double xbytes = dbytes*2.0*ncomm; double rbytes = xbytes; double bidibytes = xbytes+rbytes; @@ -251,6 +257,9 @@ int main (int argc, char ** argv) lat*mpi_layout[3]}); GridCartesian Grid(latt_size,simd_layout,mpi_layout); + RealD Nrank = Grid._Nprocessors; + RealD Nnode = Grid.NodeCount(); + RealD ppn = Nrank/Nnode; std::vector xbuf(8); std::vector rbuf(8); @@ -258,59 +267,66 @@ int main (int argc, char ** argv) for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); + double dbytes; for(int i=0;i requests; 
- ncomm=0; for(int mu=0;mu<4;mu++){ + if (mpi_layout[mu]>1 ) { ncomm++; int comm_proc=1; int xmit_to_rank; int recv_from_rank; - Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu+4][0], + xmit_to_rank, + (void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); } } Grid.StencilSendToRecvFromComplete(requests); Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds - + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } timestat.statistics(t_time); - double dbytes = bytes; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; std::cout< xbuf(8); std::vector rbuf(8); @@ -345,16 +364,18 @@ int main (int argc, char ** argv) for(int d=0;d<8;d++){ xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); + bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); } int ncomm; int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); - + double dbytes; for(int i=0;i requests; - + dbytes=0; ncomm=0; for(int mu=0;mu<4;mu++){ @@ -366,41 +387,43 @@ int main (int argc, char ** 
argv) int recv_from_rank; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu][0], - xmit_to_rank, - (void *)&rbuf[mu][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu][0], + xmit_to_rank, + (void *)&rbuf[mu][0], + recv_from_rank, + bytes); Grid.StencilSendToRecvFromComplete(requests); requests.resize(0); comm_proc = mpi_layout[mu]-1; Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); - Grid.StencilSendToRecvFromBegin(requests, - (void *)&xbuf[mu+4][0], - xmit_to_rank, - (void *)&rbuf[mu+4][0], - recv_from_rank, - bytes); + dbytes+= + Grid.StencilSendToRecvFromBegin(requests, + (void *)&xbuf[mu+4][0], + xmit_to_rank, + (void *)&rbuf[mu+4][0], + recv_from_rank, + bytes); Grid.StencilSendToRecvFromComplete(requests); requests.resize(0); } } - Grid.Barrier(); - double stop=usecond(); - t_time[i] = stop-start; // microseconds - + Grid.Barrier(); + double stop=usecond(); + t_time[i] = stop-start; // microseconds + } timestat.statistics(t_time); - double dbytes = bytes; - double xbytes = dbytes*2.0*ncomm; - double rbytes = xbytes; - double bidibytes = xbytes+rbytes; + dbytes=dbytes*ppn; + double xbytes = dbytes*0.5; + double rbytes = dbytes*0.5; + double bidibytes = dbytes; std::cout<Barrier(); Dw.ZeroCounters(); @@ -302,6 +302,7 @@ int main (int argc, char ** argv) std::cout<< "sD ERR \n " << err < latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); uint64_t Nloop=NLOOP; - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); 
//random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); double a=2.0; @@ -83,7 +83,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000; double flops=vol*Nvec*2;// mul,add - double bytes=3*vol*Nvec*sizeof(Real); + double bytes=3.0*vol*Nvec*sizeof(Real); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); double a=2.0; uint64_t Nloop=NLOOP; @@ -119,7 +119,7 @@ int main (int argc, char ** argv) double time = (stop-start)/Nloop*1000; double flops=vol*Nvec*2;// mul,add - double bytes=3*vol*Nvec*sizeof(Real); + double bytes=3.0*vol*Nvec*sizeof(Real); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + LatticeVec z(&Grid);// random(pRNG,z); + 
LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); RealD a=2.0; @@ -154,7 +154,7 @@ int main (int argc, char ** argv) double stop=usecond(); double time = (stop-start)/Nloop*1000; - double bytes=2*vol*Nvec*sizeof(Real); + double bytes=2.0*vol*Nvec*sizeof(Real); double flops=vol*Nvec*1;// mul std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; uint64_t Nloop=NLOOP; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); - LatticeVec z(&Grid); //random(pRNG,z); - LatticeVec x(&Grid); //random(pRNG,x); - LatticeVec y(&Grid); //random(pRNG,y); + // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); + LatticeVec z(&Grid);// random(pRNG,z); + LatticeVec x(&Grid);// random(pRNG,x); + LatticeVec y(&Grid);// random(pRNG,y); RealD a=2.0; Real nn; double start=usecond(); @@ -187,7 +187,7 @@ int main (int argc, char ** argv) double stop=usecond(); double time = (stop-start)/Nloop*1000; - double bytes=vol*Nvec*sizeof(Real); + double bytes=1.0*vol*Nvec*sizeof(Real); double flops=vol*Nvec*2;// mul,add std::cout< simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); - int threads = GridThread::GetThreads(); + int64_t threads = GridThread::GetThreads(); std::cout< latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix 
z(&Grid);// random(pRNG,z); - LatticeColourMatrix x(&Grid);// random(pRNG,x); - LatticeColourMatrix y(&Grid);// random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); - int vol = 
latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; + int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; GridCartesian Grid(latt_size,simd_layout,mpi_layout); - // GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9}); + GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector({45,12,81,9})); - LatticeColourMatrix z(&Grid); //random(pRNG,z); - LatticeColourMatrix x(&Grid); //random(pRNG,x); - LatticeColourMatrix y(&Grid); //random(pRNG,y); + LatticeColourMatrix z(&Grid); random(pRNG,z); + LatticeColourMatrix x(&Grid); random(pRNG,x); + LatticeColourMatrix y(&Grid); random(pRNG,y); double start=usecond(); - for(int i=0;i]]) AC_CHECK_DECLS([be64toh],[], [], [[#include ]]) @@ -184,6 +185,15 @@ AC_SEARCH_LIBS([limeCreateReader], [lime], In order to use ILGG file format please install or provide the correct path to your installation Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) +AC_SEARCH_LIBS([crc32], [z], + [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] + [have_zlib=true] [LIBS="${LIBS} -lz"], + [AC_MSG_ERROR(zlib library was not found in your system.)]) + +AC_SEARCH_LIBS([move_pages], [numa], + [AC_DEFINE([HAVE_LIBNUMA], [1], [Define to 1 if you have the `LIBNUMA' library])] + [have_libnuma=true] [LIBS="${LIBS} -lnuma"], + [AC_MSG_WARN(libnuma library was not found in your system. 
Some optimisations will not apply)]) AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] @@ -237,6 +247,7 @@ case ${ax_cv_cxx_compiler_vendor} in SIMD_FLAGS='';; KNL) AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) + AC_DEFINE([KNL],[1],[Knights landing processor]) SIMD_FLAGS='-march=knl';; GEN) AC_DEFINE([GEN],[1],[generic vector code]) @@ -244,6 +255,9 @@ case ${ax_cv_cxx_compiler_vendor} in [generic SIMD vector width (in bytes)]) SIMD_GEN_WIDTH_MSG=" (width= $ac_gen_simd_width)" SIMD_FLAGS='';; + NEONv8) + AC_DEFINE([NEONV8],[1],[ARMv8 NEON]) + SIMD_FLAGS='-march=armv8-a';; QPX|BGQ) AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q]) SIMD_FLAGS='';; @@ -272,6 +286,7 @@ case ${ax_cv_cxx_compiler_vendor} in SIMD_FLAGS='';; KNL) AC_DEFINE([AVX512],[1],[AVX512 intrinsics for Knights Landing]) + AC_DEFINE([KNL],[1],[Knights landing processor]) SIMD_FLAGS='-xmic-avx512';; GEN) AC_DEFINE([GEN],[1],[generic vector code]) diff --git a/extras/Hadrons/Environment.cc b/extras/Hadrons/Environment.cc index 37f2a3d7..0e7a4326 100644 --- a/extras/Hadrons/Environment.cc +++ b/extras/Hadrons/Environment.cc @@ -41,9 +41,10 @@ using namespace Hadrons; // constructor ///////////////////////////////////////////////////////////////// Environment::Environment(void) { - nd_ = GridDefaultLatt().size(); + dim_ = GridDefaultLatt(); + nd_ = dim_.size(); grid4d_.reset(SpaceTimeGrid::makeFourDimGrid( - GridDefaultLatt(), GridDefaultSimd(nd_, vComplex::Nsimd()), + dim_, GridDefaultSimd(nd_, vComplex::Nsimd()), GridDefaultMpi())); gridRb4d_.reset(SpaceTimeGrid::makeFourDimRedBlackGrid(grid4d_.get())); auto loc = getGrid()->LocalDimensions(); @@ -132,6 +133,16 @@ unsigned int Environment::getNd(void) const return nd_; } +std::vector Environment::getDim(void) const +{ + return dim_; +} + +int Environment::getDim(const unsigned int mu) const +{ + return dim_[mu]; +} + // random number generator 
///////////////////////////////////////////////////// void Environment::setSeed(const std::vector &seed) { @@ -271,6 +282,21 @@ std::string Environment::getModuleType(const std::string name) const return getModuleType(getModuleAddress(name)); } +std::string Environment::getModuleNamespace(const unsigned int address) const +{ + std::string type = getModuleType(address), ns; + + auto pos2 = type.rfind("::"); + auto pos1 = type.rfind("::", pos2 - 2); + + return type.substr(pos1 + 2, pos2 - pos1 - 2); +} + +std::string Environment::getModuleNamespace(const std::string name) const +{ + return getModuleNamespace(getModuleAddress(name)); +} + bool Environment::hasModule(const unsigned int address) const { return (address < module_.size()); @@ -492,7 +518,14 @@ std::string Environment::getObjectType(const unsigned int address) const { if (hasRegisteredObject(address)) { - return typeName(object_[address].type); + if (object_[address].type) + { + return typeName(object_[address].type); + } + else + { + return ""; + } } else if (hasObject(address)) { @@ -532,6 +565,23 @@ Environment::Size Environment::getObjectSize(const std::string name) const return getObjectSize(getObjectAddress(name)); } +unsigned int Environment::getObjectModule(const unsigned int address) const +{ + if (hasObject(address)) + { + return object_[address].module; + } + else + { + HADRON_ERROR("no object with address " + std::to_string(address)); + } +} + +unsigned int Environment::getObjectModule(const std::string name) const +{ + return getObjectModule(getObjectAddress(name)); +} + unsigned int Environment::getObjectLs(const unsigned int address) const { if (hasRegisteredObject(address)) diff --git a/extras/Hadrons/Environment.hpp b/extras/Hadrons/Environment.hpp index 2628e5a0..13264bd5 100644 --- a/extras/Hadrons/Environment.hpp +++ b/extras/Hadrons/Environment.hpp @@ -106,6 +106,8 @@ public: void createGrid(const unsigned int Ls); GridCartesian * getGrid(const unsigned int Ls = 1) const; 
GridRedBlackCartesian * getRbGrid(const unsigned int Ls = 1) const; + std::vector getDim(void) const; + int getDim(const unsigned int mu) const; unsigned int getNd(void) const; // random number generator void setSeed(const std::vector &seed); @@ -131,6 +133,8 @@ public: std::string getModuleName(const unsigned int address) const; std::string getModuleType(const unsigned int address) const; std::string getModuleType(const std::string name) const; + std::string getModuleNamespace(const unsigned int address) const; + std::string getModuleNamespace(const std::string name) const; bool hasModule(const unsigned int address) const; bool hasModule(const std::string name) const; Graph makeModuleGraph(void) const; @@ -171,6 +175,8 @@ public: std::string getObjectType(const std::string name) const; Size getObjectSize(const unsigned int address) const; Size getObjectSize(const std::string name) const; + unsigned int getObjectModule(const unsigned int address) const; + unsigned int getObjectModule(const std::string name) const; unsigned int getObjectLs(const unsigned int address) const; unsigned int getObjectLs(const std::string name) const; bool hasObject(const unsigned int address) const; @@ -181,6 +187,10 @@ public: bool hasCreatedObject(const std::string name) const; bool isObject5d(const unsigned int address) const; bool isObject5d(const std::string name) const; + template + bool isObjectOfType(const unsigned int address) const; + template + bool isObjectOfType(const std::string name) const; Environment::Size getTotalSize(void) const; void addOwnership(const unsigned int owner, const unsigned int property); @@ -197,6 +207,7 @@ private: bool dryRun_{false}; unsigned int traj_, locVol_; // grids + std::vector dim_; GridPt grid4d_; std::map grid5d_; GridRbPt gridRb4d_; @@ -343,7 +354,7 @@ T * Environment::getObject(const unsigned int address) const else { HADRON_ERROR("object with address " + std::to_string(address) + - " does not have type '" + typeid(T).name() + + " does not 
have type '" + typeName(&typeid(T)) + "' (has type '" + getObjectType(address) + "')"); } } @@ -380,6 +391,37 @@ T * Environment::createLattice(const std::string name) return createLattice(getObjectAddress(name)); } +template +bool Environment::isObjectOfType(const unsigned int address) const +{ + if (hasRegisteredObject(address)) + { + if (auto h = dynamic_cast *>(object_[address].data.get())) + { + return true; + } + else + { + return false; + } + } + else if (hasObject(address)) + { + HADRON_ERROR("object with address " + std::to_string(address) + + " exists but is not registered"); + } + else + { + HADRON_ERROR("no object with address " + std::to_string(address)); + } +} + +template +bool Environment::isObjectOfType(const std::string name) const +{ + return isObjectOfType(getObjectAddress(name)); +} + END_HADRONS_NAMESPACE #endif // Hadrons_Environment_hpp_ diff --git a/extras/Hadrons/Global.hpp b/extras/Hadrons/Global.hpp index 3e11ddf8..9de01623 100644 --- a/extras/Hadrons/Global.hpp +++ b/extras/Hadrons/Global.hpp @@ -51,23 +51,43 @@ using Grid::operator<<; * error with GCC 5 (clang & GCC 6 compile fine without it). 
*/ -// FIXME: find a way to do that in a more general fashion #ifndef FIMPL #define FIMPL WilsonImplR #endif +#ifndef SIMPL +#define SIMPL ScalarImplCR +#endif BEGIN_HADRONS_NAMESPACE // type aliases -#define TYPE_ALIASES(FImpl, suffix)\ +#define FERM_TYPE_ALIASES(FImpl, suffix)\ typedef FermionOperator FMat##suffix; \ typedef typename FImpl::FermionField FermionField##suffix; \ typedef typename FImpl::PropagatorField PropagatorField##suffix; \ typedef typename FImpl::SitePropagator SitePropagator##suffix; \ -typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix;\ -typedef std::function \ + SlicedPropagator##suffix; + +#define GAUGE_TYPE_ALIASES(FImpl, suffix)\ +typedef typename FImpl::DoubledGaugeField DoubledGaugeField##suffix; + +#define SCALAR_TYPE_ALIASES(SImpl, suffix)\ +typedef typename SImpl::Field ScalarField##suffix;\ +typedef typename SImpl::Field PropagatorField##suffix; + +#define SOLVER_TYPE_ALIASES(FImpl, suffix)\ +typedef std::function SolverFn##suffix; +#define SINK_TYPE_ALIASES(suffix)\ +typedef std::function SinkFn##suffix; + +#define FGS_TYPE_ALIASES(FImpl, suffix)\ +FERM_TYPE_ALIASES(FImpl, suffix)\ +GAUGE_TYPE_ALIASES(FImpl, suffix)\ +SOLVER_TYPE_ALIASES(FImpl, suffix) + // logger class HadronsLogger: public Logger { diff --git a/extras/Hadrons/Modules.hpp b/extras/Hadrons/Modules.hpp index 05ad1697..c27254aa 100644 --- a/extras/Hadrons/Modules.hpp +++ b/extras/Hadrons/Modules.hpp @@ -1,31 +1,3 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules.hpp - -Copyright (C) 2015 -Copyright (C) 2016 - -Author: Antonin Portelli - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ #include #include #include @@ -36,13 +8,18 @@ See the full license in the file "LICENSE" in the top level distribution directo #include #include #include +#include #include #include +#include #include #include +#include +#include +#include +#include #include #include #include #include #include -#include diff --git a/extras/Hadrons/Modules/MAction/DWF.hpp b/extras/Hadrons/Modules/MAction/DWF.hpp index 49861e3e..78e0916c 100644 --- a/extras/Hadrons/Modules/MAction/DWF.hpp +++ b/extras/Hadrons/Modules/MAction/DWF.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_DWF_hpp_ -#define Hadrons_DWF_hpp_ +#ifndef Hadrons_MAction_DWF_hpp_ +#define Hadrons_MAction_DWF_hpp_ #include #include @@ -48,14 +48,15 @@ public: std::string, gauge, unsigned int, Ls, double , mass, - double , M5); + double , M5, + std::string , boundary); }; template class TDWF: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TDWF(const std::string name); @@ -116,14 +117,19 @@ void TDWF::execute(void) << par().mass << ", M5= " << par().M5 << " and Ls= " << par().Ls << " using gauge field '" << par().gauge << "'" << std::endl; + LOG(Message) << "Fermion 
boundary conditions: " << par().boundary + << std::endl; env().createGrid(par().Ls); auto &U = *env().template getObject(par().gauge); auto &g4 = *env().getGrid(); auto &grb4 = *env().getRbGrid(); auto &g5 = *env().getGrid(par().Ls); auto &grb5 = *env().getRbGrid(par().Ls); + std::vector boundary = strToVec(par().boundary); + typename DomainWallFermion::ImplParams implParams(boundary); FMat *fMatPt = new DomainWallFermion(U, g5, grb5, g4, grb4, - par().mass, par().M5); + par().mass, par().M5, + implParams); env().setObject(getName(), fMatPt); } @@ -131,4 +137,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_DWF_hpp_ +#endif // Hadrons_MAction_DWF_hpp_ diff --git a/extras/Hadrons/Modules/MAction/Wilson.hpp b/extras/Hadrons/Modules/MAction/Wilson.hpp index 6ffa997d..aab54245 100644 --- a/extras/Hadrons/Modules/MAction/Wilson.hpp +++ b/extras/Hadrons/Modules/MAction/Wilson.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Wilson_hpp_ -#define Hadrons_Wilson_hpp_ +#ifndef Hadrons_MAction_Wilson_hpp_ +#define Hadrons_MAction_Wilson_hpp_ #include #include @@ -46,14 +46,15 @@ class WilsonPar: Serializable public: GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, std::string, gauge, - double , mass); + double , mass, + std::string, boundary); }; template class TWilson: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TWilson(const std::string name); @@ -112,10 +113,15 @@ void TWilson::execute() { LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass << " using gauge field '" << par().gauge << "'" << std::endl; + LOG(Message) << "Fermion boundary conditions: " << par().boundary + << std::endl; auto &U = *env().template getObject(par().gauge); auto &grid = *env().getGrid(); auto &gridRb = *env().getRbGrid(); - FMat *fMatPt = 
new WilsonFermion(U, grid, gridRb, par().mass); + std::vector boundary = strToVec(par().boundary); + typename WilsonFermion::ImplParams implParams(boundary); + FMat *fMatPt = new WilsonFermion(U, grid, gridRb, par().mass, + implParams); env().setObject(getName(), fMatPt); } diff --git a/extras/Hadrons/Modules/MContraction/Baryon.hpp b/extras/Hadrons/Modules/MContraction/Baryon.hpp index be7d919c..78bde5a2 100644 --- a/extras/Hadrons/Modules/MContraction/Baryon.hpp +++ b/extras/Hadrons/Modules/MContraction/Baryon.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Baryon_hpp_ -#define Hadrons_Baryon_hpp_ +#ifndef Hadrons_MContraction_Baryon_hpp_ +#define Hadrons_MContraction_Baryon_hpp_ #include #include @@ -55,9 +55,9 @@ template class TBaryon: public Module { public: - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); - TYPE_ALIASES(FImpl3, 3); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl3, 3); class Result: Serializable { public: @@ -121,11 +121,11 @@ void TBaryon::execute(void) // FIXME: do contractions - write(writer, "meson", result); + // write(writer, "meson", result); } END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Baryon_hpp_ +#endif // Hadrons_MContraction_Baryon_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp index 4ad12e90..4f782cd3 100644 --- a/extras/Hadrons/Modules/MContraction/DiscLoop.hpp +++ b/extras/Hadrons/Modules/MContraction/DiscLoop.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_DiscLoop_hpp_ -#define Hadrons_DiscLoop_hpp_ +#ifndef Hadrons_MContraction_DiscLoop_hpp_ 
+#define Hadrons_MContraction_DiscLoop_hpp_ #include #include @@ -52,7 +52,7 @@ public: template class TDiscLoop: public Module { - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); class Result: Serializable { public: @@ -141,4 +141,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_DiscLoop_hpp_ +#endif // Hadrons_MContraction_DiscLoop_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp index e5e73fa6..7f643d49 100644 --- a/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp +++ b/extras/Hadrons/Modules/MContraction/Gamma3pt.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Gamma3pt_hpp_ -#define Hadrons_Gamma3pt_hpp_ +#ifndef Hadrons_MContraction_Gamma3pt_hpp_ +#define Hadrons_MContraction_Gamma3pt_hpp_ #include #include @@ -72,9 +72,9 @@ public: template class TGamma3pt: public Module { - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); - TYPE_ALIASES(FImpl3, 3); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl3, 3); class Result: Serializable { public: @@ -167,4 +167,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Gamma3pt_hpp_ +#endif // Hadrons_MContraction_Gamma3pt_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/Meson.hpp b/extras/Hadrons/Modules/MContraction/Meson.hpp index 4cbe1ac4..7810326a 100644 --- a/extras/Hadrons/Modules/MContraction/Meson.hpp +++ b/extras/Hadrons/Modules/MContraction/Meson.hpp @@ -29,8 +29,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Meson_hpp_ -#define Hadrons_Meson_hpp_ +#ifndef Hadrons_MContraction_Meson_hpp_ +#define Hadrons_MContraction_Meson_hpp_ 
#include #include @@ -69,7 +69,7 @@ public: std::string, q1, std::string, q2, std::string, gammas, - std::string, mom, + std::string, sink, std::string, output); }; @@ -77,8 +77,10 @@ template class TMeson: public Module { public: - TYPE_ALIASES(FImpl1, 1); - TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(FImpl1, 1); + FERM_TYPE_ALIASES(FImpl2, 2); + FERM_TYPE_ALIASES(ScalarImplCR, Scalar); + SINK_TYPE_ALIASES(Scalar); class Result: Serializable { public: @@ -115,7 +117,7 @@ TMeson::TMeson(const std::string name) template std::vector TMeson::getInput(void) { - std::vector input = {par().q1, par().q2}; + std::vector input = {par().q1, par().q2, par().sink}; return input; } @@ -131,12 +133,11 @@ std::vector TMeson::getOutput(void) template void TMeson::parseGammaString(std::vector &gammaList) { + gammaList.clear(); // Determine gamma matrices to insert at source/sink. if (par().gammas.compare("all") == 0) { // Do all contractions. - unsigned int n_gam = Ns * Ns; - gammaList.resize(n_gam*n_gam); for (unsigned int i = 1; i < Gamma::nGamma; i += 2) { for (unsigned int j = 1; j < Gamma::nGamma; j += 2) @@ -155,6 +156,9 @@ void TMeson::parseGammaString(std::vector &gammaList) // execution /////////////////////////////////////////////////////////////////// +#define mesonConnected(q1, q2, gSnk, gSrc) \ +(g5*(gSnk))*(q1)*(adj(gSrc)*g5)*adj(q2) + template void TMeson::execute(void) { @@ -162,43 +166,72 @@ void TMeson::execute(void) << " quarks '" << par().q1 << "' and '" << par().q2 << "'" << std::endl; - CorrWriter writer(par().output); - PropagatorField1 &q1 = *env().template getObject(par().q1); - PropagatorField2 &q2 = *env().template getObject(par().q2); - LatticeComplex c(env().getGrid()); - Gamma g5(Gamma::Algebra::Gamma5); - std::vector gammaList; + CorrWriter writer(par().output); std::vector buf; std::vector result; - std::vector p; - - p = strToVec(par().mom); - LatticeComplex ph(env().getGrid()), coor(env().getGrid()); - Complex i(0.0,1.0); - ph = zero; - 
for(unsigned int mu = 0; mu < env().getNd(); mu++) - { - LatticeCoordinate(coor, mu); - ph = ph + p[mu]*coor*((1./(env().getGrid()->_fdimensions[mu]))); - } - ph = exp((Real)(2*M_PI)*i*ph); + Gamma g5(Gamma::Algebra::Gamma5); + std::vector gammaList; + int nt = env().getDim(Tp); parseGammaString(gammaList); - result.resize(gammaList.size()); for (unsigned int i = 0; i < result.size(); ++i) { - Gamma gSnk(gammaList[i].first); - Gamma gSrc(gammaList[i].second); - c = trace((g5*gSnk)*q1*(adj(gSrc)*g5)*adj(q2))*ph; - sliceSum(c, buf, Tp); - result[i].gamma_snk = gammaList[i].first; result[i].gamma_src = gammaList[i].second; - result[i].corr.resize(buf.size()); - for (unsigned int t = 0; t < buf.size(); ++t) + result[i].corr.resize(nt); + } + if (env().template isObjectOfType(par().q1) and + env().template isObjectOfType(par().q2)) + { + SlicedPropagator1 &q1 = *env().template getObject(par().q1); + SlicedPropagator2 &q2 = *env().template getObject(par().q2); + + LOG(Message) << "(propagator already sinked)" << std::endl; + for (unsigned int i = 0; i < result.size(); ++i) { - result[i].corr[t] = TensorRemove(buf[t]); + Gamma gSnk(gammaList[i].first); + Gamma gSrc(gammaList[i].second); + + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[i].corr[t] = TensorRemove(trace(mesonConnected(q1[t], q2[t], gSnk, gSrc))); + } + } + } + else + { + PropagatorField1 &q1 = *env().template getObject(par().q1); + PropagatorField2 &q2 = *env().template getObject(par().q2); + LatticeComplex c(env().getGrid()); + + LOG(Message) << "(using sink '" << par().sink << "')" << std::endl; + for (unsigned int i = 0; i < result.size(); ++i) + { + Gamma gSnk(gammaList[i].first); + Gamma gSrc(gammaList[i].second); + std::string ns; + + ns = env().getModuleNamespace(env().getObjectModule(par().sink)); + if (ns == "MSource") + { + PropagatorField1 &sink = + *env().template getObject(par().sink); + + c = trace(mesonConnected(q1, q2, gSnk, gSrc)*sink); + sliceSum(c, buf, Tp); + } + else if (ns 
== "MSink") + { + SinkFnScalar &sink = *env().template getObject(par().sink); + + c = trace(mesonConnected(q1, q2, gSnk, gSrc)); + buf = sink(c); + } + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[i].corr[t] = TensorRemove(buf[t]); + } } } write(writer, "meson", result); @@ -208,4 +241,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Meson_hpp_ +#endif // Hadrons_MContraction_Meson_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp index 23482feb..0a3c2e31 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonian.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonian_hpp_ -#define Hadrons_WeakHamiltonian_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonian_hpp_ +#define Hadrons_MContraction_WeakHamiltonian_hpp_ #include #include @@ -83,7 +83,7 @@ public: class T##modname: public Module\ {\ public:\ - TYPE_ALIASES(FIMPL,)\ + FERM_TYPE_ALIASES(FIMPL,)\ class Result: Serializable\ {\ public:\ @@ -111,4 +111,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonian_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonian_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp index 2ee87895..3a2b9309 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianEye.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonianEye_hpp_ -#define 
Hadrons_WeakHamiltonianEye_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonianEye_hpp_ +#define Hadrons_MContraction_WeakHamiltonianEye_hpp_ #include @@ -55,4 +55,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonianEye_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonianEye_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp index 69bb8005..eb5abe3c 100644 --- a/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakHamiltonianNonEye.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakHamiltonianNonEye_hpp_ -#define Hadrons_WeakHamiltonianNonEye_hpp_ +#ifndef Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ +#define Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ #include @@ -54,4 +54,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakHamiltonianNonEye_hpp_ +#endif // Hadrons_MContraction_WeakHamiltonianNonEye_hpp_ diff --git a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp index c0d8f829..f26d4636 100644 --- a/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp +++ b/extras/Hadrons/Modules/MContraction/WeakNeutral4ptDisc.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WeakNeutral4ptDisc_hpp_ -#define Hadrons_WeakNeutral4ptDisc_hpp_ +#ifndef Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ +#define Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ #include @@ -56,4 +56,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WeakNeutral4ptDisc_hpp_ 
+#endif // Hadrons_MContraction_WeakNeutral4ptDisc_hpp_ diff --git a/extras/Hadrons/Modules/Quark.hpp b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp similarity index 66% rename from extras/Hadrons/Modules/Quark.hpp rename to extras/Hadrons/Modules/MFermion/GaugeProp.hpp index be7426ab..b4f9edcc 100644 --- a/extras/Hadrons/Modules/Quark.hpp +++ b/extras/Hadrons/Modules/MFermion/GaugeProp.hpp @@ -1,34 +1,5 @@ -/************************************************************************************* - -Grid physics library, www.github.com/paboyle/Grid - -Source file: extras/Hadrons/Modules/Quark.hpp - -Copyright (C) 2015 -Copyright (C) 2016 - -Author: Antonin Portelli - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- -See the full license in the file "LICENSE" in the top level distribution directory -*************************************************************************************/ -/* END LEGAL */ - -#ifndef Hadrons_Quark_hpp_ -#define Hadrons_Quark_hpp_ +#ifndef Hadrons_MFermion_GaugeProp_hpp_ +#define Hadrons_MFermion_GaugeProp_hpp_ #include #include @@ -37,27 +8,29 @@ See the full license in the file "LICENSE" in the top level distribution directo BEGIN_HADRONS_NAMESPACE /****************************************************************************** - * TQuark * + * GaugeProp * ******************************************************************************/ -class QuarkPar: Serializable +BEGIN_MODULE_NAMESPACE(MFermion) + +class GaugePropPar: Serializable { public: - GRID_SERIALIZABLE_CLASS_MEMBERS(QuarkPar, + GRID_SERIALIZABLE_CLASS_MEMBERS(GaugePropPar, std::string, source, std::string, solver); }; template -class TQuark: public Module +class TGaugeProp: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor - TQuark(const std::string name); + TGaugeProp(const std::string name); // destructor - virtual ~TQuark(void) = default; - // dependencies/products + virtual ~TGaugeProp(void) = default; + // dependency relation virtual std::vector getInput(void); virtual std::vector getOutput(void); // setup @@ -69,20 +42,20 @@ private: SolverFn *solver_{nullptr}; }; -MODULE_REGISTER(Quark, TQuark); +MODULE_REGISTER_NS(GaugeProp, TGaugeProp, MFermion); /****************************************************************************** - * TQuark implementation * + * TGaugeProp implementation * ******************************************************************************/ // constructor ///////////////////////////////////////////////////////////////// template -TQuark::TQuark(const std::string name) -: Module(name) +TGaugeProp::TGaugeProp(const std::string name) +: Module(name) {} // dependencies/products 
/////////////////////////////////////////////////////// template -std::vector TQuark::getInput(void) +std::vector TGaugeProp::getInput(void) { std::vector in = {par().source, par().solver}; @@ -90,7 +63,7 @@ std::vector TQuark::getInput(void) } template -std::vector TQuark::getOutput(void) +std::vector TGaugeProp::getOutput(void) { std::vector out = {getName(), getName() + "_5d"}; @@ -99,7 +72,7 @@ std::vector TQuark::getOutput(void) // setup /////////////////////////////////////////////////////////////////////// template -void TQuark::setup(void) +void TGaugeProp::setup(void) { Ls_ = env().getObjectLs(par().solver); env().template registerLattice(getName()); @@ -111,13 +84,13 @@ void TQuark::setup(void) // execution /////////////////////////////////////////////////////////////////// template -void TQuark::execute(void) +void TGaugeProp::execute(void) { LOG(Message) << "Computing quark propagator '" << getName() << "'" - << std::endl; + << std::endl; FermionField source(env().getGrid(Ls_)), sol(env().getGrid(Ls_)), - tmp(env().getGrid()); + tmp(env().getGrid()); std::string propName = (Ls_ == 1) ? 
getName() : (getName() + "_5d"); PropagatorField &prop = *env().template createLattice(propName); PropagatorField &fullSrc = *env().template getObject(par().source); @@ -128,7 +101,7 @@ void TQuark::execute(void) } LOG(Message) << "Inverting using solver '" << par().solver - << "' on source '" << par().source << "'" << std::endl; + << "' on source '" << par().source << "'" << std::endl; for (unsigned int s = 0; s < Ns; ++s) for (unsigned int c = 0; c < Nc; ++c) { @@ -170,7 +143,7 @@ void TQuark::execute(void) if (Ls_ > 1) { PropagatorField &p4d = - *env().template getObject(getName()); + *env().template getObject(getName()); axpby_ssp_pminus(sol, 0., sol, 1., sol, 0, 0); axpby_ssp_pplus(sol, 1., sol, 1., sol, 0, Ls_-1); @@ -180,6 +153,8 @@ void TQuark::execute(void) } } +END_MODULE_NAMESPACE + END_HADRONS_NAMESPACE -#endif // Hadrons_Quark_hpp_ +#endif // Hadrons_MFermion_GaugeProp_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Load.cc b/extras/Hadrons/Modules/MGauge/Load.cc index e5ee8abb..062e7e98 100644 --- a/extras/Hadrons/Modules/MGauge/Load.cc +++ b/extras/Hadrons/Modules/MGauge/Load.cc @@ -65,7 +65,7 @@ void TLoad::setup(void) // execution /////////////////////////////////////////////////////////////////// void TLoad::execute(void) { - NerscField header; + FieldMetaData header; std::string fileName = par().file + "." 
+ std::to_string(env().getTrajectory()); @@ -74,5 +74,5 @@ void TLoad::execute(void) LatticeGaugeField &U = *env().createLattice(getName()); NerscIO::readConfiguration(U, header, fileName); LOG(Message) << "NERSC header:" << std::endl; - dump_nersc_header(header, LOG(Message)); + dump_meta_data(header, LOG(Message)); } diff --git a/extras/Hadrons/Modules/MGauge/Load.hpp b/extras/Hadrons/Modules/MGauge/Load.hpp index c41f9b8c..5ff6da0f 100644 --- a/extras/Hadrons/Modules/MGauge/Load.hpp +++ b/extras/Hadrons/Modules/MGauge/Load.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Load_hpp_ -#define Hadrons_Load_hpp_ +#ifndef Hadrons_MGauge_Load_hpp_ +#define Hadrons_MGauge_Load_hpp_ #include #include @@ -70,4 +70,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Load_hpp_ +#endif // Hadrons_MGauge_Load_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Random.hpp b/extras/Hadrons/Modules/MGauge/Random.hpp index e3fbcf1a..a97d25cf 100644 --- a/extras/Hadrons/Modules/MGauge/Random.hpp +++ b/extras/Hadrons/Modules/MGauge/Random.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Random_hpp_ -#define Hadrons_Random_hpp_ +#ifndef Hadrons_MGauge_Random_hpp_ +#define Hadrons_MGauge_Random_hpp_ #include #include @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Random_hpp_ +#endif // Hadrons_MGauge_Random_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/StochEm.cc b/extras/Hadrons/Modules/MGauge/StochEm.cc new file mode 100644 index 00000000..c7a9fc4f --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/StochEm.cc @@ -0,0 +1,88 @@ 
+/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MGauge/StochEm.cc + +Copyright (C) 2015 +Copyright (C) 2016 + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MGauge; + +/****************************************************************************** +* TStochEm implementation * +******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +TStochEm::TStochEm(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TStochEm::getInput(void) +{ + std::vector in; + + return in; +} + +std::vector TStochEm::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +void TStochEm::setup(void) +{ + if (!env().hasRegisteredObject("_" + getName() + "_weight")) + { + 
env().registerLattice("_" + getName() + "_weight"); + } + env().registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +void TStochEm::execute(void) +{ + PhotonR photon(par().gauge, par().zmScheme); + EmField &a = *env().createLattice(getName()); + EmComp *w; + + if (!env().hasCreatedObject("_" + getName() + "_weight")) + { + LOG(Message) << "Caching stochastic EM potential weight (gauge: " + << par().gauge << ", zero-mode scheme: " + << par().zmScheme << ")..." << std::endl; + w = env().createLattice("_" + getName() + "_weight"); + photon.StochasticWeight(*w); + } + else + { + w = env().getObject("_" + getName() + "_weight"); + } + LOG(Message) << "Generating stochastic EM potential..." << std::endl; + photon.StochasticField(a, *env().get4dRng(), *w); +} diff --git a/extras/Hadrons/Modules/MGauge/StochEm.hpp b/extras/Hadrons/Modules/MGauge/StochEm.hpp new file mode 100644 index 00000000..12ce9fdc --- /dev/null +++ b/extras/Hadrons/Modules/MGauge/StochEm.hpp @@ -0,0 +1,75 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: extras/Hadrons/Modules/MGauge/StochEm.hpp + +Copyright (C) 2015 +Copyright (C) 2016 + + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef Hadrons_MGauge_StochEm_hpp_ +#define Hadrons_MGauge_StochEm_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * StochEm * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MGauge) + +class StochEmPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(StochEmPar, + PhotonR::Gauge, gauge, + PhotonR::ZmScheme, zmScheme); +}; + +class TStochEm: public Module +{ +public: + typedef PhotonR::GaugeField EmField; + typedef PhotonR::GaugeLinkField EmComp; +public: + // constructor + TStochEm(const std::string name); + // destructor + virtual ~TStochEm(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(StochEm, TStochEm, MGauge); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MGauge_StochEm_hpp_ diff --git a/extras/Hadrons/Modules/MGauge/Unit.hpp b/extras/Hadrons/Modules/MGauge/Unit.hpp index 2ff10bfd..7cd15ef7 100644 --- a/extras/Hadrons/Modules/MGauge/Unit.hpp +++ b/extras/Hadrons/Modules/MGauge/Unit.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Unit_hpp_ -#define Hadrons_Unit_hpp_ +#ifndef Hadrons_MGauge_Unit_hpp_ +#define Hadrons_MGauge_Unit_hpp_ #include #include @@ -63,4 +63,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Unit_hpp_ +#endif // Hadrons_MGauge_Unit_hpp_ diff --git a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp 
b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp index 3d2850d1..5d2c4a13 100644 --- a/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp +++ b/extras/Hadrons/Modules/MLoop/NoiseLoop.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_NoiseLoop_hpp_ -#define Hadrons_NoiseLoop_hpp_ +#ifndef Hadrons_MLoop_NoiseLoop_hpp_ +#define Hadrons_MLoop_NoiseLoop_hpp_ #include #include @@ -65,7 +65,7 @@ template class TNoiseLoop: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TNoiseLoop(const std::string name); @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_NoiseLoop_hpp_ +#endif // Hadrons_MLoop_NoiseLoop_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.cc b/extras/Hadrons/Modules/MScalar/ChargedProp.cc new file mode 100644 index 00000000..cd8dc244 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.cc @@ -0,0 +1,226 @@ +#include +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MScalar; + +/****************************************************************************** +* TChargedProp implementation * +******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +TChargedProp::TChargedProp(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TChargedProp::getInput(void) +{ + std::vector in = {par().source, par().emField}; + + return in; +} + +std::vector TChargedProp::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +void TChargedProp::setup(void) +{ + freeMomPropName_ = FREEMOMPROP(par().mass); 
+ phaseName_.clear(); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + phaseName_.push_back("_shiftphase_" + std::to_string(mu)); + } + GFSrcName_ = "_" + getName() + "_DinvSrc"; + if (!env().hasRegisteredObject(freeMomPropName_)) + { + env().registerLattice(freeMomPropName_); + } + if (!env().hasRegisteredObject(phaseName_[0])) + { + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + env().registerLattice(phaseName_[mu]); + } + } + if (!env().hasRegisteredObject(GFSrcName_)) + { + env().registerLattice(GFSrcName_); + } + env().registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +void TChargedProp::execute(void) +{ + // CACHING ANALYTIC EXPRESSIONS + ScalarField &source = *env().getObject(par().source); + Complex ci(0.0,1.0); + FFT fft(env().getGrid()); + + // cache free scalar propagator + if (!env().hasCreatedObject(freeMomPropName_)) + { + LOG(Message) << "Caching momentum space free scalar propagator" + << " (mass= " << par().mass << ")..." << std::endl; + freeMomProp_ = env().createLattice(freeMomPropName_); + SIMPL::MomentumSpacePropagator(*freeMomProp_, par().mass); + } + else + { + freeMomProp_ = env().getObject(freeMomPropName_); + } + // cache G*F*src + if (!env().hasCreatedObject(GFSrcName_)) + { + GFSrc_ = env().createLattice(GFSrcName_); + fft.FFT_all_dim(*GFSrc_, source, FFT::forward); + *GFSrc_ = (*freeMomProp_)*(*GFSrc_); + } + else + { + GFSrc_ = env().getObject(GFSrcName_); + } + // cache phases (phase_ is only ever appended to below: drop entries left by a previous call) + phase_.clear(); + if (!env().hasCreatedObject(phaseName_[0])) + { + std::vector &l = env().getGrid()->_fdimensions; + + LOG(Message) << "Caching shift phases..." 
<< std::endl; + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Real twoPiL = M_PI*2./l[mu]; + + phase_.push_back(env().createLattice(phaseName_[mu])); + LatticeCoordinate(*(phase_[mu]), mu); + *(phase_[mu]) = exp(ci*twoPiL*(*(phase_[mu]))); + } + } + else + { + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + phase_.push_back(env().getObject(phaseName_[mu])); + } + } + + // PROPAGATOR CALCULATION + LOG(Message) << "Computing charged scalar propagator" + << " (mass= " << par().mass + << ", charge= " << par().charge << ")..." << std::endl; + + ScalarField &prop = *env().createLattice(getName()); + ScalarField buf(env().getGrid()); + ScalarField &GFSrc = *GFSrc_, &G = *freeMomProp_; + double q = par().charge; + + // G*F*Src + prop = GFSrc; + + // - q*G*momD1*G*F*Src (momD1 = F*D1*Finv) + buf = GFSrc; + momD1(buf, fft); + buf = G*buf; + prop = prop - q*buf; + + // + q^2*G*momD1*G*momD1*G*F*Src (here buf = G*momD1*G*F*Src) + momD1(buf, fft); + prop = prop + q*q*G*buf; + + // - q^2*G*momD2*G*F*Src (momD2 = F*D2*Finv) + buf = GFSrc; + momD2(buf, fft); + prop = prop - q*q*G*buf; + + // final FT + fft.FFT_all_dim(prop, prop, FFT::backward); + + // OUTPUT IF NECESSARY + if (!par().output.empty()) + { + std::string filename = par().output + "." + + std::to_string(env().getTrajectory()); + + LOG(Message) << "Saving zero-momentum projection to '" + << filename << "'..." 
<< std::endl; + + CorrWriter writer(filename); + std::vector vecBuf; + std::vector result; + + sliceSum(prop, vecBuf, Tp); + result.resize(vecBuf.size()); + for (unsigned int t = 0; t < vecBuf.size(); ++t) + { + result[t] = TensorRemove(vecBuf[t]); + } + write(writer, "charge", q); + write(writer, "prop", result); + } +} + +void TChargedProp::momD1(ScalarField &s, FFT &fft) +{ + EmField &A = *env().getObject(par().emField); + ScalarField buf(env().getGrid()), result(env().getGrid()), + Amu(env().getGrid()); + Complex ci(0.0,1.0); + + result = zero; + + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = (*phase_[mu])*s; + fft.FFT_all_dim(buf, buf, FFT::backward); + buf = Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result - ci*buf; + } + fft.FFT_all_dim(s, s, FFT::backward); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = Amu*s; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result + ci*adj(*phase_[mu])*buf; + } + + s = result; +} + +void TChargedProp::momD2(ScalarField &s, FFT &fft) +{ + EmField &A = *env().getObject(par().emField); + ScalarField buf(env().getGrid()), result(env().getGrid()), + Amu(env().getGrid()); + + result = zero; + + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = (*phase_[mu])*s; + fft.FFT_all_dim(buf, buf, FFT::backward); + buf = Amu*Amu*buf; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result + .5*buf; + } + fft.FFT_all_dim(s, s, FFT::backward); + for (unsigned int mu = 0; mu < env().getNd(); ++mu) + { + Amu = peekLorentz(A, mu); + buf = Amu*Amu*s; + fft.FFT_all_dim(buf, buf, FFT::forward); + result = result + .5*adj(*phase_[mu])*buf; + } + + s = result; +} diff --git a/extras/Hadrons/Modules/MScalar/ChargedProp.hpp b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp new file mode 100644 index 00000000..fbe75c05 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/ChargedProp.hpp @@ 
-0,0 +1,61 @@ +#ifndef Hadrons_MScalar_ChargedProp_hpp_ +#define Hadrons_MScalar_ChargedProp_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Charged scalar propagator * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalar) + +class ChargedPropPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ChargedPropPar, + std::string, emField, + std::string, source, + double, mass, + double, charge, + std::string, output); +}; + +class TChargedProp: public Module +{ +public: + SCALAR_TYPE_ALIASES(SIMPL,); + typedef PhotonR::GaugeField EmField; + typedef PhotonR::GaugeLinkField EmComp; +public: + // constructor + TChargedProp(const std::string name); + // destructor + virtual ~TChargedProp(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +private: + void momD1(ScalarField &s, FFT &fft); + void momD2(ScalarField &s, FFT &fft); +private: + std::string freeMomPropName_, GFSrcName_; + std::vector phaseName_; + ScalarField *freeMomProp_, *GFSrc_; + std::vector phase_; + EmField *A; +}; + +MODULE_REGISTER_NS(ChargedProp, TChargedProp, MScalar); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalar_ChargedProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.cc b/extras/Hadrons/Modules/MScalar/FreeProp.cc new file mode 100644 index 00000000..674867e3 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/FreeProp.cc @@ -0,0 +1,79 @@ +#include +#include + +using namespace Grid; +using namespace Hadrons; +using namespace MScalar; + +/****************************************************************************** +* TFreeProp implementation * +******************************************************************************/ +// 
constructor ///////////////////////////////////////////////////////////////// +TFreeProp::TFreeProp(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +std::vector TFreeProp::getInput(void) +{ + std::vector in = {par().source}; + + return in; +} + +std::vector TFreeProp::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +void TFreeProp::setup(void) +{ + freeMomPropName_ = FREEMOMPROP(par().mass); + + if (!env().hasRegisteredObject(freeMomPropName_)) + { + env().registerLattice(freeMomPropName_); + } + env().registerLattice(getName()); +} + +// execution /////////////////////////////////////////////////////////////////// +void TFreeProp::execute(void) +{ + ScalarField &prop = *env().createLattice(getName()); + ScalarField &source = *env().getObject(par().source); + ScalarField *freeMomProp; + + if (!env().hasCreatedObject(freeMomPropName_)) + { + LOG(Message) << "Caching momentum space free scalar propagator" + << " (mass= " << par().mass << ")..." << std::endl; + freeMomProp = env().createLattice(freeMomPropName_); + SIMPL::MomentumSpacePropagator(*freeMomProp, par().mass); + } + else + { + freeMomProp = env().getObject(freeMomPropName_); + } + LOG(Message) << "Computing free scalar propagator..." << std::endl; + SIMPL::FreePropagator(source, prop, *freeMomProp); + + if (!par().output.empty()) + { + TextWriter writer(par().output + "." 
+ + std::to_string(env().getTrajectory())); + std::vector buf; + std::vector result; + + sliceSum(prop, buf, Tp); + result.resize(buf.size()); + for (unsigned int t = 0; t < buf.size(); ++t) + { + result[t] = TensorRemove(buf[t]); + } + write(writer, "prop", result); + } +} diff --git a/extras/Hadrons/Modules/MScalar/FreeProp.hpp b/extras/Hadrons/Modules/MScalar/FreeProp.hpp new file mode 100644 index 00000000..97cf288a --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/FreeProp.hpp @@ -0,0 +1,50 @@ +#ifndef Hadrons_MScalar_FreeProp_hpp_ +#define Hadrons_MScalar_FreeProp_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * FreeProp * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MScalar) + +class FreePropPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(FreePropPar, + std::string, source, + double, mass, + std::string, output); +}; + +class TFreeProp: public Module +{ +public: + SCALAR_TYPE_ALIASES(SIMPL,); +public: + // constructor + TFreeProp(const std::string name); + // destructor + virtual ~TFreeProp(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +private: + std::string freeMomPropName_; +}; + +MODULE_REGISTER_NS(FreeProp, TFreeProp, MScalar); + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MScalar_FreeProp_hpp_ diff --git a/extras/Hadrons/Modules/MScalar/Scalar.hpp b/extras/Hadrons/Modules/MScalar/Scalar.hpp new file mode 100644 index 00000000..db702ff2 --- /dev/null +++ b/extras/Hadrons/Modules/MScalar/Scalar.hpp @@ -0,0 +1,6 @@ +#ifndef Hadrons_MScalar_Scalar_hpp_ +#define Hadrons_MScalar_Scalar_hpp_ +// name of the environment object caching the momentum-space free scalar propagator of mass m 
+#define FREEMOMPROP(m) ("_scalar_mom_prop_" + std::to_string(m)) + +#endif // Hadrons_MScalar_Scalar_hpp_ diff --git 
a/extras/Hadrons/Modules/MSink/Point.hpp b/extras/Hadrons/Modules/MSink/Point.hpp new file mode 100644 index 00000000..7b3aa9de --- /dev/null +++ b/extras/Hadrons/Modules/MSink/Point.hpp @@ -0,0 +1,114 @@ +#ifndef Hadrons_MSink_Point_hpp_ +#define Hadrons_MSink_Point_hpp_ + +#include +#include +#include + +BEGIN_HADRONS_NAMESPACE + +/****************************************************************************** + * Point * + ******************************************************************************/ +BEGIN_MODULE_NAMESPACE(MSink) + +class PointPar: Serializable +{ +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(PointPar, + std::string, mom); +}; + +template +class TPoint: public Module +{ +public: + FERM_TYPE_ALIASES(FImpl,); + SINK_TYPE_ALIASES(); +public: + // constructor + TPoint(const std::string name); + // destructor + virtual ~TPoint(void) = default; + // dependency relation + virtual std::vector getInput(void); + virtual std::vector getOutput(void); + // setup + virtual void setup(void); + // execution + virtual void execute(void); +}; + +MODULE_REGISTER_NS(Point, TPoint, MSink); +MODULE_REGISTER_NS(ScalarPoint, TPoint, MSink); + +/****************************************************************************** + * TPoint implementation * + ******************************************************************************/ +// constructor ///////////////////////////////////////////////////////////////// +template +TPoint::TPoint(const std::string name) +: Module(name) +{} + +// dependencies/products /////////////////////////////////////////////////////// +template +std::vector TPoint::getInput(void) +{ + std::vector in; + + return in; +} + +template +std::vector TPoint::getOutput(void) +{ + std::vector out = {getName()}; + + return out; +} + +// setup /////////////////////////////////////////////////////////////////////// +template +void TPoint::setup(void) +{ + unsigned int size; + + size = env().template lattice4dSize(); + env().registerObject(getName(), size); 
+} + +// execution /////////////////////////////////////////////////////////////////// +template +void TPoint::execute(void) +{ + std::vector p = strToVec(par().mom); + LatticeComplex ph(env().getGrid()), coor(env().getGrid()); + Complex i(0.0,1.0); + + LOG(Message) << "Setting up point sink function for momentum [" + << par().mom << "]" << std::endl; + ph = zero; + for(unsigned int mu = 0; mu < env().getNd(); mu++) + { + LatticeCoordinate(coor, mu); + ph = ph + (p[mu]/env().getGrid()->_fdimensions[mu])*coor; + } + ph = exp((Real)(2*M_PI)*i*ph); + auto sink = [ph](const PropagatorField &field) + { + SlicedPropagator res; + PropagatorField tmp = ph*field; + + sliceSum(tmp, res, Tp); + + return res; + }; + env().setObject(getName(), new SinkFn(sink)); +} + +END_MODULE_NAMESPACE + +END_HADRONS_NAMESPACE + +#endif // Hadrons_MSink_Point_hpp_ diff --git a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp index d7220271..b1f63a5d 100644 --- a/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp +++ b/extras/Hadrons/Modules/MSolver/RBPrecCG.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_RBPrecCG_hpp_ -#define Hadrons_RBPrecCG_hpp_ +#ifndef Hadrons_MSolver_RBPrecCG_hpp_ +#define Hadrons_MSolver_RBPrecCG_hpp_ #include #include @@ -53,7 +53,7 @@ template class TRBPrecCG: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TRBPrecCG(const std::string name); @@ -129,4 +129,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_RBPrecCG_hpp_ +#endif // Hadrons_MSolver_RBPrecCG_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Point.hpp b/extras/Hadrons/Modules/MSource/Point.hpp index a0ecbc2a..0c415807 100644 --- a/extras/Hadrons/Modules/MSource/Point.hpp +++ b/extras/Hadrons/Modules/MSource/Point.hpp @@ -27,8 +27,8 
@@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Point_hpp_ -#define Hadrons_Point_hpp_ +#ifndef Hadrons_MSource_Point_hpp_ +#define Hadrons_MSource_Point_hpp_ #include #include @@ -63,7 +63,7 @@ template class TPoint: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TPoint(const std::string name); @@ -78,7 +78,8 @@ public: virtual void execute(void); }; -MODULE_REGISTER_NS(Point, TPoint, MSource); +MODULE_REGISTER_NS(Point, TPoint, MSource); +MODULE_REGISTER_NS(ScalarPoint, TPoint, MSource); /****************************************************************************** * TPoint template implementation * @@ -132,4 +133,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Point_hpp_ +#endif // Hadrons_MSource_Point_hpp_ diff --git a/extras/Hadrons/Modules/MSource/SeqGamma.hpp b/extras/Hadrons/Modules/MSource/SeqGamma.hpp index 366ebee7..e2129a46 100644 --- a/extras/Hadrons/Modules/MSource/SeqGamma.hpp +++ b/extras/Hadrons/Modules/MSource/SeqGamma.hpp @@ -28,8 +28,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_SeqGamma_hpp_ -#define Hadrons_SeqGamma_hpp_ +#ifndef Hadrons_MSource_SeqGamma_hpp_ +#define Hadrons_MSource_SeqGamma_hpp_ #include #include @@ -72,7 +72,7 @@ template class TSeqGamma: public Module { public: - TYPE_ALIASES(FImpl,); + FGS_TYPE_ALIASES(FImpl,); public: // constructor TSeqGamma(const std::string name); @@ -161,4 +161,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_SeqGamma_hpp_ +#endif // Hadrons_MSource_SeqGamma_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Wall.hpp b/extras/Hadrons/Modules/MSource/Wall.hpp index 8722876f..4de37e4d 100644 --- 
a/extras/Hadrons/Modules/MSource/Wall.hpp +++ b/extras/Hadrons/Modules/MSource/Wall.hpp @@ -26,8 +26,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_WallSource_hpp_ -#define Hadrons_WallSource_hpp_ +#ifndef Hadrons_MSource_WallSource_hpp_ +#define Hadrons_MSource_WallSource_hpp_ #include #include @@ -64,7 +64,7 @@ template class TWall: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TWall(const std::string name); @@ -144,4 +144,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_WallSource_hpp_ +#endif // Hadrons_MSource_WallSource_hpp_ diff --git a/extras/Hadrons/Modules/MSource/Z2.hpp b/extras/Hadrons/Modules/MSource/Z2.hpp index cd5727be..a7f7a3e6 100644 --- a/extras/Hadrons/Modules/MSource/Z2.hpp +++ b/extras/Hadrons/Modules/MSource/Z2.hpp @@ -27,8 +27,8 @@ See the full license in the file "LICENSE" in the top level distribution directo *************************************************************************************/ /* END LEGAL */ -#ifndef Hadrons_Z2_hpp_ -#define Hadrons_Z2_hpp_ +#ifndef Hadrons_MSource_Z2_hpp_ +#define Hadrons_MSource_Z2_hpp_ #include #include @@ -67,7 +67,7 @@ template class TZ2: public Module { public: - TYPE_ALIASES(FImpl,); + FERM_TYPE_ALIASES(FImpl,); public: // constructor TZ2(const std::string name); @@ -82,7 +82,8 @@ public: virtual void execute(void); }; -MODULE_REGISTER_NS(Z2, TZ2, MSource); +MODULE_REGISTER_NS(Z2, TZ2, MSource); +MODULE_REGISTER_NS(ScalarZ2, TZ2, MSource); /****************************************************************************** * TZ2 template implementation * @@ -148,4 +149,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons_Z2_hpp_ +#endif // Hadrons_MSource_Z2_hpp_ diff --git a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template 
b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template index ece2bb58..ea77b12a 100644 --- a/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_in_NS.hpp.template @@ -1,5 +1,5 @@ -#ifndef Hadrons____FILEBASENAME____hpp_ -#define Hadrons____FILEBASENAME____hpp_ +#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ +#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ #include #include @@ -41,4 +41,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons____FILEBASENAME____hpp_ +#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ diff --git a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template index a330652d..b79c0ad3 100644 --- a/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template +++ b/extras/Hadrons/Modules/templates/Module_tmp_in_NS.hpp.template @@ -1,5 +1,5 @@ -#ifndef Hadrons____FILEBASENAME____hpp_ -#define Hadrons____FILEBASENAME____hpp_ +#ifndef Hadrons____NAMESPACE_______FILEBASENAME____hpp_ +#define Hadrons____NAMESPACE_______FILEBASENAME____hpp_ #include #include @@ -82,4 +82,4 @@ END_MODULE_NAMESPACE END_HADRONS_NAMESPACE -#endif // Hadrons____FILEBASENAME____hpp_ +#endif // Hadrons____NAMESPACE_______FILEBASENAME____hpp_ diff --git a/extras/Hadrons/modules.inc b/extras/Hadrons/modules.inc index af291631..669b08ba 100644 --- a/extras/Hadrons/modules.inc +++ b/extras/Hadrons/modules.inc @@ -4,7 +4,10 @@ modules_cc =\ Modules/MContraction/WeakNeutral4ptDisc.cc \ Modules/MGauge/Load.cc \ Modules/MGauge/Random.cc \ - Modules/MGauge/Unit.cc + Modules/MGauge/StochEm.cc \ + Modules/MGauge/Unit.cc \ + Modules/MScalar/ChargedProp.cc \ + Modules/MScalar/FreeProp.cc modules_hpp =\ Modules/MAction/DWF.hpp \ @@ -17,14 +20,19 @@ modules_hpp =\ Modules/MContraction/WeakHamiltonianEye.hpp \ Modules/MContraction/WeakHamiltonianNonEye.hpp \ Modules/MContraction/WeakNeutral4ptDisc.hpp \ + 
Modules/MFermion/GaugeProp.hpp \ Modules/MGauge/Load.hpp \ Modules/MGauge/Random.hpp \ + Modules/MGauge/StochEm.hpp \ Modules/MGauge/Unit.hpp \ Modules/MLoop/NoiseLoop.hpp \ + Modules/MScalar/ChargedProp.hpp \ + Modules/MScalar/FreeProp.hpp \ + Modules/MScalar/Scalar.hpp \ + Modules/MSink/Point.hpp \ Modules/MSolver/RBPrecCG.hpp \ Modules/MSource/Point.hpp \ Modules/MSource/SeqGamma.hpp \ Modules/MSource/Wall.hpp \ - Modules/MSource/Z2.hpp \ - Modules/Quark.hpp + Modules/MSource/Z2.hpp diff --git a/extras/qed-fvol/Global.cc b/extras/qed-fvol/Global.cc new file mode 100644 index 00000000..57ed97cc --- /dev/null +++ b/extras/qed-fvol/Global.cc @@ -0,0 +1,11 @@ +#include + +using namespace Grid; +using namespace QCD; +using namespace QedFVol; + +QedFVolLogger QedFVol::QedFVolLogError(1,"Error"); +QedFVolLogger QedFVol::QedFVolLogWarning(1,"Warning"); +QedFVolLogger QedFVol::QedFVolLogMessage(1,"Message"); +QedFVolLogger QedFVol::QedFVolLogIterative(1,"Iterative"); +QedFVolLogger QedFVol::QedFVolLogDebug(1,"Debug"); diff --git a/extras/qed-fvol/Global.hpp b/extras/qed-fvol/Global.hpp new file mode 100644 index 00000000..7f07200d --- /dev/null +++ b/extras/qed-fvol/Global.hpp @@ -0,0 +1,42 @@ +#ifndef QedFVol_Global_hpp_ +#define QedFVol_Global_hpp_ + +#include + +#define BEGIN_QEDFVOL_NAMESPACE \ +namespace Grid {\ +using namespace QCD;\ +namespace QedFVol {\ +using Grid::operator<<; +#define END_QEDFVOL_NAMESPACE }} + +/* the 'using Grid::operator<<;' statement prevents a very nasty compilation + * error with GCC (clang compiles fine without it). 
+ */ + +BEGIN_QEDFVOL_NAMESPACE + +class QedFVolLogger: public Logger +{ +public: + QedFVolLogger(int on, std::string nm): Logger("QedFVol", on, nm, + GridLogColours, "BLACK"){}; +}; + +#define LOG(channel) std::cout << QedFVolLog##channel +#define QEDFVOL_ERROR(msg)\ +LOG(Error) << msg << " (" << __FUNCTION__ << " at " << __FILE__ << ":"\ + << __LINE__ << ")" << std::endl;\ +abort(); + +#define DEBUG_VAR(var) LOG(Debug) << #var << "= " << (var) << std::endl; + +extern QedFVolLogger QedFVolLogError; +extern QedFVolLogger QedFVolLogWarning; +extern QedFVolLogger QedFVolLogMessage; +extern QedFVolLogger QedFVolLogIterative; +extern QedFVolLogger QedFVolLogDebug; + +END_QEDFVOL_NAMESPACE + +#endif // QedFVol_Global_hpp_ diff --git a/extras/qed-fvol/Makefile.am b/extras/qed-fvol/Makefile.am new file mode 100644 index 00000000..0a9030c7 --- /dev/null +++ b/extras/qed-fvol/Makefile.am @@ -0,0 +1,9 @@ +AM_CXXFLAGS += -I$(top_srcdir)/extras + +bin_PROGRAMS = qed-fvol + +qed_fvol_SOURCES = \ + qed-fvol.cc \ + Global.cc + +qed_fvol_LDADD = -lGrid diff --git a/extras/qed-fvol/WilsonLoops.h b/extras/qed-fvol/WilsonLoops.h new file mode 100644 index 00000000..98db6b7a --- /dev/null +++ b/extras/qed-fvol/WilsonLoops.h @@ -0,0 +1,265 @@ +#ifndef QEDFVOL_WILSONLOOPS_H +#define QEDFVOL_WILSONLOOPS_H + +#include + +BEGIN_QEDFVOL_NAMESPACE + +template class NewWilsonLoops : public Gimpl { +public: + INHERIT_GIMPL_TYPES(Gimpl); + + typedef typename Gimpl::GaugeLinkField GaugeMat; + typedef typename Gimpl::GaugeField GaugeLorentz; + + ////////////////////////////////////////////////// + // directed plaquette oriented in mu,nu plane + ////////////////////////////////////////////////// + static void dirPlaquette(GaugeMat &plaq, const std::vector &U, + const int mu, const int nu) { + // Annoyingly, must use either scope resolution to find dependent base + // class, + // or this-> ; there is no "this" in a static method. 
This forces explicit + // Gimpl scope + // resolution throughout the usage in this file, and rather defeats the + // purpose of deriving + // from Gimpl. + plaq = Gimpl::CovShiftBackward( + U[mu], mu, Gimpl::CovShiftBackward( + U[nu], nu, Gimpl::CovShiftForward(U[mu], mu, U[nu]))); + } + ////////////////////////////////////////////////// + // trace of directed plaquette oriented in mu,nu plane + ////////////////////////////////////////////////// + static void traceDirPlaquette(LatticeComplex &plaq, + const std::vector &U, const int mu, + const int nu) { + GaugeMat sp(U[0]._grid); + dirPlaquette(sp, U, mu, nu); + plaq = trace(sp); + } + ////////////////////////////////////////////////// + // sum over all planes of plaquette + ////////////////////////////////////////////////// + static void sitePlaquette(LatticeComplex &Plaq, + const std::vector &U) { + LatticeComplex sitePlaq(U[0]._grid); + Plaq = zero; + for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceDirPlaquette(sitePlaq, U, mu, nu); + Plaq = Plaq + sitePlaq; + } + } + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of plaquette + ////////////////////////////////////////////////// + static Real sumPlaquette(const GaugeLorentz &Umu) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Plaq(Umu._grid); + + sitePlaquette(Plaq, U); + + TComplex Tp = sum(Plaq); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of plaquette + ////////////////////////////////////////////////// + static Real avgPlaquette(const GaugeLorentz &Umu) { + int ndim = Umu._grid->_ndimension; + Real sumplaq = sumPlaquette(Umu); + Real vol = Umu._grid->gSites(); + Real faces = (1.0 * ndim * (ndim - 1)) / 2.0; + return sumplaq / vol / faces / Nc; // Nc 
dependent... FIXME + } + + ////////////////////////////////////////////////// + // Wilson loop of size (R1, R2), oriented in mu,nu plane + ////////////////////////////////////////////////// + static void wilsonLoop(GaugeMat &wl, const std::vector &U, + const int Rmu, const int Rnu, + const int mu, const int nu) { + wl = U[nu]; + + for(int i = 0; i < Rnu-1; i++){ + wl = Gimpl::CovShiftForward(U[nu], nu, wl); + } + + for(int i = 0; i < Rmu; i++){ + wl = Gimpl::CovShiftForward(U[mu], mu, wl); + } + + for(int i = 0; i < Rnu; i++){ + wl = Gimpl::CovShiftBackward(U[nu], nu, wl); + } + + for(int i = 0; i < Rmu; i++){ + wl = Gimpl::CovShiftBackward(U[mu], mu, wl); + } + } + ////////////////////////////////////////////////// + // trace of Wilson Loop oriented in mu,nu plane + ////////////////////////////////////////////////// + static void traceWilsonLoop(LatticeComplex &wl, + const std::vector &U, + const int Rmu, const int Rnu, + const int mu, const int nu) { + GaugeMat sp(U[0]._grid); + wilsonLoop(sp, U, Rmu, Rnu, mu, nu); + wl = trace(sp); + } + ////////////////////////////////////////////////// + // sum over all planes of Wilson loop + ////////////////////////////////////////////////// + static void siteWilsonLoop(LatticeComplex &Wl, + const std::vector &U, + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + Wl = zero; + for (int mu = 1; mu < U[0]._grid->_ndimension; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, mu, nu); + Wl = Wl + siteWl; + traceWilsonLoop(siteWl, U, R2, R1, mu, nu); + Wl = Wl + siteWl; + } + } + } + ////////////////////////////////////////////////// + // sum over planes of Wilson loop with length R1 + // in the time direction + ////////////////////////////////////////////////// + static void siteTimelikeWilsonLoop(LatticeComplex &Wl, + const std::vector &U, + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + + int ndim = U[0]._grid->_ndimension; + + Wl = zero; + for (int nu = 
0; nu < ndim - 1; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, ndim-1, nu); + Wl = Wl + siteWl; + } + } + ////////////////////////////////////////////////// + // sum Wilson loop over all planes orthogonal to the time direction + ////////////////////////////////////////////////// + static void siteSpatialWilsonLoop(LatticeComplex &Wl, + const std::vector &U, + const int R1, const int R2) { + LatticeComplex siteWl(U[0]._grid); + + Wl = zero; + for (int mu = 1; mu < U[0]._grid->_ndimension - 1; mu++) { + for (int nu = 0; nu < mu; nu++) { + traceWilsonLoop(siteWl, U, R1, R2, mu, nu); + Wl = Wl + siteWl; + traceWilsonLoop(siteWl, U, R2, R1, mu, nu); + Wl = Wl + siteWl; + } + } + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of Wilson loop + ////////////////////////////////////////////////// + static Real sumWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of timelike Wilson loop + ////////////////////////////////////////////////// + static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteTimelikeWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // sum over all x,y,z,t and over all planes of spatial Wilson loop + ////////////////////////////////////////////////// + static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu, + 
const int R1, const int R2) { + std::vector U(4, Umu._grid); + + for (int mu = 0; mu < Umu._grid->_ndimension; mu++) { + U[mu] = PeekIndex(Umu, mu); + } + + LatticeComplex Wl(Umu._grid); + + siteSpatialWilsonLoop(Wl, U, R1, R2); + + TComplex Tp = sum(Wl); + Complex p = TensorRemove(Tp); + return p.real(); + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of Wilson loop + ////////////////////////////////////////////////// + static Real avgWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; + Real sumWl = sumWilsonLoop(Umu, R1, R2); + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * ndim * (ndim - 1); + return sumWl / vol / faces / Nc; // Nc dependent... FIXME + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of timelike Wilson loop + ////////////////////////////////////////////////// + static Real avgTimelikeWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; + Real sumWl = sumTimelikeWilsonLoop(Umu, R1, R2); + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * (ndim - 1); + return sumWl / vol / faces / Nc; // Nc dependent... FIXME + } + ////////////////////////////////////////////////// + // average over all x,y,z,t and over all planes of spatial Wilson loop + ////////////////////////////////////////////////// + static Real avgSpatialWilsonLoop(const GaugeLorentz &Umu, + const int R1, const int R2) { + int ndim = Umu._grid->_ndimension; + Real sumWl = sumSpatialWilsonLoop(Umu, R1, R2); + Real vol = Umu._grid->gSites(); + Real faces = 1.0 * (ndim - 1) * (ndim - 2); + return sumWl / vol / faces / Nc; // Nc dependent... 
FIXME + } +}; + +END_QEDFVOL_NAMESPACE + +#endif // QEDFVOL_WILSONLOOPS_H \ No newline at end of file diff --git a/extras/qed-fvol/qed-fvol.cc b/extras/qed-fvol/qed-fvol.cc new file mode 100644 index 00000000..3ecac2fc --- /dev/null +++ b/extras/qed-fvol/qed-fvol.cc @@ -0,0 +1,88 @@ +#include +#include + +using namespace Grid; +using namespace QCD; +using namespace QedFVol; + +typedef PeriodicGaugeImpl QedPeriodicGimplR; +typedef PhotonR::GaugeField EmField; +typedef PhotonR::GaugeLinkField EmComp; + +const int NCONFIGS = 10; +const int NWILSON = 10; + +int main(int argc, char *argv[]) +{ + // parse command line + std::string parameterFileName; + + if (argc < 2) + { + std::cerr << "usage: " << argv[0] << " [Grid options]"; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + parameterFileName = argv[1]; + + // initialization + Grid_init(&argc, &argv); + QedFVolLogError.Active(GridLogError.isActive()); + QedFVolLogWarning.Active(GridLogWarning.isActive()); + QedFVolLogMessage.Active(GridLogMessage.isActive()); + QedFVolLogIterative.Active(GridLogIterative.isActive()); + QedFVolLogDebug.Active(GridLogDebug.isActive()); + LOG(Message) << "Grid initialized" << std::endl; + + // QED stuff + std::vector latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4, vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + GridCartesian grid(latt_size,simd_layout,mpi_layout); + GridParallelRNG pRNG(&grid); + PhotonR photon(PhotonR::Gauge::feynman, + PhotonR::ZmScheme::qedL); + EmField a(&grid); + EmField expA(&grid); + + Complex imag_unit(0, 1); + + Real wlA; + std::vector logWlAvg(NWILSON, 0.0), logWlTime(NWILSON, 0.0), logWlSpace(NWILSON, 0.0); + + pRNG.SeedRandomDevice(); + + LOG(Message) << "Wilson loop calculation beginning" << std::endl; + for(int ic = 0; ic < NCONFIGS; ic++){ + LOG(Message) << "Configuration " << ic <::avgWilsonLoop(expA, iw, iw) * 3; + logWlAvg[iw-1] -= 2*log(wlA); + wlA = NewWilsonLoops::avgTimelikeWilsonLoop(expA, 
iw, iw) * 3; + logWlTime[iw-1] -= 2*log(wlA); + wlA = NewWilsonLoops::avgSpatialWilsonLoop(expA, iw, iw) * 3; + logWlSpace[iw-1] -= 2*log(wlA); + } + } + LOG(Message) << "Wilson loop calculation completed" << std::endl; + + // Calculate Wilson loops + for(int iw=1; iw<=10; iw++){ + LOG(Message) << iw << 'x' << iw << " Wilson loop" << std::endl; + LOG(Message) << "-2log(W) average: " << logWlAvg[iw-1]/NCONFIGS << std::endl; + LOG(Message) << "-2log(W) timelike: " << logWlTime[iw-1]/NCONFIGS << std::endl; + LOG(Message) << "-2log(W) spatial: " << logWlSpace[iw-1]/NCONFIGS << std::endl; + } + + // epilogue + LOG(Message) << "Grid is finalizing now" << std::endl; + Grid_finalize(); + + return EXIT_SUCCESS; +} diff --git a/lib/Grid.h b/lib/Grid.h index 543b0330..9dcc207b 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -41,7 +41,9 @@ Author: paboyle #include #include #include +#include #include +#include #include #endif diff --git a/lib/GridStd.h b/lib/GridStd.h index fb5e5b21..097e62ab 100644 --- a/lib/GridStd.h +++ b/lib/GridStd.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include /////////////////// // Grid config diff --git a/lib/algorithms/densematrix/DenseMatrix.h b/lib/algorithms/densematrix/DenseMatrix.h deleted file mode 100644 index d86add21..00000000 --- a/lib/algorithms/densematrix/DenseMatrix.h +++ /dev/null @@ -1,137 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/DenseMatrix.h - - Copyright (C) 2015 - -Author: Peter Boyle -Author: paboyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_DENSE_MATRIX_H -#define GRID_DENSE_MATRIX_H - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Matrix untils - ///////////////////////////////////////////////////////////// - -template using DenseVector = std::vector; -template using DenseMatrix = DenseVector >; - -template void Size(DenseVector & vec, int &N) -{ - N= vec.size(); -} -template void Size(DenseMatrix & mat, int &N,int &M) -{ - N= mat.size(); - M= mat[0].size(); -} - -template void SizeSquare(DenseMatrix & mat, int &N) -{ - int M; Size(mat,N,M); - assert(N==M); -} - -template void Resize(DenseVector & mat, int N) { - mat.resize(N); -} -template void Resize(DenseMatrix & mat, int N, int M) { - mat.resize(N); - for(int i=0;i void Fill(DenseMatrix & mat, T&val) { - int N,M; - Size(mat,N,M); - for(int i=0;i DenseMatrix Transpose(DenseMatrix & mat){ - int N,M; - Size(mat,N,M); - DenseMatrix C; Resize(C,M,N); - for(int i=0;i void Unity(DenseMatrix &A){ - int N; SizeSquare(A,N); - for(int i=0;i -void PlusUnit(DenseMatrix & A,T c){ - int dim; SizeSquare(A,dim); - for(int i=0;i -DenseMatrix HermitianConj(DenseMatrix &mat){ - - int dim; SizeSquare(mat,dim); - - DenseMatrix C; Resize(C,dim,dim); - - for(int i=0;i -DenseMatrix GetSubMtx(DenseMatrix &A,int row_st, int row_end, int col_st, int col_end) -{ - DenseMatrix H; 
Resize(H,row_end - row_st,col_end-col_st); - - for(int i = row_st; i - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef FRANCIS_H -#define FRANCIS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//#include -//#include -//#include - -namespace Grid { - -template int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); -template int Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseMatrix &evecs, RealD small); - -/** - Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. -H = - x x x x x x x x x - x x x x x x x x x - 0 x x x x x x x x - 0 0 x x x x x x x - 0 0 0 x x x x x x - 0 0 0 0 x x x x x - 0 0 0 0 0 x x x x - 0 0 0 0 0 0 x x x - 0 0 0 0 0 0 0 x x -Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. 
-**/ -template -int QReigensystem(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - DenseMatrix H = Hin; - - int N ; SizeSquare(H,N); - int M = N; - - Fill(evals,0); - Fill(evecs,0); - - T s,t,x=0,y=0,z=0; - T u,d; - T apd,amd,bc; - DenseVector p(N,0); - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); Unity(P); - DenseVector trows(N,0); - - /// Check if the matrix is really hessenberg, if not abort - RealD sth = 0; - for(int j=0;j small){ - std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; - exit(1); - } - } - } - - do{ - std::cout << "Francis QR Step N = " << N << std::endl; - /** Check for convergence - x x x x x - 0 x x x x - 0 0 x x x - 0 0 x x x - 0 0 0 0 x - for this matrix l = 4 - **/ - do{ - l = Chop_subdiag(H,nrm,e,small); - r = 0; ///May have converged on more than one eval - ///Single eval - if(l == N-1){ - evals[e] = H[l][l]; - N--; e++; r++; it = 0; - } - ///RealD eval - if(l == N-2){ - trows[l+1] = 1; ///Needed for UTSolve - apd = H[l][l] + H[l+1][l+1]; - amd = H[l][l] - H[l+1][l+1]; - bc = (T)4.0*H[l+1][l]*H[l][l+1]; - evals[e] = (T)0.5*( apd + sqrt(amd*amd + bc) ); - evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); - N-=2; e+=2; r++; it = 0; - } - } while(r>0); - - if(N ==0) break; - - DenseVector ck; Resize(ck,3); - DenseVector v; Resize(v,3); - - for(int m = N-3; m >= l; m--){ - ///Starting vector essentially random shift. 
- if(it%10 == 0 && N >= 3 && it > 0){ - s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ///Starting vector implicit Q theorem - else{ - s = (H[N-2][N-2] + H[N-1][N-1]); - t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); - x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; - y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); - z = H[m+1][m]*H[m+2][m+1]; - } - ck[0] = x; ck[1] = y; ck[2] = z; - - if(m == l) break; - - /** Some stupid thing from numerical recipies, seems to work**/ - // PAB.. for heaven's sake quote page, purpose, evidence it works. - // what sort of comment is that!?!?!? - u=abs(H[m][m-1])*(abs(y)+abs(z)); - d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); - if ((T)abs(u+d) == (T)abs(d) ){ - l = m; break; - } - - //if (u < small){l = m; break;} - } - if(it > 100000){ - std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - normalize(ck); ///Normalization cancels in PHP anyway - T beta; - Householder_vector(ck, 0, 2, v, beta); - Householder_mult(H,v,beta,0,l,l+2,0); - Householder_mult(H,v,beta,0,l,l+2,1); - ///Accumulate eigenvector - Householder_mult(P,v,beta,0,l,l+2,1); - int sw = 0; ///Are we on the last row? 
- for(int k=l;k(ck, 0, 2-sw, v, beta); - Householder_mult(H,v, beta,0,k+1,k+3-sw,0); - Householder_mult(H,v, beta,0,k+1,k+3-sw,1); - ///Accumulate eigenvector - Householder_mult(P,v, beta,0,k+1,k+3-sw,1); - } - it++; - tot_it++; - }while(N > 1); - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp; Resize(tmp,N); - for(int i=0;i -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small) -{ - /** - Find the eigenvalues of an upper Hessenberg matrix using the Wilkinson QR algorithm. - H = - x x 0 0 0 0 - x x x 0 0 0 - 0 x x x 0 0 - 0 0 x x x 0 - 0 0 0 x x x - 0 0 0 0 x x - Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthagonal/unitary. **/ - return my_Wilkinson(Hin, evals, evecs, small, small); -} - -template -int my_Wilkinson(DenseMatrix &Hin, DenseVector &evals, DenseMatrix &evecs, RealD small, RealD tol) -{ - int N; SizeSquare(Hin,N); - int M = N; - - ///I don't want to modify the input but matricies must be passed by reference - //Scale a matrix by its "norm" - //RealD Hnorm = abs( Hin.LargestDiag() ); H = H*(1.0/Hnorm); - DenseMatrix H; H = Hin; - - RealD Hnorm = abs(Norm(Hin)); - H = H * (1.0 / Hnorm); - - // TODO use openmp and memset - Fill(evals,0); - Fill(evecs,0); - - T s, t, x = 0, y = 0, z = 0; - T u, d; - T apd, amd, bc; - DenseVector p; Resize(p,N); Fill(p,0); - - T nrm = Norm(H); ///DenseMatrix Norm - int n, m; - int e = 0; - int it = 0; - int tot_it = 0; - int l = 0; - int r = 0; - DenseMatrix P; Resize(P,N,N); - Unity(P); - DenseVector trows(N, 0); - /// Check if the matrix is really symm tridiag - RealD sth = 0; - for(int j = 0; j < N; ++j) - { - for(int i = j + 2; i < N; ++i) - { - if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) - { - std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; - std::cout << "Warning tridiagonalize and call again" << std::endl; - // exit(1); // see what is going on - 
//return; - } - } - } - - do{ - do{ - //Jasper - //Check if the subdiagonal term is small enough ( 0); - //Jasper - //Already converged - //-------------- - if(N == 0) break; - - DenseVector ck,v; Resize(ck,2); Resize(v,2); - - for(int m = N - 3; m >= l; m--) - { - ///Starting vector essentially random shift. - if(it%10 == 0 && N >= 3 && it > 0) - { - t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); - x = H[m][m] - t; - z = H[m + 1][m]; - } else { - ///Starting vector implicit Q theorem - d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; - t = H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2] - / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); - x = H[m][m] - t; - z = H[m + 1][m]; - } - //Jasper - //why it is here???? - //----------------------- - if(m == l) - break; - - u = abs(H[m][m - 1]) * (abs(y) + abs(z)); - d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); - if ((T)abs(u + d) == (T)abs(d)) - { - l = m; - break; - } - } - //Jasper - if(it > 1000000) - { - std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; - std::cout << "got " << e << " evals " << l << " " << N << std::endl; - exit(1); - } - // - T s, c; - Givens_calc(x, z, c, s); - Givens_mult(H, l, l + 1, c, -s, 0); - Givens_mult(H, l, l + 1, c, s, 1); - Givens_mult(P, l, l + 1, c, s, 1); - // - for(int k = l; k < N - 2; ++k) - { - x = H.A[k + 1][k]; - z = H.A[k + 2][k]; - Givens_calc(x, z, c, s); - Givens_mult(H, k + 1, k + 2, c, -s, 0); - Givens_mult(H, k + 1, k + 2, c, s, 1); - Givens_mult(P, k + 1, k + 2, c, s, 1); - } - it++; - tot_it++; - }while(N > 1); - - N = evals.size(); - ///Annoying - UT solves in reverse order; - DenseVector tmp(N); - for(int i = 0; i < N; ++i) - tmp[i] = evals[N-i-1]; - evals = tmp; - // - UTeigenvectors(H, trows, evals, evecs); - //UTSymmEigenvectors(H, trows, evals, evecs); - for(int i = 0; i < evals.size(); ++i) - { - evecs[i] = P * evecs[i]; - normalize(evecs[i]); - evals[i] = evals[i] * Hnorm; - 
} - // // FIXME this is to test - // Hin.write("evecs3", evecs); - // Hin.write("evals3", evals); - // // check rsd - // for(int i = 0; i < M; i++) { - // vector Aevec = Hin * evecs[i]; - // RealD norm2(0.); - // for(int j = 0; j < M; j++) { - // norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); - // } - // } - return tot_it; -} - -template -void Hess(DenseMatrix &A, DenseMatrix &Q, int start){ - - /** - turn a matrix A = - x x x x x - x x x x x - x x x x x - x x x x x - x x x x x - into - x x x x x - x x x x x - 0 x x x x - 0 0 x x x - 0 0 0 x x - with householder rotations - Slow. - */ - int N ; SizeSquare(A,N); - DenseVector p; Resize(p,N); Fill(p,0); - - for(int k=start;k ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); - for(int i=k+1;i(ck, 0, ck.size()-1, v, beta); ///Householder vector - Householder_mult(A,v,beta,start,k+1,N-1,0); ///A -> PA - Householder_mult(A,v,beta,start,k+1,N-1,1); ///PA -> PAP^H - ///Accumulate eigenvector - Householder_mult(Q,v,beta,start,k+1,N-1,1); ///Q -> QP^H - } - /*for(int l=0;l -void Tri(DenseMatrix &A, DenseMatrix &Q, int start){ -///Tridiagonalize a matrix - int N; SizeSquare(A,N); - Hess(A,Q,start); - /*for(int l=0;l -void ForceTridiagonal(DenseMatrix &A){ -///Tridiagonalize a matrix - int N ; SizeSquare(A,N); - for(int l=0;l -int my_SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - ///Solve a symmetric eigensystem, not necessarily in tridiagonal form - int N; SizeSquare(Ain,N); - DenseMatrix A; A = Ain; - DenseMatrix Q; Resize(Q,N,N); Unity(Q); - Tri(A,Q,0); - int it = my_Wilkinson(A, evals, evecs, small); - for(int k=0;k -int Wilkinson(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_Wilkinson(Ain, evals, evecs, small); -} - -template -int SymmEigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ - return my_SymmEigensystem(Ain, evals, evecs, small); -} - -template -int 
Eigensystem(DenseMatrix &Ain, DenseVector &evals, DenseVector > &evecs, RealD small){ -///Solve a general eigensystem, not necessarily in tridiagonal form - int N = Ain.dim; - DenseMatrix A(N); A = Ain; - DenseMatrix Q(N);Q.Unity(); - Hess(A,Q,0); - int it = QReigensystem(A, evals, evecs, small); - for(int k=0;k - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef HOUSEHOLDER_H -#define HOUSEHOLDER_H - -#define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define ENTER() std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; -#define LEAVE() std::cout << GridLogMessage << "EXIT "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Grid { -/** Comparison function for finding the max element in a vector **/ -template bool cf(T i, T j) { - return abs(i) < abs(j); -} - -/** - Calculate a real Givens angle - **/ -template inline void Givens_calc(T y, T z, T &c, T &s){ - - RealD mz = (RealD)abs(z); - - if(mz==0.0){ - c = 1; s = 0; - } - if(mz >= (RealD)abs(y)){ - T t = -y/z; - s = (T)1.0 / sqrt ((T)1.0 + t * t); - c = s * t; - } else { - T t = -z/y; - c = (T)1.0 / sqrt ((T)1.0 + t * t); - s = c * t; - } -} - -template inline void Givens_mult(DenseMatrix &A, int i, int k, T c, T s, int dir) -{ - int q ; SizeSquare(A,q); - - if(dir == 0){ - for(int j=0;j inline void Householder_vector(DenseVector input, int k, int j, DenseVector &v, T &beta) -{ - int N ; Size(input,N); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf ); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[k] = v[k] + (v[k]/abs(v[k]))*alpha; - else v[k] = -alpha; - } else{ - for(int i=k; i inline void Householder_vector(DenseVector input, int k, int j, int dir, DenseVector &v, T &beta) -{ - int N = input.size(); - T m = *max_element(input.begin() + k, input.begin() + j + 1, cf); - - if(abs(m) > 0.0){ - T alpha = 0; - - for(int i=k; i 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; - else v[dir] = 
-alpha; - }else{ - for(int i=k; i inline void Householder_mult(DenseMatrix &A , DenseVector v, T beta, int l, int k, int j, int trans) -{ - int N ; SizeSquare(A,N); - - if(abs(beta) > 0.0){ - for(int p=l; p inline void Householder_mult_tri(DenseMatrix &A , DenseVector v, T beta, int l, int M, int k, int j, int trans) -{ - if(abs(beta) > 0.0){ - - int N ; SizeSquare(A,N); - - DenseMatrix tmp; Resize(tmp,N,N); Fill(tmp,0); - - T s; - for(int p=l; p class BlockConjugateGradient : public OperatorFunction { public: + typedef typename Field::scalar_type scomplex; - const int blockDim = 0; - + int blockDim ; int Nblock; + + BlockCGtype CGtype; bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. // Defaults true. RealD Tolerance; Integer MaxIterations; Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion - BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; + BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) + : Tolerance(tol), CGtype(cgtype), blockDim(_Orthog), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv) + {}; +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Thin QR factorisation (google it) +//////////////////////////////////////////////////////////////////////////////////////////////////// +void ThinQRfact (Eigen::MatrixXcd &m_rr, + Eigen::MatrixXcd &C, + Eigen::MatrixXcd &Cinv, + Field & Q, + const Field & R) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + //////////////////////////////////////////////////////////////////////////////////////////////////// + //Dimensions + // R_{ferm x Nblock} = Q_{ferm x Nblock} x C_{Nblock x Nblock} -> ferm x Nblock + // + // Rdag R = m_rr = Herm = L L^dag <-- Cholesky decomposition (LLT routine in 
Eigen) + // + // Q C = R => Q = R C^{-1} + // + // Want Ident = Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1_{Nblock x Nblock} + // + // Set C = L^{dag}, and then Q^dag Q = ident + // + // Checks: + // Cdag C = Rdag R ; passes. + // QdagQ = 1 ; passes + //////////////////////////////////////////////////////////////////////////////////////////////////// + sliceInnerProductMatrix(m_rr,R,R,Orthog); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Cholesky from Eigen + // There exists a ldlt that is documented as more stable + //////////////////////////////////////////////////////////////////////////////////////////////////// + Eigen::MatrixXcd L = m_rr.llt().matrixL(); + + C = L.adjoint(); + Cinv = C.inverse(); + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Q = R C^{-1} + // + // Q_j = R_i Cinv(i,j) + // + // NB maddMatrix conventions are Right multiplication X[j] a[j,i] already + //////////////////////////////////////////////////////////////////////////////////////////////////// + // FIXME:: make a sliceMulMatrix to avoid zero vector + sliceMulMatrix(Q,Cinv,R,Orthog); +} +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Call one of several implementations +//////////////////////////////////////////////////////////////////////////////////////////////////// void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { - int Orthog = 0; // First dimension is block dim + if ( CGtype == BlockCGrQ ) { + BlockCGrQsolve(Linop,Src,Psi); + } else if (CGtype == BlockCG ) { + BlockCGsolve(Linop,Src,Psi); + } else if (CGtype == CGmultiRHS ) { + CGmultiRHSsolve(Linop,Src,Psi); + } else { + assert(0); + } +} + +//////////////////////////////////////////////////////////////////////////// +// BlockCGrQ implementation: +//-------------------------- +// X is guess/Solution +// 
B is RHS +// Solve A X_i = B_i ; i refers to Nblock index +//////////////////////////////////////////////////////////////////////////// +void BlockCGrQsolve(LinearOperatorBase &Linop, const Field &B, Field &X) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption + Nblock = B._grid->_fdimensions[Orthog]; + + std::cout< residuals(Nblock); + std::vector ssq(Nblock); + + sliceNorm(ssq,B,Orthog); + RealD sssum=0; + for(int b=0;b Thin QR factorisation (google it) + * for k: + * Z = AD + * M = [D^dag Z]^{-1} + * X = X + D MC + * QS = Q - ZM + * D = Q + D S^dag + * C = S C + */ + /////////////////////////////////////// + // Initial block: initial search dir is guess + /////////////////////////////////////// + std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " < Thin QR factorisation (google it) + + Linop.HermOp(X, AD); + tmp = B - AD; + ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); + D=Q; + + std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " < max_resid ) max_resid = rr; + } + + std::cout << GridLogIterative << "\titeration "< &Linop, const Field &Src, Field &Psi) +{ + int Orthog = blockDim; // First dimension is block dim; this is an assumption Nblock = Src._grid->_fdimensions[Orthog]; std::cout< &Linop, const Field &Src, Field &Psi) ********************* */ RealD max_resid=0; + RealD rr; for(int b=0;b max_resid ) max_resid = rr; } @@ -173,13 +424,14 @@ void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) std::cout << GridLogMessage<<"BlockCG converged in "< &Linop, const Field &Src, Field &Psi) if (ErrorOnNoConverge) assert(0); IterationsToComplete = k; } -}; - - ////////////////////////////////////////////////////////////////////////// // multiRHS conjugate gradient. 
Dimension zero should be the block direction +// Use this for spread out across nodes ////////////////////////////////////////////////////////////////////////// -template -class MultiRHSConjugateGradient : public OperatorFunction { - public: - - typedef typename Field::scalar_type scomplex; - - const int blockDim = 0; - - int Nblock; - bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. - // Defaults true. - RealD Tolerance; - Integer MaxIterations; - Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion - - MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) - : Tolerance(tol), - MaxIterations(maxit), - ErrorOnNoConverge(err_on_no_conv){}; - -void operator()(LinearOperatorBase &Linop, const Field &Src, Field &Psi) +void CGmultiRHSsolve(LinearOperatorBase &Linop, const Field &Src, Field &Psi) { - int Orthog = 0; // First dimension is block dim + int Orthog = blockDim; // First dimension is block dim Nblock = Src._grid->_fdimensions[Orthog]; std::cout< &Linop, const Field &Src, Field &Psi) MatrixTimer.Stop(); // Alpha - // sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); sliceInnerTimer.Start(); sliceInnerProductVector(v_pAp,P,AP,Orthog); sliceInnerTimer.Stop(); for(int b=0;b &Linop, const Field &Src, Field &Psi) std::cout << GridLogMessage<<"MultiRHS solver converged in " < &Linop, const Field &Src, Field &Psi) if (ErrorOnNoConverge) assert(0); IterationsToComplete = k; } + }; - - } #endif diff --git a/lib/algorithms/iterative/EigenSort.h b/lib/algorithms/iterative/EigenSort.h deleted file mode 100644 index 23621544..00000000 --- a/lib/algorithms/iterative/EigenSort.h +++ /dev/null @@ -1,81 +0,0 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/algorithms/iterative/EigenSort.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free 
software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_EIGENSORT_H -#define GRID_EIGENSORT_H - - -namespace Grid { - ///////////////////////////////////////////////////////////// - // Eigen sorter to begin with - ///////////////////////////////////////////////////////////// - -template -class SortEigen { - private: - -//hacking for testing for now - private: - static bool less_lmd(RealD left,RealD right){ - return left > right; - } - static bool less_pair(std::pair& left, - std::pair& right){ - return left.first > (right.first); - } - - - public: - - void push(DenseVector& lmd, - DenseVector& evec,int N) { - DenseVector cpy(lmd.size(),evec[0]._grid); - for(int i=0;i > emod(lmd.size()); - for(int i=0;i(lmd[i],&cpy[i]); - - partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); - - typename DenseVector >::iterator it = emod.begin(); - for(int i=0;ifirst; - evec[i]=*(it->second); - ++it; - } - } - void push(DenseVector& lmd,int N) { - std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); - } - bool saturated(RealD lmd, RealD thrs) { - return fabs(lmd) > fabs(thrs); - } -}; - -} -#endif diff --git 
a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h b/lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h similarity index 99% rename from lib/algorithms/iterative/ImplicitlyRestartedLanczos.h rename to lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h index 29510d18..4b9c3907 100644 --- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h +++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczosCJ.h @@ -57,9 +57,10 @@ namespace Grid { // Implicitly restarted lanczos ///////////////////////////////////////////////////////////// +// creating a seaprate instance to avoid conflicts for the time being template - class ImplicitlyRestartedLanczos { + class ImplicitlyRestartedLanczosCJ { const RealD small = 1.0e-16; public: diff --git a/lib/allocator/AlignedAllocator.h b/lib/allocator/AlignedAllocator.h index 6e85ab27..db86c435 100644 --- a/lib/allocator/AlignedAllocator.h +++ b/lib/allocator/AlignedAllocator.h @@ -98,7 +98,14 @@ public: #else if ( ptr == (_Tp *) NULL ) ptr = (_Tp *) memalign(128,bytes); #endif - + // First touch optimise in threaded loop + uint8_t *cp = (uint8_t *)ptr; +#ifdef GRID_OMP +#pragma omp parallel for +#endif + for(size_type n=0;n & processor_grid) : CartesianCommunicator(processor_grid) {}; - // Physics Grid information. std::vector _simd_layout;// Which dimensions get relayed out over simd lanes. std::vector _fdimensions;// (full) Global dimensions of array prior to cb removal @@ -63,13 +62,12 @@ public: int _isites; int _fsites; // _isites*_osites = product(dimensions). int _gsites; - std::vector _slice_block; // subslice information + std::vector _slice_block;// subslice information std::vector _slice_stride; std::vector _slice_nblock; - // Might need these at some point - // std::vector _lstart; // local start of array in gcoors. 
_processor_coor[d]*_ldimensions[d] - // std::vector _lend; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 + std::vector _lstart; // local start of array in gcoors _processor_coor[d]*_ldimensions[d] + std::vector _lend ; // local end of array in gcoors _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 public: @@ -176,6 +174,7 @@ public: inline int gSites(void) const { return _isites*_osites*_Nprocessors; }; inline int Nd (void) const { return _ndimension;}; + inline const std::vector LocalStarts(void) { return _lstart; }; inline const std::vector &FullDimensions(void) { return _fdimensions;}; inline const std::vector &GlobalDimensions(void) { return _gdimensions;}; inline const std::vector &LocalDimensions(void) { return _ldimensions;}; diff --git a/lib/cartesian/Cartesian_full.h b/lib/cartesian/Cartesian_full.h index 7e29d311..b0e47fa4 100644 --- a/lib/cartesian/Cartesian_full.h +++ b/lib/cartesian/Cartesian_full.h @@ -76,6 +76,8 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); _simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); _ostride.resize(_ndimension); _istride.resize(_ndimension); @@ -94,8 +96,10 @@ public: // Use a reduced simd grid _ldimensions[d]= _gdimensions[d]/_processors[d]; //local dimensions _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition - _osites *= _rdimensions[d]; - _isites *= _simd_layout[d]; + _lstart[d] = _processor_coor[d]*_ldimensions[d]; + _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; + _osites *= _rdimensions[d]; + _isites *= _simd_layout[d]; // Addressing support if ( d==0 ) { diff --git a/lib/cartesian/Cartesian_red_black.h b/lib/cartesian/Cartesian_red_black.h index 2f132c19..3037de00 100644 --- a/lib/cartesian/Cartesian_red_black.h +++ b/lib/cartesian/Cartesian_red_black.h @@ -151,6 +151,8 @@ public: _ldimensions.resize(_ndimension); _rdimensions.resize(_ndimension); 
_simd_layout.resize(_ndimension); + _lstart.resize(_ndimension); + _lend.resize(_ndimension); _ostride.resize(_ndimension); _istride.resize(_ndimension); @@ -169,6 +171,8 @@ public: _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard } _ldimensions[d] = _gdimensions[d]/_processors[d]; + _lstart[d] = _processor_coor[d]*_ldimensions[d]; + _lend[d] = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; // Use a reduced simd grid _simd_layout[d] = simd_layout[d]; diff --git a/lib/communicator/Communicator_base.cc b/lib/communicator/Communicator_base.cc index 98d2abf4..557fef48 100644 --- a/lib/communicator/Communicator_base.cc +++ b/lib/communicator/Communicator_base.cc @@ -60,6 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { ///////////////////////////////// // Grid information queries ///////////////////////////////// +int CartesianCommunicator::Dimensions(void) { return _ndimension; }; int CartesianCommunicator::IsBoss(void) { return _processor==0; }; int CartesianCommunicator::BossRank(void) { return 0; }; int CartesianCommunicator::ThisRank(void) { return _processor; }; @@ -91,6 +92,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) int CartesianCommunicator::NodeCount(void) { return ProcessorCount();}; +int CartesianCommunicator::RankCount(void) { return ProcessorCount();}; double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector &list, void *xmit, diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index e0b9f2c3..12a8429f 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -148,6 +148,7 @@ class CartesianCommunicator { int RankFromProcessorCoor(std::vector &coor); void ProcessorCoorFromRank(int rank,std::vector &coor); + int Dimensions(void) ; int IsBoss(void) ; int BossRank(void) ; int ThisRank(void) ; @@ -155,6 +156,7 @@ class CartesianCommunicator { const 
std::vector & ProcessorGrid(void) ; int ProcessorCount(void) ; int NodeCount(void) ; + int RankCount(void) ; //////////////////////////////////////////////////////////////////////////////// // very VERY rarely (Log, serial RNG) we need world without a grid @@ -175,6 +177,8 @@ class CartesianCommunicator { void GlobalSumVector(ComplexF *c,int N); void GlobalSum(ComplexD &c); void GlobalSumVector(ComplexD *c,int N); + void GlobalXOR(uint32_t &); + void GlobalXOR(uint64_t &); template void GlobalSum(obj &o){ typedef typename obj::scalar_type scalar_type; diff --git a/lib/communicator/Communicator_mpi.cc b/lib/communicator/Communicator_mpi.cc index 470a06c7..bd2a62fb 100644 --- a/lib/communicator/Communicator_mpi.cc +++ b/lib/communicator/Communicator_mpi.cc @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); assert(ierr==0); } +void CartesianCommunicator::GlobalXOR(uint32_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_BXOR,communicator); + assert(ierr==0); +} +void CartesianCommunicator::GlobalXOR(uint64_t &u){ + int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_BXOR,communicator); + assert(ierr==0); +} void CartesianCommunicator::GlobalSum(float &f){ int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); assert(ierr==0); diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc index a8bffc14..4192300b 100644 --- a/lib/communicator/Communicator_mpi3.cc +++ b/lib/communicator/Communicator_mpi3.cc @@ -37,7 +37,10 @@ Author: Peter Boyle #include #include #include -//#include +#include +#ifdef HAVE_NUMAIF_H +#include +#endif #ifndef SHM_HUGETLB #define SHM_HUGETLB 04000 #endif @@ -65,6 +68,7 @@ std::vector CartesianCommunicator::MyGroup; std::vector CartesianCommunicator::ShmCommBufs; int CartesianCommunicator::NodeCount(void) { return GroupSize;}; +int CartesianCommunicator::RankCount(void) { 
return WorldSize;}; #undef FORCE_COMMS @@ -213,6 +217,25 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { void * ptr = mmap(NULL,size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if ( ptr == MAP_FAILED ) { perror("failed mmap"); assert(0); } assert(((uint64_t)ptr&0x3F)==0); + + // Try to force numa domain on the shm segment if we have numaif.h +#ifdef HAVE_NUMAIF_H + int status; + int flags=MPOL_MF_MOVE; +#ifdef KNL + int nodes=1; // numa domain == MCDRAM + // Find out if in SNC2,SNC4 mode ? +#else + int nodes=r; // numa domain == MPI ID +#endif + unsigned long count=1; + for(uint64_t page=0;page &R,std::vector &a,const Lattice } }; - -/* -template -static void sliceMaddVectorSlow (Lattice &R,std::vector &a,const Lattice &X,const Lattice &Y, - int Orthog,RealD scale=1.0) -{ - // FIXME: Implementation is slow - // Best base the linear combination by constructing a - // set of vectors of size grid->_rdimensions[Orthog]. - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - - int Nblock = X._grid->GlobalDimensions()[Orthog]; - - GridBase *FullGrid = X._grid; - GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - - Lattice Xslice(SliceGrid); - Lattice Rslice(SliceGrid); - // If we based this on Cshift it would work for spread out - // but it would be even slower - for(int i=0;i -static void sliceInnerProductVectorSlow( std::vector & vec, const Lattice &lhs,const Lattice &rhs,int Orthog) - { - // FIXME: Implementation is slow - // Look at localInnerProduct implementation, - // and do inside a site loop with block strided iterators - typedef typename vobj::scalar_object sobj; - typedef typename vobj::scalar_type scalar_type; - typedef typename vobj::vector_type vector_type; - typedef typename vobj::tensor_reduced scalar; - typedef typename scalar::scalar_object scomplex; - - int Nblock = lhs._grid->GlobalDimensions()[Orthog]; - vec.resize(Nblock); - 
std::vector sip(Nblock); - Lattice IP(lhs._grid); - IP=localInnerProduct(lhs,rhs); - sliceSum(IP,sip,Orthog); - - for(int ss=0;ss_rdimensions[Orthog]. -////////////////////////////////////////////////////////////////////////////////////////// - inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) { int NN = BlockSolverGrid->_ndimension; @@ -453,7 +388,6 @@ inline GridBase *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys); } - template static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,const Lattice &Y,int Orthog,RealD scale=1.0) { @@ -462,28 +396,103 @@ static void sliceMaddMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice typedef typename vobj::vector_type vector_type; int Nblock = X._grid->GlobalDimensions()[Orthog]; - + GridBase *FullGrid = X._grid; GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); - + Lattice Xslice(SliceGrid); Lattice Rslice(SliceGrid); - - for(int i=0;i_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; +#pragma omp parallel + { + std::vector s_x(Nblock); + +#pragma omp for collapse(2) + for(int n=0;n +static void sliceMulMatrix (Lattice &R,Eigen::MatrixXcd &aa,const Lattice &X,int Orthog,RealD scale=1.0) +{ + typedef typename vobj::scalar_object sobj; + typedef typename vobj::scalar_type scalar_type; + typedef typename vobj::vector_type vector_type; + + int Nblock = X._grid->GlobalDimensions()[Orthog]; + + GridBase *FullGrid = X._grid; + GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); + + Lattice Xslice(SliceGrid); + Lattice Rslice(SliceGrid); + + assert( 
FullGrid->_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; +#pragma omp parallel + { + std::vector s_x(Nblock); + +#pragma omp for collapse(2) + for(int n=0;n static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice &lhs,const Lattice &rhs,int Orthog) { - // FIXME: Implementation is slow - // Not sure of best solution.. think about it typedef typename vobj::scalar_object sobj; typedef typename vobj::scalar_type scalar_type; typedef typename vobj::vector_type vector_type; @@ -497,22 +506,50 @@ static void sliceInnerProductMatrix( Eigen::MatrixXcd &mat, const Lattice Lattice Rslice(SliceGrid); mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); - - for(int i=0;i_simd_layout[Orthog]==1); + int nh = FullGrid->_ndimension; + int nl = SliceGrid->_ndimension; + + //FIXME package in a convenient iterator + //Should loop over a plane orthogonal to direction "Orthog" + int stride=FullGrid->_slice_stride[Orthog]; + int block =FullGrid->_slice_block [Orthog]; + int nblock=FullGrid->_slice_nblock[Orthog]; + int ostride=FullGrid->_ostride[Orthog]; + + typedef typename vobj::vector_typeD vector_typeD; + +#pragma omp parallel + { + std::vector Left(Nblock); + std::vector Right(Nblock); + Eigen::MatrixXcd mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); + +#pragma omp for collapse(2) + for(int n=0;n &coarse,Lattice & fine) //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order template -typename std::enable_if::value && !isSIMDvectorized::value, void>::type unvectorizeToLexOrdArray(std::vector &out, const Lattice &in){ +typename std::enable_if::value && !isSIMDvectorized::value, void>::type 
+unvectorizeToLexOrdArray(std::vector &out, const Lattice &in) +{ + typedef typename vobj::vector_type vtype; GridBase* in_grid = in._grid; @@ -590,6 +593,54 @@ typename std::enable_if::value && !isSIMDvectorized extract1(in_vobj, out_ptrs, 0); } } +//Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order +template +typename std::enable_if::value + && !isSIMDvectorized::value, void>::type +vectorizeFromLexOrdArray( std::vector &in, Lattice &out) +{ + + typedef typename vobj::vector_type vtype; + + GridBase* grid = out._grid; + assert(in.size()==grid->lSites()); + + int ndim = grid->Nd(); + int nsimd = vtype::Nsimd(); + + std::vector > icoor(nsimd); + + for(int lane=0; lane < nsimd; lane++){ + icoor[lane].resize(ndim); + grid->iCoorFromIindex(icoor[lane],lane); + } + + parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index + //Assemble vector of pointers to output elements + std::vector ptrs(nsimd); + + std::vector ocoor(ndim); + grid->oCoorFromOindex(ocoor, oidx); + + std::vector lcoor(grid->Nd()); + + for(int lane=0; lane < nsimd; lane++){ + + for(int mu=0;mu_rdimensions[mu]*icoor[lane][mu]; + } + + int lex; + Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); + ptrs[lane] = &in[lex]; + } + + //pack from those ptrs + vobj vecobj; + merge1(vecobj, ptrs, 0); + out._odata[oidx] = vecobj; + } +} //Convert a Lattice from one precision to another template @@ -615,7 +666,7 @@ void precisionChange(Lattice &out, const Lattice &in){ std::vector in_slex_conv(in_grid->lSites()); unvectorizeToLexOrdArray(in_slex_conv, in); - parallel_for(int out_oidx=0;out_oidxoSites();out_oidx++){ + parallel_for(uint64_t out_oidx=0;out_oidxoSites();out_oidx++){ std::vector out_ocoor(ndim); out_grid->oCoorFromOindex(out_ocoor, out_oidx); diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index afa7eb2e..117bec01 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -29,16 +29,25 @@ #ifndef 
GRID_BINARY_IO_H #define GRID_BINARY_IO_H - -#include "IldgIOtypes.h" +#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) +#define USE_MPI_IO +#else +#undef USE_MPI_IO +#endif #ifdef HAVE_ENDIAN_H #include #endif + #include #include +namespace Grid { + +///////////////////////////////////////////////////////////////////////////////// +// Byte reversal garbage +///////////////////////////////////////////////////////////////////////////////// inline uint32_t byte_reverse32(uint32_t f) { f = ((f&0xFF)<<24) | ((f&0xFF00)<<8) | ((f&0xFF0000)>>8) | ((f&0xFF000000UL)>>24) ; return f; @@ -60,18 +69,121 @@ inline uint64_t Grid_ntohll(uint64_t A) { } #endif -namespace Grid { - - // A little helper - inline void removeWhitespace(std::string &key) - { - key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end()); - } +// A little helper +inline void removeWhitespace(std::string &key) +{ + key.erase(std::remove_if(key.begin(), key.end(), ::isspace),key.end()); +} +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Static class holding the parallel IO code +// Could just use a namespace +/////////////////////////////////////////////////////////////////////////////////////////////////// class BinaryIO { - public: + ///////////////////////////////////////////////////////////////////////////// + // more byte manipulation helpers + ///////////////////////////////////////////////////////////////////////////// + + template static inline void Uint32Checksum(Lattice &lat,uint32_t &nersc_csum) + { + typedef typename vobj::scalar_object sobj; + + GridBase *grid = lat._grid; + int lsites = grid->lSites(); + + std::vector scalardata(lsites); + unvectorizeToLexOrdArray(scalardata,lat); + + NerscChecksum(grid,scalardata,nersc_csum); + } + + template static inline void NerscChecksum(GridBase *grid,std::vector &fbuf,uint32_t &nersc_csum) + { + const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); + + + uint64_t lsites 
=grid->lSites(); + if (fbuf.size()==1) { + lsites=1; + } + +#pragma omp parallel + { + uint32_t nersc_csum_thr=0; + +#pragma omp for + for(uint64_t local_site=0;local_site static inline void ScidacChecksum(GridBase *grid,std::vector &fbuf,uint32_t &scidac_csuma,uint32_t &scidac_csumb) + { + const uint64_t size32 = sizeof(fobj)/sizeof(uint32_t); + + + int nd = grid->_ndimension; + + uint64_t lsites =grid->lSites(); + if (fbuf.size()==1) { + lsites=1; + } + std::vector local_vol =grid->LocalDimensions(); + std::vector local_start =grid->LocalStarts(); + std::vector global_vol =grid->FullDimensions(); + +#pragma omp parallel + { + std::vector coor(nd); + uint32_t scidac_csuma_thr=0; + uint32_t scidac_csumb_thr=0; + uint32_t site_crc=0; + +#pragma omp for + for(uint64_t local_site=0;local_site>(32-gsite29); + scidac_csumb_thr ^= site_crc<>(32-gsite31); + } + +#pragma omp critical + { + scidac_csuma^= scidac_csuma_thr; + scidac_csumb^= scidac_csumb_thr; + } + } + } // Network is big endian static inline void htobe32_v(void *file_object,uint32_t bytes){ be32toh_v(file_object,bytes);} @@ -79,21 +191,22 @@ class BinaryIO { static inline void htole32_v(void *file_object,uint32_t bytes){ le32toh_v(file_object,bytes);} static inline void htole64_v(void *file_object,uint32_t bytes){ le64toh_v(file_object,bytes);} - static inline void be32toh_v(void *file_object,uint32_t bytes) + static inline void be32toh_v(void *file_object,uint64_t bytes) { uint32_t * f = (uint32_t *)file_object; - for(int i=0;i*sizeof(uint32_t)>8) | ((f&0xFF000000UL)>>24) ; @@ -102,21 +215,23 @@ class BinaryIO { } // BE is same as network - static inline void be64toh_v(void *file_object,uint32_t bytes) + static inline void be64toh_v(void *file_object,uint64_t bytes) { uint64_t * f = (uint64_t *)file_object; - for(int i=0;i*sizeof(uint64_t)>8) | ((f&0xFF000000UL)>>24) ; @@ -126,674 +241,410 @@ class BinaryIO { fp[i] = Grid_ntohll(g); } } + 
///////////////////////////////////////////////////////////////////////////// + // Real action: + // Read or Write distributed lexico array of ANY object to a specific location in file + ////////////////////////////////////////////////////////////////////////////////////// - template static inline void Uint32Checksum(Lattice &lat,munger munge,uint32_t &csum) + static const int BINARYIO_MASTER_APPEND = 0x10; + static const int BINARYIO_UNORDERED = 0x08; + static const int BINARYIO_LEXICOGRAPHIC = 0x04; + static const int BINARYIO_READ = 0x02; + static const int BINARYIO_WRITE = 0x01; + + template + static inline void IOobject(word w, + GridBase *grid, + std::vector &iodata, + std::string file, + int offset, + const std::string &format, int control, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) { - typedef typename vobj::scalar_object sobj; - GridBase *grid = lat._grid ; - std::cout <Barrier(); + GridStopWatch timer; + GridStopWatch bstimer; - csum = 0; - std::vector lcoor; - for(int l=0;llSites();l++){ - Lexicographic::CoorFromIndex(lcoor,l,grid->_ldimensions); - peekLocalSite(siteObj,lat,lcoor); - munge(siteObj,fileObj,csum); + nersc_csum=0; + scidac_csuma=0; + scidac_csumb=0; + + int ndim = grid->Dimensions(); + int nrank = grid->ProcessorCount(); + int myrank = grid->ThisRank(); + + std::vector psizes = grid->ProcessorGrid(); + std::vector pcoor = grid->ThisProcessorCoor(); + std::vector gLattice= grid->GlobalDimensions(); + std::vector lLattice= grid->LocalDimensions(); + + std::vector lStart(ndim); + std::vector gStart(ndim); + + // Flatten the file + uint64_t lsites = grid->lSites(); + if ( control & BINARYIO_MASTER_APPEND ) { + assert(iodata.size()==1); + } else { + assert(lsites==iodata.size()); } - grid->GlobalSum(csum); - } - - static inline void Uint32Checksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) - { - for(int i=0;i*sizeof(uint32_t) - struct BinarySimpleUnmunger { - typedef typename 
getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; +#ifdef USE_MPI_IO + std::vector distribs(ndim,MPI_DISTRIBUTE_BLOCK); + std::vector dargs (ndim,MPI_DISTRIBUTE_DFLT_DARG); + MPI_Datatype mpiObject; + MPI_Datatype fileArray; + MPI_Datatype localArray; + MPI_Datatype mpiword; + MPI_Offset disp = offset; + MPI_File fh ; + MPI_Status status; + int numword; - void operator()(sobj &in, fobj &out, uint32_t &csum) { - // take word by word and transform accoding to the status - fobj_stype *out_buffer = (fobj_stype *)&out; - sobj_stype *in_buffer = (sobj_stype *)∈ - size_t fobj_words = sizeof(out) / sizeof(fobj_stype); - size_t sobj_words = sizeof(in) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); - - for (unsigned int word = 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - BinaryIO::Uint32Checksum((uint32_t *)&out, sizeof(out), csum); + if ( sizeof( word ) == sizeof(float ) ) { + numword = sizeof(fobj)/sizeof(float); + mpiword = MPI_FLOAT; + } else { + numword = sizeof(fobj)/sizeof(double); + mpiword = MPI_DOUBLE; } - }; - template - struct BinarySimpleMunger { - typedef typename getPrecision::real_scalar_type fobj_stype; - typedef typename getPrecision::real_scalar_type sobj_stype; + ////////////////////////////////////////////////////////////////////////////// + // Sobj in MPI phrasing + ////////////////////////////////////////////////////////////////////////////// + int ierr; + ierr = MPI_Type_contiguous(numword,mpiword,&mpiObject); assert(ierr==0); + ierr = MPI_Type_commit(&mpiObject); - void operator()(fobj &in, sobj &out, uint32_t &csum) { - // take word by word and transform accoding to the status - fobj_stype *in_buffer = (fobj_stype *)∈ - sobj_stype *out_buffer = (sobj_stype *)&out; - size_t fobj_words = sizeof(in) / sizeof(fobj_stype); - size_t sobj_words = sizeof(out) / sizeof(sobj_stype); - assert(fobj_words == sobj_words); + 
////////////////////////////////////////////////////////////////////////////// + // File global array data type + ////////////////////////////////////////////////////////////////////////////// + ierr=MPI_Type_create_subarray(ndim,&gLattice[0],&lLattice[0],&gStart[0],MPI_ORDER_FORTRAN, mpiObject,&fileArray); assert(ierr==0); + ierr=MPI_Type_commit(&fileArray); assert(ierr==0); - for (unsigned int word = 0; word < sobj_words; word++) - out_buffer[word] = in_buffer[word]; // type conversion on the fly - - BinaryIO::Uint32Checksum((uint32_t *)&in, sizeof(in), csum); - } - }; - - template - static inline uint32_t readObjectSerial(Lattice &Umu,std::string file,munger munge,int offset,const std::string &format) - { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - std::cout<< GridLogMessage<< "Serial read I/O "<< file<< std::endl; - GridStopWatch timer; timer.Start(); + ////////////////////////////////////////////////////////////////////////////// + // local lattice array + ////////////////////////////////////////////////////////////////////////////// + ierr=MPI_Type_create_subarray(ndim,&lLattice[0],&lLattice[0],&lStart[0],MPI_ORDER_FORTRAN, mpiObject,&localArray); assert(ierr==0); + ierr=MPI_Type_commit(&localArray); assert(ierr==0); +#endif + ////////////////////////////////////////////////////////////////////////////// + // Byte order + ////////////////////////////////////////////////////////////////////////////// int ieee32big = (format == std::string("IEEE32BIG")); int ieee32 = (format == std::string("IEEE32")); int ieee64big = (format == std::string("IEEE64BIG")); int ieee64 = (format == std::string("IEEE64")); - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no longer - // available (how short sighted is that?) 
- std::ifstream fin(file,std::ios::binary|std::ios::in); - fin.seekg(offset); - - Umu = zero; - uint32_t csum=0; - uint64_t bytes=0; - fobj file_object; - sobj munged; - - for(int t=0;t_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ - - std::vector site({x,y,z,t}); - - if (grid->IsBoss()) { - fin.read((char *)&file_object, sizeof(file_object));assert( fin.fail()==0); - bytes += sizeof(file_object); - if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); - if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); - if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); - if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); - - munge(file_object, munged, csum); - } - // The boss who read the file has their value poked - pokeSite(munged,Umu,site); - }}}} - timer.Stop(); - std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); - return csum; - } - - template - static inline uint32_t writeObjectSerial(Lattice &Umu,std::string file,munger munge,int offset, - const std::string & format) - { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::cout<< GridLogMessage<< "Serial write I/O "<< file<IsBoss() ) { - fout.open(file,std::ios::binary|std::ios::out|std::ios::in); - fout.seekp(offset); - } - uint64_t bytes=0; - uint32_t csum=0; - fobj file_object; - sobj unmunged; - for(int t=0;t_fdimensions[3];t++){ - for(int z=0;z_fdimensions[2];z++){ - for(int y=0;y_fdimensions[1];y++){ - for(int x=0;x_fdimensions[0];x++){ - - std::vector site({x,y,z,t}); - // peek & write - 
peekSite(unmunged,Umu,site); - - munge(unmunged,file_object,csum); - - - if ( grid->IsBoss() ) { - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object));assert( fout.fail()==0); - bytes+=sizeof(file_object); - } - }}}} - timer.Stop(); - std::cout<Broadcast(0,(void *)&csum,sizeof(csum)); - return csum; - } - - static inline uint32_t writeRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset) - { - typedef typename GridSerialRNG::RngStateType RngStateType; - const int RngStateCount = GridSerialRNG::RngStateCount; - - GridBase *grid = parallel._grid; - int gsites = grid->_gsites; - - GridStopWatch timer; timer.Start(); - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::ofstream fout; - if (grid->IsBoss()) { - fout.open(file, std::ios::binary | std::ios::out); - if (!fout.is_open()) { - std::cout << GridLogMessage << "writeRNGSerial: Error opening file " << file << std::endl; - exit(0);// write better error handling - } - fout.seekp(offset); - } - - std::cout << GridLogMessage << "Serial RNG write I/O on file " << file << std::endl; - uint32_t csum = 0; - std::vector saved(RngStateCount); - int bytes = sizeof(RngStateType) * saved.size(); - std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl; - std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl; - std::vector gcoor; - - for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - int l_idx=parallel.generator_idx(o_idx,i_idx); - - if( rank == grid->ThisRank() ){ - // std::cout << 
"rank" << rank<<" Getting state for index "<Broadcast(rank, (void *)&saved[0], bytes); - - if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); - } - - } - - if ( grid->IsBoss() ) { - serial.GetState(saved,0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes);assert( fout.fail()==0); - } - - grid->Broadcast(0, (void *)&csum, sizeof(csum)); - - if (grid->IsBoss()) - fout.close(); - - timer.Stop(); - - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; - std::cout << GridLogMessage << "RNG state saved in " << timer.Elapsed() << std::endl; - return csum; - } - - - static inline uint32_t readRNGSerial(GridSerialRNG &serial,GridParallelRNG ¶llel,std::string file,int offset) - { - typedef typename GridSerialRNG::RngStateType RngStateType; - const int RngStateCount = GridSerialRNG::RngStateCount; - - GridBase *grid = parallel._grid; - int gsites = grid->_gsites; - - ////////////////////////////////////////////////// - // Serialise through node zero - ////////////////////////////////////////////////// - std::cout<< GridLogMessage<< "Serial RNG read I/O of file "<IsBoss()) { - fin.open(file, std::ios::binary | std::ios::in); - if (!fin.is_open()) { - std::cout << GridLogMessage << "readRNGSerial: Error opening file " << file << std::endl; - exit(0);// write better error handling - } - fin.seekg(offset); - } - - - uint32_t csum=0; - std::vector saved(RngStateCount); - int bytes = sizeof(RngStateType)*saved.size(); - std::cout << GridLogDebug << "RngStateCount: " << RngStateCount << std::endl; - std::cout << GridLogDebug << "Type has " << bytes << " bytes" << std::endl; - std::vector gcoor; - - std::cout << GridLogDebug << "gsites: " << gsites << " loop" << std::endl; - for(int gidx=0;gidxGlobalIndexToGlobalCoor(gidx,gcoor); - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor); - int 
l_idx=parallel.generator_idx(o_idx,i_idx); - //std::cout << GridLogDebug << "l_idx " << l_idx << " o_idx " << o_idx - // << " i_idx " << i_idx << " rank " << rank << std::endl; - - if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - } - - grid->Broadcast(0,(void *)&saved[0],bytes); - - if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); - } - } - - if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes);assert( fin.fail()==0); - serial.SetState(saved,0); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - } - - std::cout << GridLogMessage << "RNG file checksum " << std::hex << csum << std::dec << std::endl; - - grid->Broadcast(0,(void *)&csum,sizeof(csum)); - - return csum; - } - - - template - static inline uint32_t readObjectParallel(Lattice &Umu, - std::string file, - munger munge, - int offset, - const std::string &format, - ILDGtype ILDG = ILDGtype()) { - typedef typename vobj::scalar_object sobj; - - GridBase *grid = Umu._grid; - - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); - - - // Take into account block size of parallel file systems want about - // 4-16MB chunks. - // Ideally one reader/writer per xy plane and read these contiguously - // with comms from nominated I/O nodes. 
- std::ifstream fin; - - int nd = grid->_ndimension; - std::vector parallel(nd,1); - std::vector ioproc (nd); - std::vector start(nd); - std::vector range(nd); - - for(int d=0;dCheckerBoarded(d) == 0); - } - - uint64_t slice_vol = 1; - - int IOnode = 1; - for(int d=0;d_ndimension;d++) { - - if ( d == 0 ) parallel[d] = 0; - if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; - } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; - } - slice_vol = slice_vol * range[d]; - } - - { - uint32_t tmp = IOnode; - grid->GlobalSum(tmp); - std::cout<< std::dec ; - std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; - } - std::cout << std::endl; - } - - GridStopWatch timer; timer.Start(); - uint64_t bytes=0; - - int myrank = grid->ThisRank(); - int iorank = grid->RankFromProcessorCoor(ioproc); - - if (!ILDG.is_ILDG) - if ( IOnode ) { - fin.open(file,std::ios::binary|std::ios::in); - } - - ////////////////////////////////////////////////////////// - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no longer - // available (how short sighted is that?) 
- ////////////////////////////////////////////////////////// - Umu = zero; - static uint32_t csum; csum=0;//static for SHMEM - - fobj fileObj; - static sobj siteObj; // Static to place in symmetric region for SHMEM - - // need to implement these loops in Nd independent way with a lexico conversion - for(int tlex=0;tlex tsite(nd); // temporary mixed up site - std::vector gsite(nd); - std::vector lsite(nd); - std::vector iosite(nd); - - Lexicographic::CoorFromIndex(tsite,tlex,range); - - for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site - } - - - ///////////////////////// - // Get the rank of owner of data - ///////////////////////// - int rank, o_idx,i_idx, g_idx; - grid->GlobalCoorToRankIndex(rank,o_idx,i_idx,gsite); - grid->GlobalCoorToGlobalIndex(gsite,g_idx); - - //////////////////////////////// - // iorank reads from the seek - //////////////////////////////// - if (myrank == iorank) { - - - if (ILDG.is_ILDG){ - // use C-LIME to populate the record - #ifdef HAVE_LIME - uint64_t sizeFO = sizeof(fileObj); - limeReaderSeek(ILDG.LR, g_idx*sizeFO, SEEK_SET); - int status = limeReaderReadData((void *)&fileObj, &sizeFO, ILDG.LR); - #endif - } else{ - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - } - bytes+=sizeof(fileObj); - - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); - - } - - // Possibly do transport through pt2pt - if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + ////////////////////////////////////////////////////////////////////////////// + // Do the I/O + ////////////////////////////////////////////////////////////////////////////// + if ( control 
& BINARYIO_READ ) { + + timer.Start(); + + if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { +#ifdef USE_MPI_IO + std::cout<< GridLogMessage<< "MPI read I/O "<< file<< std::endl; + ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + ierr=MPI_File_read_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + MPI_File_close(&fh); + MPI_Type_free(&fileArray); + MPI_Type_free(&localArray); +#else + assert(0); +#endif + } else { + std::cout<< GridLogMessage<< "C++ read I/O "<< file<<" : " + << iodata.size()*sizeof(fobj)<<" bytes"<Barrier(); + + bstimer.Start(); + ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb); + if (ieee32big) be32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee32) le32toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64big) be64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64) le64toh_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + NerscChecksum(grid,iodata,nersc_csum); + bstimer.Stop(); + } + + if ( control & BINARYIO_WRITE ) { + + bstimer.Start(); + NerscChecksum(grid,iodata,nersc_csum); + if (ieee32big) htobe32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee32) htole32_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64big) htobe64_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + if (ieee64) htole64_v((void *)&iodata[0], sizeof(fobj)*iodata.size()); + ScidacChecksum(grid,iodata,scidac_csuma,scidac_csumb); + bstimer.Stop(); + + grid->Barrier(); + + timer.Start(); + if ( (control & BINARYIO_LEXICOGRAPHIC) && (nrank > 1) ) { +#ifdef USE_MPI_IO + std::cout<< GridLogMessage<< "MPI write I/O "<< file<< std::endl; + ierr=MPI_File_open(grid->communicator,(char *) file.c_str(), MPI_MODE_RDWR|MPI_MODE_CREATE,MPI_INFO_NULL, &fh); assert(ierr==0); + ierr=MPI_File_set_view(fh, disp, 
mpiObject, fileArray, "native", MPI_INFO_NULL); assert(ierr==0); + ierr=MPI_File_write_all(fh, &iodata[0], 1, localArray, &status); assert(ierr==0); + MPI_File_close(&fh); + MPI_Type_free(&fileArray); + MPI_Type_free(&localArray); +#else + assert(0); +#endif + } else { + std::ofstream fout; fout.open(file,std::ios::binary|std::ios::out|std::ios::in); + std::cout<< GridLogMessage<< "C++ write I/O "<< file<<" : " + << iodata.size()*sizeof(fobj)<<" bytes"<Barrier(); // necessary? + timer.Stop(); } - grid->GlobalSum(csum); - grid->GlobalSum(bytes); + std::cout<Barrier(); + grid->GlobalSum(nersc_csum); + grid->GlobalXOR(scidac_csuma); + grid->GlobalXOR(scidac_csumb); + grid->Barrier(); + } + + ///////////////////////////////////////////////////////////////////////////// + // Read a Lattice of object + ////////////////////////////////////////////////////////////////////////////////////// + template + static inline void readLatticeObject(Lattice &Umu, + std::string file, + munger munge, + int offset, + const std::string &format, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) + { + typedef typename vobj::scalar_object sobj; + typedef typename vobj::Realified::scalar_type word; word w=0; + + GridBase *grid = Umu._grid; + int lsites = grid->lSites(); + + std::vector scalardata(lsites); + std::vector iodata(lsites); // Munge, checksum, byte order in here + + IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); + + GridStopWatch timer; + timer.Start(); + + parallel_for(int x=0;xBarrier(); timer.Stop(); - std::cout< - static inline uint32_t writeObjectParallel(Lattice &Umu, - std::string file, munger munge, - int offset, - const std::string &format, - ILDGtype ILDG = ILDGtype()) { + ///////////////////////////////////////////////////////////////////////////// + // Write a Lattice of object + ////////////////////////////////////////////////////////////////////////////////////// + 
template + static inline void writeLatticeObject(Lattice &Umu, + std::string file, + munger munge, + int offset, + const std::string &format, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) + { typedef typename vobj::scalar_object sobj; + typedef typename vobj::Realified::scalar_type word; word w=0; GridBase *grid = Umu._grid; + int lsites = grid->lSites(); - int ieee32big = (format == std::string("IEEE32BIG")); - int ieee32 = (format == std::string("IEEE32")); - int ieee64big = (format == std::string("IEEE64BIG")); - int ieee64 = (format == std::string("IEEE64")); + std::vector scalardata(lsites); + std::vector iodata(lsites); // Munge, checksum, byte order in here - if (!(ieee32big || ieee32 || ieee64big || ieee64)) { - std::cout << GridLogError << "Unrecognized file format " << format - << std::endl; - std::cout << GridLogError - << "Allowed: IEEE32BIG | IEEE32 | IEEE64BIG | IEEE64" - << std::endl; - exit(0); + ////////////////////////////////////////////////////////////////////////////// + // Munge [ .e.g 3rd row recon ] + ////////////////////////////////////////////////////////////////////////////// + GridStopWatch timer; timer.Start(); + unvectorizeToLexOrdArray(scalardata,Umu); + + parallel_for(int x=0;xBarrier(); + timer.Stop(); + + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); + + std::cout< RNGstate; + typedef RngStateType word; word w=0; + + std::string format = "IEEE32BIG"; + + GridBase *grid = parallel._grid; + int gsites = grid->gSites(); + int lsites = grid->lSites(); + + uint32_t nersc_csum_tmp; + uint32_t scidac_csuma_tmp; + uint32_t scidac_csumb_tmp; + + GridStopWatch timer; + + std::cout << GridLogMessage << "RNG read I/O on file " << file << std::endl; + + std::vector iodata(lsites); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); + + timer.Start(); + parallel_for(int 
lidx=0;lidx tmp(RngStateCount); + std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); + parallel.SetState(tmp,lidx); } + timer.Stop(); - int nd = grid->_ndimension; - for (int d = 0; d < nd; d++) { - assert(grid->CheckerBoarded(d) == 0); - } - - std::vector parallel(nd, 1); - std::vector ioproc(nd); - std::vector start(nd); - std::vector range(nd); - - uint64_t slice_vol = 1; - - int IOnode = 1; - - for (int d = 0; d < grid->_ndimension; d++) { - if (d != grid->_ndimension - 1) parallel[d] = 0; - - if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; - } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; - - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; - } - - slice_vol = slice_vol * range[d]; - } + iodata.resize(1); + IOobject(w,grid,iodata,file,offset,format,BINARYIO_READ|BINARYIO_MASTER_APPEND, + nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp); { - uint32_t tmp = IOnode; - grid->GlobalSum(tmp); - std::cout<< GridLogMessage<< "Parallel write I/O from "<< file - << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; - } - std::cout << std::endl; + std::vector tmp(RngStateCount); + std::copy(iodata[0].begin(),iodata[0].end(),tmp.begin()); + serial.SetState(tmp,0); } - + + nersc_csum = nersc_csum + nersc_csum_tmp; + scidac_csuma = scidac_csuma ^ scidac_csuma_tmp; + scidac_csumb = scidac_csumb ^ scidac_csumb_tmp; + + std::cout << GridLogMessage << "RNG file nersc_checksum " << std::hex << nersc_csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file scidac_checksuma " << std::hex << scidac_csuma << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file scidac_checksumb " << std::hex << scidac_csumb << std::dec << std::endl; + + std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; + } + 
///////////////////////////////////////////////////////////////////////////// + // Write a RNG; lexico map to an array of state and use IOobject + ////////////////////////////////////////////////////////////////////////////////////// + static inline void writeRNG(GridSerialRNG &serial, + GridParallelRNG ¶llel, + std::string file, + int offset, + uint32_t &nersc_csum, + uint32_t &scidac_csuma, + uint32_t &scidac_csumb) + { + typedef typename GridSerialRNG::RngStateType RngStateType; + typedef RngStateType word; word w=0; + const int RngStateCount = GridSerialRNG::RngStateCount; + typedef std::array RNGstate; + + GridBase *grid = parallel._grid; + int gsites = grid->gSites(); + int lsites = grid->lSites(); + + uint32_t nersc_csum_tmp; + uint32_t scidac_csuma_tmp; + uint32_t scidac_csumb_tmp; + GridStopWatch timer; + std::string format = "IEEE32BIG"; + + std::cout << GridLogMessage << "RNG write I/O on file " << file << std::endl; + timer.Start(); - uint64_t bytes=0; - - int myrank = grid->ThisRank(); - int iorank = grid->RankFromProcessorCoor(ioproc); - - // Take into account block size of parallel file systems want about - // 4-16MB chunks. - // Ideally one reader/writer per xy plane and read these contiguously - // with comms from nominated I/O nodes. - std::ofstream fout; - if (!ILDG.is_ILDG) - if (IOnode){ - fout.open(file, std::ios::binary | std::ios::in | std::ios::out); - if (!fout.is_open()) { - std::cout << GridLogMessage << "writeObjectParallel: Error opening file " << file - << std::endl; - exit(0); - } - } - - - ////////////////////////////////////////////////////////// - // Find the location of each site and send to primary node - // Take loop order from Chroma; defines loop order now that NERSC doc no - // longer - // available (how short sighted is that?) 
- ////////////////////////////////////////////////////////// - - uint32_t csum = 0; - fobj fileObj; - static sobj siteObj; // static for SHMEM target; otherwise dynamic allocate - // with AlignedAllocator - - // should aggregate a whole chunk and then write. - // need to implement these loops in Nd independent way with a lexico - // conversion - for (int tlex = 0; tlex < slice_vol; tlex++) { - std::vector tsite(nd); // temporary mixed up site - std::vector gsite(nd); - std::vector lsite(nd); - std::vector iosite(nd); - - Lexicographic::CoorFromIndex(tsite, tlex, range); - - for(int d = 0;d < nd; d++){ - lsite[d] = tsite[d] % grid->_ldimensions[d]; // local site - gsite[d] = tsite[d] + start[d]; // global site - } - - ///////////////////////// - // Get the rank of owner of data - ///////////////////////// - int rank, o_idx, i_idx, g_idx; - grid->GlobalCoorToRankIndex(rank, o_idx, i_idx, gsite); - grid->GlobalCoorToGlobalIndex(gsite, g_idx); - - //////////////////////////////// - // iorank writes from the seek - //////////////////////////////// - - // Owner of data peeks it - peekLocalSite(siteObj, Umu, lsite); - - // Pair of nodes may need to do pt2pt send - if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } - } - - grid->Barrier(); // necessary? 
- - if (myrank == iorank) { - munge(siteObj, fileObj, csum); - - if (ieee32big) htobe32_v((void *)&fileObj, sizeof(fileObj)); - if (ieee32) htole32_v((void *)&fileObj, sizeof(fileObj)); - if (ieee64big) htobe64_v((void *)&fileObj, sizeof(fileObj)); - if (ieee64) htole64_v((void *)&fileObj, sizeof(fileObj)); - - - if (ILDG.is_ILDG) { - #ifdef HAVE_LIME - uint64_t sizeFO = sizeof(fileObj); - limeWriterSeek(ILDG.LW, g_idx*sizeFO, SEEK_SET); - int status = limeWriteRecordData((void *)&fileObj, &sizeFO, ILDG.LW); - #endif - } - - else { - fout.seekp(offset + g_idx * sizeof(fileObj)); - fout.write((char *)&fileObj, sizeof(fileObj));assert( fout.fail()==0); - } - bytes += sizeof(fileObj); - } + std::vector iodata(lsites); + parallel_for(int lidx=0;lidx tmp(RngStateCount); + parallel.GetState(tmp,lidx); + std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); } - - grid->GlobalSum(csum); - grid->GlobalSum(bytes); - timer.Stop(); - std::cout << GridLogPerformance << "writeObjectParallel: wrote " << bytes - << " bytes in " << timer.Elapsed() << " " - << (double)bytes / timer.useconds() << " MB/s " << std::endl; + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_LEXICOGRAPHIC, + nersc_csum,scidac_csuma,scidac_csumb); + iodata.resize(1); + { + std::vector tmp(RngStateCount); + serial.GetState(tmp,0); + std::copy(tmp.begin(),tmp.end(),iodata[0].begin()); + } + IOobject(w,grid,iodata,file,offset,format,BINARYIO_WRITE|BINARYIO_MASTER_APPEND, + nersc_csum_tmp,scidac_csuma_tmp,scidac_csumb_tmp); - grid->Barrier(); // necessary? 
- if (IOnode) - fout.close(); - - - return csum; + nersc_csum = nersc_csum + nersc_csum_tmp; + scidac_csuma = scidac_csuma ^ scidac_csuma_tmp; + scidac_csumb = scidac_csumb ^ scidac_csumb_tmp; + + std::cout << GridLogMessage << "RNG file checksum " << std::hex << nersc_csum << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksuma " << std::hex << scidac_csuma << std::dec << std::endl; + std::cout << GridLogMessage << "RNG file checksumb " << std::hex << scidac_csumb << std::dec << std::endl; + std::cout << GridLogMessage << "RNG state overhead " << timer.Elapsed() << std::endl; } }; } - #endif diff --git a/lib/parallelIO/IldgIO.h b/lib/parallelIO/IldgIO.h index 0912e2f6..17ce4a06 100644 --- a/lib/parallelIO/IldgIO.h +++ b/lib/parallelIO/IldgIO.h @@ -27,6 +27,7 @@ directory #ifndef GRID_ILDG_IO_H #define GRID_ILDG_IO_H +#ifdef HAVE_LIME #include #include #include @@ -37,213 +38,677 @@ directory #include #include -#ifdef HAVE_LIME - -extern "C" { // for linkage +//C-Lime is a must have for this functionality +extern "C" { #include "lime.h" } namespace Grid { namespace QCD { -inline void ILDGGrid(GridBase *grid, ILDGField &header) { - assert(grid->_ndimension == 4); // emit error if not - header.dimension.resize(4); - header.boundary.resize(4); - for (int d = 0; d < 4; d++) { - header.dimension[d] = grid->_fdimensions[d]; - // Read boundary conditions from ... ? 
- header.boundary[d] = std::string("periodic"); - } -} + ///////////////////////////////// + // Encode word types as strings + ///////////////////////////////// + template inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } + template<> inline std::string ScidacWordMnemonic (void){ return std::string("D"); } + template<> inline std::string ScidacWordMnemonic (void){ return std::string("F"); } + template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } + template<> inline std::string ScidacWordMnemonic(void){ return std::string("U32_t"); } + template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } + template<> inline std::string ScidacWordMnemonic(void){ return std::string("U64_t"); } -inline void ILDGChecksum(uint32_t *buf, uint32_t buf_size_bytes, - uint32_t &csum) { - BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum); -} + ///////////////////////////////////////// + // Encode a generic tensor as a string + ///////////////////////////////////////// + template std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) { -////////////////////////////////////////////////////////////////////// -// Utilities ; these are QCD aware -////////////////////////////////////////////////////////////////////// -template -inline void ILDGStatistics(GaugeField &data, ILDGField &header) { - // How to convert data precision etc... 
- header.link_trace = Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette = Grid::QCD::WilsonLoops::avgPlaquette(data); - // header.polyakov = -} + typedef typename getPrecision::real_scalar_type stype; -// Forcing QCD here -template -struct ILDGMunger { - void operator()(fobj &in, sobj &out, uint32_t &csum) { - for (int mu = 0; mu < 4; mu++) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - ILDGChecksum((uint32_t *)&in, sizeof(in), csum); - }; -}; + int _ColourN = indexRank(); + int _ColourScalar = isScalar(); + int _ColourVector = isVector(); + int _ColourMatrix = isMatrix(); -template -struct ILDGUnmunger { - void operator()(sobj &in, fobj &out, uint32_t &csum) { - for (int mu = 0; mu < 4; mu++) { - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - ILDGChecksum((uint32_t *)&out, sizeof(out), csum); - }; -}; + int _SpinN = indexRank(); + int _SpinScalar = isScalar(); + int _SpinVector = isVector(); + int _SpinMatrix = isMatrix(); -//////////////////////////////////////////////////////////////////////////////// -// Write and read from fstream; compute header offset for payload -//////////////////////////////////////////////////////////////////////////////// -enum ILDGstate {ILDGread, ILDGwrite}; + int _LorentzN = indexRank(); + int _LorentzScalar = isScalar(); + int _LorentzVector = isVector(); + int _LorentzMatrix = isMatrix(); -class ILDGIO : public BinaryIO { - FILE *File; - LimeWriter *LimeW; - LimeRecordHeader *LimeHeader; - LimeReader *LimeR; - std::string filename; + std::stringstream stream; + + stream << "GRID_"; + stream << ScidacWordMnemonic(); + + // std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix< std::string ScidacRecordTypeString(Lattice & lat,int &colors, int &spins, int & typesize,int &datacount) { + return 
ScidacRecordTypeString(colors,spins,typesize,datacount); + }; + + + //////////////////////////////////////////////////////////// + // Helper to fill out metadata + //////////////////////////////////////////////////////////// + template void ScidacMetaData(Lattice & field, + FieldMetaData &header, + scidacRecord & _scidacRecord, + scidacFile & _scidacFile) + { + typedef typename getPrecision::real_scalar_type stype; + + ///////////////////////////////////// + // Pull Grid's metadata + ///////////////////////////////////// + PrepareMetaData(field,header); + + ///////////////////////////////////// + // Scidac Private File structure + ///////////////////////////////////// + _scidacFile = scidacFile(field._grid); + + ///////////////////////////////////// + // Scidac Private Record structure + ///////////////////////////////////// + scidacRecord sr; + sr.datatype = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); + sr.date = header.creation_date; + sr.precision = ScidacWordMnemonic(); + sr.recordtype = GRID_IO_FIELD; + + _scidacRecord = sr; + + std::cout << GridLogMessage << "Build SciDAC datatype " < + void readLimeLatticeBinaryObject(Lattice &field,std::string record_name) + { + typedef typename vobj::scalar_object sobj; + scidacChecksum scidacChecksum_; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + + std::string format = getFormatString(); + + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + + std::cout << GridLogMessage << limeReaderType(LimeR) < munge; + BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + + ///////////////////////////////////////////// + // Insist checksum is next record + ///////////////////////////////////////////// + readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); + + ///////////////////////////////////////////// + // Verify checksums + ///////////////////////////////////////////// + 
scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); + return; + } } } + //////////////////////////////////////////// + // Read a generic serialisable object + //////////////////////////////////////////// + template + void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) + { + std::string xmlstring; + // should this be a do while; can we miss a first record?? + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { - ~ILDGIO() { fclose(File); } + uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) - int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){ + if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) ) ) { + std::vector xmlc(nbytes+1,'\0'); + limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + XmlReader RD(&xmlc[0],""); + read(RD,object_name,object); + return; + } + + } + assert(0); + } +}; + +class GridLimeWriter : public BinaryIO { + public: + /////////////////////////////////////////////////// + // FIXME: format for RNG? 
Now just binary out instead + /////////////////////////////////////////////////// + + FILE *File; + LimeWriter *LimeW; + std::string filename; + + void open(std::string &_filename) { + filename= _filename; + File = fopen(filename.c_str(), "w"); + LimeW = limeCreateWriter(File); assert(LimeW != NULL ); + } + ///////////////////////////////////////////// + // Close the file + ///////////////////////////////////////////// + void close(void) { + fclose(File); + // limeDestroyWriter(LimeW); + } + /////////////////////////////////////////////////////// + // Lime utility functions + /////////////////////////////////////////////////////// + int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) + { LimeRecordHeader *h; h = limeCreateHeader(MB, ME, const_cast(message.c_str()), PayloadSize); - int status = limeWriteRecordHeader(h, L); - if (status < 0) { - std::cerr << "ILDG Header error\n"; - return status; - } + assert(limeWriteRecordHeader(h, LimeW) >= 0); limeDestroyHeader(h); return LIME_SUCCESS; } + //////////////////////////////////////////// + // Write a generic serialisable object + //////////////////////////////////////////// + template + void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) + { + std::string xmlstring; + { + XmlWriter WR("",""); + write(WR,object_name,object); + xmlstring = WR.XmlString(); + } + uint64_t nbytes = xmlstring.size(); + int err; + LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL); - unsigned int writeHeader(ILDGField &header) { - // write header in LIME - n_uint64_t nbytes; - int MB_flag = 1, ME_flag = 0; + err=limeWriteRecordHeader(h, LimeW); assert(err>=0); + err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + limeDestroyHeader(h); + } + //////////////////////////////////////////// + // Write a generic lattice field and csum 
+ //////////////////////////////////////////// + template + void writeLimeLatticeBinaryObject(Lattice &field,std::string record_name) + { + //////////////////////////////////////////// + // Create record header + //////////////////////////////////////////// + typedef typename vobj::scalar_object sobj; + int err; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; + createLimeRecordHeader(record_name, 0, 0, PayloadSize); - char message[] = "ildg-format"; - nbytes = strlen(message); - LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes); - limeWriteRecordHeader(LimeHeader, LimeW); - limeDestroyHeader(LimeHeader); - // save the xml header here - // use the xml_writer to c++ streams in pugixml - // and convert to char message - limeWriteRecordData(message, &nbytes, LimeW); - limeWriterCloseRecord(LimeW); + //////////////////////////////////////////////////////////////////// + // NB: FILE and iostream are jointly writing disjoint sequences in the + // the same file through different file handles (integer units). + // + // These are both buffered, so why I think this code is right is as follows. + // + // i) write record header to FILE *File, telegraphing the size. + // ii) ftell reads the offset from FILE *File . + // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. + // Closes iostream and flushes. + // iv) fseek on FILE * to end of this disjoint section. + // v) Continue writing scidac record. 
+ //////////////////////////////////////////////////////////////////// + off_t offset = ftell(File); + std::string format = getFormatString(); + BinarySimpleMunger munge; + BinaryIO::writeLatticeObject(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + err=limeWriterCloseRecord(LimeW); assert(err>=0); + //////////////////////////////////////// + // Write checksum element, propagaing forward from the BinaryIO + // Always pair a checksum with a binary object, and close message + //////////////////////////////////////// + scidacChecksum checksum; + std::stringstream streama; streama << std::hex << scidac_csuma; + std::stringstream streamb; streamb << std::hex << scidac_csumb; + checksum.suma= streama.str(); + checksum.sumb= streamb.str(); + std::cout << GridLogMessage<<" writing scidac checksums "< + void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) + { + scidacFile _scidacFile(grid); + writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); + writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); + } + //////////////////////////////////////////////// + // Write generic lattice field in scidac format + //////////////////////////////////////////////// + template + void writeScidacFieldRecord(Lattice &field,userRecord _userRecord) + { + typedef typename vobj::scalar_object sobj; + uint64_t nbytes; + GridBase * grid = field._grid; + + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; + + ScidacMetaData(field,header,_scidacRecord,_scidacFile); + + ////////////////////////////////////////////// + // Fill the Lime file record by record + ////////////////////////////////////////////// + writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message + 
writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); + writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); + writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA)); // Closes message with checksum + } +}; + +class IldgWriter : public ScidacWriter { + public: + + /////////////////////////////////// + // A little helper + /////////////////////////////////// + void writeLimeIldgLFN(std::string &LFN) + { + uint64_t PayloadSize = LFN.size(); + int err; + createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); + err=limeWriteRecordData(const_cast(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); + err=limeWriterCloseRecord(LimeW); assert(err>=0); } + //////////////////////////////////////////////////////////////// + // Special ILDG operations ; gauge configs only. + // Don't require scidac records EXCEPT checksum + // Use Grid MetaData object if present. + //////////////////////////////////////////////////////////////// template - uint32_t readConfiguration(Lattice > &Umu) { - typedef Lattice > GaugeField; - typedef LorentzColourMatrixD sobjd; - typedef LorentzColourMatrixF sobjf; - typedef iLorentzColourMatrix itype; - typedef LorentzColourMatrix sobj; - GridBase *grid = Umu._grid; - - ILDGField header; - readHeader(header); - - // now just the conf, ignore the header - std::string format = std::string("IEEE64BIG"); - do {limeReaderNextRecord(LimeR);} - while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16)); - - n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) - - - ILDGtype ILDGt(true, LimeR); - // this is special for double prec data, just for the moment - uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >( - Umu, filename, ILDGMunger(), 0, format, ILDGt); - - // Check configuration - // todo - - return csum; - } - - template - uint32_t writeConfiguration(Lattice > &Umu, std::string format) { + 
void writeConfiguration(Lattice > &Umu,int sequence,std::string LFN,std::string description) + { + GridBase * grid = Umu._grid; typedef Lattice > GaugeField; typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; - typedef LorentzColourMatrixD fobj; - ILDGField header; - // fill the header - header.floating_point = format; + uint64_t nbytes; - ILDGUnmunger munge; - unsigned int offset = writeHeader(header); + //////////////////////////////////////// + // fill the Grid header + //////////////////////////////////////// + FieldMetaData header; + scidacRecord _scidacRecord; + scidacFile _scidacFile; - BinaryIO::Uint32Checksum(Umu, munge, header.checksum); + ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); - // Write data record header - n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; - createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW); + std::string format = header.floating_point; + header.ensemble_id = description; + header.ensemble_label = description; + header.sequence_number = sequence; + header.ildg_lfn = LFN; - ILDGtype ILDGt(true, LimeW); - uint32_t csum = BinaryIO::writeObjectParallel( - Umu, filename, munge, 0, header.floating_point, ILDGt); + assert ( (format == std::string("IEEE32BIG")) + ||(format == std::string("IEEE64BIG")) ); - limeWriterCloseRecord(LimeW); + ////////////////////////////////////////////////////// + // Fill ILDG header data struct + ////////////////////////////////////////////////////// + ildgFormat ildgfmt ; + ildgfmt.field = std::string("su3gauge"); - // Last record - // the logical file name LNF - // look into documentation on how to generate this string - std::string LNF = "empty"; + if ( format == std::string("IEEE32BIG") ) { + ildgfmt.precision = 32; + } else { + ildgfmt.precision = 64; + } + ildgfmt.version = 1.0; + ildgfmt.lx = header.dimension[0]; + ildgfmt.ly = header.dimension[1]; + ildgfmt.lz = header.dimension[2]; + ildgfmt.lt = header.dimension[3]; + assert(header.nd==4); + 
assert(header.nd==header.dimension.size()); + ////////////////////////////////////////////////////////////////////////////// + // Fill the USQCD info field + ////////////////////////////////////////////////////////////////////////////// + usqcdInfo info; + info.version=1.0; + info.plaq = header.plaquette; + info.linktr = header.link_trace; - PayloadSize = sizeof(LNF); - createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW); - limeWriteRecordData(const_cast(LNF.c_str()), &PayloadSize, LimeW); - - limeWriterCloseRecord(LimeW); - - return csum; + std::cout << GridLogMessage << " Writing config; IldgIO "< + void readConfiguration(Lattice > &Umu, FieldMetaData &FieldMetaData_) { + + typedef Lattice > GaugeField; + typedef typename GaugeField::vector_object vobj; + typedef typename vobj::scalar_object sobj; + + typedef LorentzColourMatrixF fobj; + typedef LorentzColourMatrixD dobj; + + GridBase *grid = Umu._grid; + + std::vector dims = Umu._grid->FullDimensions(); + + assert(dims.size()==4); + + // Metadata holders + ildgFormat ildgFormat_ ; + std::string ildgLFN_ ; + scidacChecksum scidacChecksum_; + usqcdInfo usqcdInfo_ ; + + // track what we read from file + int found_ildgFormat =0; + int found_ildgLFN =0; + int found_scidacChecksum=0; + int found_usqcdInfo =0; + int found_ildgBinary =0; + int found_FieldMetaData =0; + + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + + // Binary format + std::string format; + + ////////////////////////////////////////////////////////////////////////// + // Loop over all records + // -- Order is poorly guaranteed except ILDG header preceeds binary section. + // -- Run like an event loop. + // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing + // that Scidac. + // -- Insist on Scidac checksum record. 
+ ////////////////////////////////////////////////////////////////////////// + + while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) { + + uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) + + ////////////////////////////////////////////////////////////////// + // If not BINARY_DATA read a string and parse + ////////////////////////////////////////////////////////////////// + if ( strncmp(limeReaderType(LimeR), ILDG_BINARY_DATA,strlen(ILDG_BINARY_DATA) ) ) { + + // Copy out the string + std::vector xmlc(nbytes+1,'\0'); + limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR); + std::cout << GridLogMessage<< "Non binary record :" < munge; + BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + } else { + GaugeSimpleMunger munge; + BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); + } + + found_ildgBinary = 1; + } + + } + + ////////////////////////////////////////////////////// + // Minimally must find binary segment and checksum + // Since this is an ILDG reader require ILDG format + ////////////////////////////////////////////////////// + assert(found_ildgBinary); + assert(found_ildgFormat); + assert(found_scidacChecksum); + + // Must find something with the lattice dimensions + assert(found_FieldMetaData||found_ildgFormat); + + if ( found_FieldMetaData ) { + + std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! 
"<1.1416 16 16 32 0 +//////////////////////// +struct scidacFile : Serializable { public: - // header strings (not in order) - std::vector dimension; - std::vector boundary; - int data_start; - std::string hdr_version; - std::string storage_format; - // Checks on data - double link_trace; - double plaquette; - uint32_t checksum; - unsigned int sequence_number; - std::string data_type; - std::string ensemble_id; - std::string ensemble_label; - std::string creator; - std::string creator_hardware; - std::string creation_date; - std::string archive_date; - std::string floating_point; -}; -} -#else -namespace Grid { + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, + double, version, + int, spacetime, + std::string, dims, // must convert to int + int, volfmt); -struct ILDGtype { - bool is_ILDG; - ILDGtype() : is_ILDG(false) {} -}; -} + std::vector getDimensions(void) { + std::stringstream stream(dims); + std::vector dimensions; + int n; + while(stream >> n){ + dimensions.push_back(n); + } + return dimensions; + } + void setDimensions(std::vector dimensions) { + char delimiter = ' '; + std::stringstream stream; + for(int i=0;i_ndimension; + setDimensions(grid->FullDimensions()); + volfmt = GRID_IO_SINGLEFILE; + } + +}; + +/////////////////////////////////////////////////////////////////////// +// scidac-private-record-xml : example +// +// 1.1Tue Jul 26 21:14:44 2011 UTC0 +// QDP_D3_ColorMatrixD34 +// 1444 +// +/////////////////////////////////////////////////////////////////////// + +struct scidacRecord : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, + double, version, + std::string, date, + int, recordtype, + std::string, datatype, + std::string, precision, + int, colors, + int, spins, + int, typesize, + int, datacount); + + scidacRecord() { version =1.0; } + +}; + +//////////////////////// +// ILDG format +//////////////////////// +struct ildgFormat : Serializable { +public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, + double, version, + 
std::string, field, + int, precision, + int, lx, + int, ly, + int, lz, + int, lt); + ildgFormat() { version=1.0; }; +}; +//////////////////////// +// USQCD info +//////////////////////// +struct usqcdInfo : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, + double, version, + double, plaq, + double, linktr, + std::string, info); + usqcdInfo() { + version=1.0; + }; +}; +//////////////////////// +// Scidac Checksum +//////////////////////// +struct scidacChecksum : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, + double, version, + std::string, suma, + std::string, sumb); + scidacChecksum() { + version=1.0; + }; +}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Type: scidac-file-xml MILC ILDG archival gauge configuration +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Type: +//////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//////////////////////// +// Scidac private file xml +// 1.1416 16 16 32 0 +//////////////////////// + +#if 0 +//////////////////////////////////////////////////////////////////////////////////////// +// From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf +//////////////////////////////////////////////////////////////////////////////////////// +struct usqcdPropFile : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, + double, version, + std::string, type, + std::string, info); + usqcdPropFile() { + version=1.0; + }; +}; +struct usqcdSourceInfo : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, + double, version, + std::string, info); + usqcdSourceInfo() { + version=1.0; + }; +}; +struct usqcdPropInfo : Serializable { + 
public: + GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, + double, version, + int, spin, + int, color, + std::string, info); + usqcdPropInfo() { + version=1.0; + }; +}; +#endif + +} #endif #endif diff --git a/lib/parallelIO/MetaData.h b/lib/parallelIO/MetaData.h new file mode 100644 index 00000000..6d45d0a5 --- /dev/null +++ b/lib/parallelIO/MetaData.h @@ -0,0 +1,325 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/parallelIO/NerscIO.h + + Copyright (C) 2015 + + + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Grid { + + /////////////////////////////////////////////////////// + // Precision mapping + /////////////////////////////////////////////////////// + template static std::string getFormatString (void) + { + std::string format; + typedef typename getPrecision::real_scalar_type stype; + if ( sizeof(stype) == sizeof(float) ) { + format = std::string("IEEE32BIG"); + } + if ( sizeof(stype) == sizeof(double) ) { + format = std::string("IEEE64BIG"); + } + return format; + } + //////////////////////////////////////////////////////////////////////////////// + // header specification/interpretation + //////////////////////////////////////////////////////////////////////////////// + class FieldMetaData : Serializable { + public: + + GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, + int, nd, + std::vector, dimension, + std::vector, boundary, + int, data_start, + std::string, hdr_version, + std::string, storage_format, + double, link_trace, + double, plaquette, + uint32_t, checksum, + uint32_t, scidac_checksuma, + uint32_t, scidac_checksumb, + unsigned int, sequence_number, + std::string, data_type, + std::string, ensemble_id, + std::string, ensemble_label, + std::string, ildg_lfn, + std::string, creator, + std::string, creator_hardware, + std::string, creation_date, + std::string, archive_date, + std::string, floating_point); + FieldMetaData(void) { + nd=4; + dimension.resize(4); + boundary.resize(4); + } + }; + + + + namespace QCD { + + using namespace Grid; + + + ////////////////////////////////////////////////////////////////////// + // Bit and Physical Checksumming and QA of data + ////////////////////////////////////////////////////////////////////// + inline void GridMetaData(GridBase 
*grid,FieldMetaData &header) + { + int nd = grid->_ndimension; + header.nd = nd; + header.dimension.resize(nd); + header.boundary.resize(nd); + for(int d=0;d_fdimensions[d]; + } + for(int d=0;dpw_name); + + // When + std::time_t t = std::time(nullptr); + std::tm tm_ = *std::localtime(&t); + std::ostringstream oss; + // oss << std::put_time(&tm_, "%c %Z"); + header.creation_date = oss.str(); + header.archive_date = header.creation_date; + + // What + struct utsname name; uname(&name); + header.creator_hardware = std::string(name.nodename)+"-"; + header.creator_hardware+= std::string(name.machine)+"-"; + header.creator_hardware+= std::string(name.sysname)+"-"; + header.creator_hardware+= std::string(name.release); + } + +#define dump_meta_data(field, s) \ + s << "BEGIN_HEADER" << std::endl; \ + s << "HDR_VERSION = " << field.hdr_version << std::endl; \ + s << "DATATYPE = " << field.data_type << std::endl; \ + s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \ + for(int i=0;i<4;i++){ \ + s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ + } \ + s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ + s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \ + for(int i=0;i<4;i++){ \ + s << "BOUNDARY_"< inline void PrepareMetaData(Lattice & field, FieldMetaData &header) +{ + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + MachineCharacteristics(header); + } + inline void GaugeStatistics(Lattice & data,FieldMetaData &header) + { + // How to convert data precision etc... + header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); + } + inline void GaugeStatistics(Lattice & data,FieldMetaData &header) + { + // How to convert data precision etc... 
+ header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); + header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); + } + template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) + { + + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); + } + template<> inline void PrepareMetaData(Lattice & field, FieldMetaData &header) + { + GridBase *grid = field._grid; + std::string format = getFormatString(); + header.floating_point = format; + header.checksum = 0x0; // Nersc checksum unused in ILDG, Scidac + GridMetaData(grid,header); + GaugeStatistics(field,header); + MachineCharacteristics(header); + } + + ////////////////////////////////////////////////////////////////////// + // Utilities ; these are QCD aware + ////////////////////////////////////////////////////////////////////// + inline void reconstruct3(LorentzColourMatrix & cm) + { + const int x=0; + const int y=1; + const int z=2; + for(int mu=0;mu using iLorentzColour2x3 = iVector, 2>, Nd >; + + typedef iLorentzColour2x3 LorentzColour2x3; + typedef iLorentzColour2x3 LorentzColour2x3F; + typedef iLorentzColour2x3 LorentzColour2x3D; + +///////////////////////////////////////////////////////////////////////////////// +// Simple classes for precision conversion +///////////////////////////////////////////////////////////////////////////////// +template +struct BinarySimpleUnmunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(sobj &in, fobj &out) { + // take word by word and transform accoding to the status + fobj_stype *out_buffer = (fobj_stype *)&out; + sobj_stype *in_buffer = (sobj_stype *)∈ + size_t fobj_words = sizeof(out) / sizeof(fobj_stype); + size_t sobj_words = 
sizeof(in) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + + } +}; + +template +struct BinarySimpleMunger { + typedef typename getPrecision::real_scalar_type fobj_stype; + typedef typename getPrecision::real_scalar_type sobj_stype; + + void operator()(fobj &in, sobj &out) { + // take word by word and transform accoding to the status + fobj_stype *in_buffer = (fobj_stype *)∈ + sobj_stype *out_buffer = (sobj_stype *)&out; + size_t fobj_words = sizeof(in) / sizeof(fobj_stype); + size_t sobj_words = sizeof(out) / sizeof(sobj_stype); + assert(fobj_words == sobj_words); + + for (unsigned int word = 0; word < sobj_words; word++) + out_buffer[word] = in_buffer[word]; // type conversion on the fly + + } +}; + + + template + struct GaugeSimpleMunger{ + void operator()(fobj &in, sobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; + }; + + template + struct GaugeSimpleUnmunger { + + void operator()(sobj &in, fobj &out) { + for (int mu = 0; mu < Nd; mu++) { + for (int i = 0; i < Nc; i++) { + for (int j = 0; j < Nc; j++) { + out(mu)()(i, j) = in(mu)()(i, j); + }} + } + }; + }; + + template + struct Gauge3x2munger{ + void operator() (fobj &in,sobj &out){ + for(int mu=0;mu + struct Gauge3x2unmunger{ + void operator() (sobj &in,fobj &out){ + for(int mu=0;mu -#include -#include -#include -#include - -#include -#include -#include - namespace Grid { namespace QCD { using namespace Grid; - //////////////////////////////////////////////////////////////////////////////// - // Some data types for intermediate storage - //////////////////////////////////////////////////////////////////////////////// - template using iLorentzColour2x3 = iVector, 2>, 4 >; - - typedef iLorentzColour2x3 LorentzColour2x3; - typedef iLorentzColour2x3 
LorentzColour2x3F; - typedef iLorentzColour2x3 LorentzColour2x3D; - - //////////////////////////////////////////////////////////////////////////////// - // header specification/interpretation - //////////////////////////////////////////////////////////////////////////////// - class NerscField { - public: - // header strings (not in order) - int dimension[4]; - std::string boundary[4]; - int data_start; - std::string hdr_version; - std::string storage_format; - // Checks on data - double link_trace; - double plaquette; - uint32_t checksum; - unsigned int sequence_number; - std::string data_type; - std::string ensemble_id ; - std::string ensemble_label ; - std::string creator ; - std::string creator_hardware ; - std::string creation_date ; - std::string archive_date ; - std::string floating_point; - }; - - ////////////////////////////////////////////////////////////////////// - // Bit and Physical Checksumming and QA of data - ////////////////////////////////////////////////////////////////////// - - inline void NerscGrid(GridBase *grid,NerscField &header) - { - assert(grid->_ndimension==4); - for(int d=0;d<4;d++) { - header.dimension[d] = grid->_fdimensions[d]; - } - for(int d=0;d<4;d++) { - header.boundary[d] = std::string("PERIODIC"); - } - } - template - inline void NerscStatistics(GaugeField & data,NerscField &header) - { - // How to convert data precision etc... 
- header.link_trace=Grid::QCD::WilsonLoops::linkTrace(data); - header.plaquette =Grid::QCD::WilsonLoops::avgPlaquette(data); - } - - inline void NerscMachineCharacteristics(NerscField &header) - { - // Who - struct passwd *pw = getpwuid (getuid()); - if (pw) header.creator = std::string(pw->pw_name); - - // When - std::time_t t = std::time(nullptr); - std::tm tm = *std::localtime(&t); - std::ostringstream oss; - // oss << std::put_time(&tm, "%c %Z"); - header.creation_date = oss.str(); - header.archive_date = header.creation_date; - - // What - struct utsname name; uname(&name); - header.creator_hardware = std::string(name.nodename)+"-"; - header.creator_hardware+= std::string(name.machine)+"-"; - header.creator_hardware+= std::string(name.sysname)+"-"; - header.creator_hardware+= std::string(name.release); - - } - ////////////////////////////////////////////////////////////////////// - // Utilities ; these are QCD aware - ////////////////////////////////////////////////////////////////////// - inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) - { - BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); - } - inline void reconstruct3(LorentzColourMatrix & cm) - { - const int x=0; - const int y=1; - const int z=2; - for(int mu=0;mu<4;mu++){ - cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy - cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz - cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx - } - } - - template - struct NerscSimpleMunger{ - void operator()(fobj &in, sobj &out, uint32_t &csum) { - for (int mu = 0; mu < Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - NerscChecksum((uint32_t *)&in, sizeof(in), csum); - }; - }; - - template - struct NerscSimpleUnmunger { - void operator()(sobj &in, fobj &out, uint32_t &csum) { - for (int mu = 0; mu 
< Nd; mu++) { - for (int i = 0; i < Nc; i++) { - for (int j = 0; j < Nc; j++) { - out(mu)()(i, j) = in(mu)()(i, j); - } - } - } - NerscChecksum((uint32_t *)&out, sizeof(out), csum); - }; - }; - - template - struct Nersc3x2munger{ - void operator() (fobj &in,sobj &out,uint32_t &csum){ - - NerscChecksum((uint32_t *)&in,sizeof(in),csum); - - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)()(i,j) = in(mu)(i)(j); - }} - } - reconstruct3(out); - } - }; - - template - struct Nersc3x2unmunger{ - - void operator() (sobj &in,fobj &out,uint32_t &csum){ - - - for(int mu=0;mu<4;mu++){ - for(int i=0;i<2;i++){ - for(int j=0;j<3;j++){ - out(mu)(i)(j) = in(mu)()(i,j); - }} - } - - NerscChecksum((uint32_t *)&out,sizeof(out),csum); - - } - }; - - //////////////////////////////////////////////////////////////////////////////// // Write and read from fstream; comput header offset for payload //////////////////////////////////////////////////////////////////////////////// @@ -216,42 +45,17 @@ namespace Grid { std::ofstream fout(file,std::ios::out); } -#define dump_nersc_header(field, s) \ - s << "BEGIN_HEADER" << std::endl; \ - s << "HDR_VERSION = " << field.hdr_version << std::endl; \ - s << "DATATYPE = " << field.data_type << std::endl; \ - s << "STORAGE_FORMAT = " << field.storage_format << std::endl; \ - for(int i=0;i<4;i++){ \ - s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ - } \ - s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ - s << "PLAQUETTE = " << std::setprecision(10) << field.plaquette << std::endl; \ - for(int i=0;i<4;i++){ \ - s << "BOUNDARY_"< header; @@ -323,21 +127,21 @@ namespace Grid { return field.data_start; } - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // Now the meat: the object readers - 
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#define PARALLEL_READ -#define PARALLEL_WRITE + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Now the meat: the object readers + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - template - static inline void readConfiguration(Lattice > &Umu,NerscField& header,std::string file) - { + template + static inline void readConfiguration(Lattice > &Umu, + FieldMetaData& header, + std::string file) + { typedef Lattice > GaugeField; GridBase *grid = Umu._grid; int offset = readHeader(file,Umu._grid,header); - NerscField clone(header); + FieldMetaData clone(header); std::string format(header.floating_point); @@ -346,76 +150,78 @@ namespace Grid { int ieee64big = (format == std::string("IEEE64BIG")); int ieee64 = (format == std::string("IEEE64")); - uint32_t csum; + uint32_t nersc_csum,scidac_csuma,scidac_csumb; // depending on datatype, set up munger; // munger is a function of if ( header.data_type == std::string("4D_SU3_GAUGE") ) { - if ( ieee32 || ieee32big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); -#else - csum=BinaryIO::readObjectSerial, LorentzColour2x3F> - (Umu,file,Nersc3x2munger(), offset,format); -#endif - } - if ( ieee64 || ieee64big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); -#else - csum=BinaryIO::readObjectSerial, LorentzColour2x3D> - (Umu,file,Nersc3x2munger(),offset,format); -#endif - } - } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { if ( ieee32 || ieee32big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); -#else - 
csum=BinaryIO::readObjectSerial,LorentzColourMatrixF> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif + BinaryIO::readLatticeObject, LorentzColour2x3F> + (Umu,file,Gauge3x2munger(), offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } if ( ieee64 || ieee64big ) { -#ifdef PARALLEL_READ - csum=BinaryIO::readObjectParallel,LorentzColourMatrixD> - (Umu,file,NerscSimpleMunger(),offset,format); -#else - csum=BinaryIO::readObjectSerial,LorentzColourMatrixD> - (Umu,file,NerscSimpleMunger(),offset,format); -#endif + BinaryIO::readLatticeObject, LorentzColour2x3D> + (Umu,file,Gauge3x2munger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { + if ( ieee32 || ieee32big ) { + BinaryIO::readLatticeObject,LorentzColourMatrixF> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); + } + if ( ieee64 || ieee64big ) { + BinaryIO::readLatticeObject,LorentzColourMatrixD> + (Umu,file,GaugeSimpleMunger(),offset,format, + nersc_csum,scidac_csuma,scidac_csumb); } } else { assert(0); } - NerscStatistics(Umu,clone); + GaugeStatistics(Umu,clone); - std::cout<= 1.0e-5 ) { + std::cout << " Plaquette mismatch "< - static inline void writeConfiguration(Lattice > &Umu,std::string file, int two_row,int bits32) + static inline void writeConfiguration(Lattice > &Umu, + std::string file, + int two_row, + int bits32) { typedef Lattice > GaugeField; typedef iLorentzColourMatrix vobj; typedef typename vobj::scalar_object sobj; + FieldMetaData header; + /////////////////////////////////////////// // Following should become arguments - NerscField header; + /////////////////////////////////////////// header.sequence_number = 1; header.ensemble_id = "UKQCD"; header.ensemble_label = "DWF"; @@ -425,45 +231,32 @@ namespace Grid { GridBase *grid = Umu._grid; - NerscGrid(grid,header); - NerscStatistics(Umu,header); - NerscMachineCharacteristics(header); + GridMetaData(grid,header); + 
assert(header.nd==4); + GaugeStatistics(Umu,header); + MachineCharacteristics(header); - uint32_t csum; int offset; truncate(file); - if ( two_row ) { + // Sod it -- always write 3x3 double + header.floating_point = std::string("IEEE64BIG"); + header.data_type = std::string("4D_SU3_GAUGE_3x3"); + GaugeSimpleUnmunger munge; + offset = writeHeader(header,file); - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE"); - Nersc3x2unmunger munge; - BinaryIO::Uint32Checksum(Umu, munge,header.checksum); - offset = writeHeader(header,file); -#ifdef PARALLEL_WRITE - csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); -#else - csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); -#endif - } else { - header.floating_point = std::string("IEEE64BIG"); - header.data_type = std::string("4D_SU3_GAUGE_3x3"); - NerscSimpleUnmunger munge; - BinaryIO::Uint32Checksum(Umu, munge,header.checksum); - offset = writeHeader(header,file); -#ifdef PARALLEL_WRITE - csum=BinaryIO::writeObjectParallel(Umu,file,munge,offset,header.floating_point); -#else - csum=BinaryIO::writeObjectSerial(Umu,file,munge,offset,header.floating_point); -#endif - } + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeLatticeObject(Umu,file,munge,offset,header.floating_point, + nersc_csum,scidac_csuma,scidac_csumb); + header.checksum = nersc_csum; + writeHeader(header,file); - std::cout< - uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); - assert(csum == header.checksum ); + if ( nersc_csum != header.checksum ) { + std::cerr << "checksum mismatch "< ImprovedStaggeredFermion }} +//////////////////// +// Scalar QED actions +// TODO: this needs to move to another header after rename to Fermion.h +//////////////////// +#include +#include + #endif diff --git 
a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 20458b6d..524179f5 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -644,19 +644,16 @@ class StaggeredImpl : public PeriodicGaugeImpl using iImplScalar = iScalar > >; template using iImplSpinor = iScalar > >; template using iImplHalfSpinor = iScalar > >; template using iImplDoubledGaugeField = iVector >, Nds>; template using iImplPropagator = iScalar > >; - typedef iImplScalar SiteComplex; typedef iImplSpinor SiteSpinor; typedef iImplHalfSpinor SiteHalfSpinor; typedef iImplDoubledGaugeField SiteDoubledGaugeField; typedef iImplPropagator SitePropagator; - typedef Lattice ComplexField; typedef Lattice FermionField; typedef Lattice DoubledGaugeField; typedef Lattice PropagatorField; @@ -775,7 +772,6 @@ class StaggeredImpl : public PeriodicGaugeImpl using iImplScalar = iScalar > >; template using iImplSpinor = iScalar > >; template using iImplHalfSpinor = iScalar > >; template using iImplDoubledGaugeField = iVector >, Nds>; @@ -792,12 +788,10 @@ class StaggeredImpl : public PeriodicGaugeImpl DoubledGaugeField; typedef Lattice PropagatorField; - typedef iImplScalar SiteComplex; typedef iImplSpinor SiteSpinor; typedef iImplHalfSpinor SiteHalfSpinor; - typedef Lattice ComplexField; typedef Lattice FermionField; typedef SimpleCompressor Compressor; diff --git a/lib/qcd/action/gauge/GaugeImplTypes.h b/lib/qcd/action/gauge/GaugeImplTypes.h index 29e79581..9e3e0d68 100644 --- a/lib/qcd/action/gauge/GaugeImplTypes.h +++ b/lib/qcd/action/gauge/GaugeImplTypes.h @@ -40,12 +40,15 @@ namespace QCD { typedef typename GImpl::Simd Simd; \ typedef typename GImpl::LinkField GaugeLinkField; \ typedef typename GImpl::Field GaugeField; \ + typedef typename GImpl::ComplexField ComplexField;\ typedef typename GImpl::SiteField SiteGaugeField; \ + typedef typename GImpl::SiteComplex SiteComplex; \ typedef typename GImpl::SiteLink 
SiteGaugeLink; -#define INHERIT_FIELD_TYPES(Impl) \ - typedef typename Impl::Simd Simd; \ - typedef typename Impl::SiteField SiteField; \ +#define INHERIT_FIELD_TYPES(Impl) \ + typedef typename Impl::Simd Simd; \ + typedef typename Impl::ComplexField ComplexField; \ + typedef typename Impl::SiteField SiteField; \ typedef typename Impl::Field Field; // hardcodes the exponential approximation in the template @@ -53,14 +56,17 @@ template class GaugeImplType public: typedef S Simd; - template using iImplGaugeLink = iScalar>>; - template using iImplGaugeField = iVector>, Nd>; + template using iImplScalar = iScalar > >; + template using iImplGaugeLink = iScalar > >; + template using iImplGaugeField = iVector >, Nd>; + typedef iImplScalar SiteComplex; typedef iImplGaugeLink SiteLink; typedef iImplGaugeField SiteField; - typedef Lattice LinkField; - typedef Lattice Field; + typedef Lattice ComplexField; + typedef Lattice LinkField; + typedef Lattice Field; // Guido: we can probably separate the types from the HMC functions // this will create 2 kind of implementations diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h new file mode 100644 index 00000000..7e21a1de --- /dev/null +++ b/lib/qcd/action/gauge/Photon.h @@ -0,0 +1,286 @@ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/gauge/Photon.h + + Copyright (C) 2015 + + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ +#ifndef QCD_PHOTON_ACTION_H +#define QCD_PHOTON_ACTION_H + +namespace Grid{ +namespace QCD{ + template + class QedGimpl + { + public: + typedef S Simd; + + template + using iImplGaugeLink = iScalar>>; + template + using iImplGaugeField = iVector>, Nd>; + + typedef iImplGaugeLink SiteLink; + typedef iImplGaugeField SiteField; + typedef SiteField SiteComplex; + + typedef Lattice LinkField; + typedef Lattice Field; + typedef Field ComplexField; + }; + + typedef QedGimpl QedGimplR; + + template + class Photon + { + public: + INHERIT_GIMPL_TYPES(Gimpl); + GRID_SERIALIZABLE_ENUM(Gauge, undef, feynman, 1, coulomb, 2, landau, 3); + GRID_SERIALIZABLE_ENUM(ZmScheme, undef, qedL, 1, qedTL, 2); + public: + Photon(Gauge gauge, ZmScheme zmScheme); + virtual ~Photon(void) = default; + void FreePropagator(const GaugeField &in, GaugeField &out); + void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); + void StochasticWeight(GaugeLinkField &weight); + void StochasticField(GaugeField &out, GridParallelRNG &rng); + void StochasticField(GaugeField &out, GridParallelRNG &rng, + const GaugeLinkField &weight); + private: + void invKHatSquared(GaugeLinkField &out); + void zmSub(GaugeLinkField &out); + private: + Gauge gauge_; + ZmScheme zmScheme_; + }; + + typedef Photon PhotonR; + + template + Photon::Photon(Gauge gauge, ZmScheme zmScheme) + : gauge_(gauge), zmScheme_(zmScheme) + {} + + template + void Photon::FreePropagator (const GaugeField &in,GaugeField &out) + { + FFT theFFT(in._grid); + + GaugeField in_k(in._grid); + GaugeField prop_k(in._grid); + + 
theFFT.FFT_all_dim(in_k,in,FFT::forward); + MomentumSpacePropagator(prop_k,in_k); + theFFT.FFT_all_dim(out,prop_k,FFT::backward); + } + + template + void Photon::invKHatSquared(GaugeLinkField &out) + { + GridBase *grid = out._grid; + GaugeLinkField kmu(grid), one(grid); + const unsigned int nd = grid->_ndimension; + std::vector &l = grid->_fdimensions; + std::vector zm(nd,0); + TComplex Tone = Complex(1.0,0.0); + TComplex Tzero= Complex(0.0,0.0); + + one = Complex(1.0,0.0); + out = zero; + for(int mu = 0; mu < nd; mu++) + { + Real twoPiL = M_PI*2./l[mu]; + + LatticeCoordinate(kmu,mu); + kmu = 2.*sin(.5*twoPiL*kmu); + out = out + kmu*kmu; + } + pokeSite(Tone, out, zm); + out = one/out; + pokeSite(Tzero, out, zm); + } + + template + void Photon::zmSub(GaugeLinkField &out) + { + GridBase *grid = out._grid; + const unsigned int nd = grid->_ndimension; + + switch (zmScheme_) + { + case ZmScheme::qedTL: + { + std::vector zm(nd,0); + TComplex Tzero = Complex(0.0,0.0); + + pokeSite(Tzero, out, zm); + + break; + } + case ZmScheme::qedL: + { + LatticeInteger spNrm(grid), coor(grid); + GaugeLinkField z(grid); + + spNrm = zero; + for(int d = 0; d < grid->_ndimension - 1; d++) + { + LatticeCoordinate(coor,d); + spNrm = spNrm + coor*coor; + } + out = where(spNrm == Integer(0), 0.*out, out); + + break; + } + default: + break; + } + } + + template + void Photon::MomentumSpacePropagator(const GaugeField &in, + GaugeField &out) + { + GridBase *grid = out._grid; + LatticeComplex k2Inv(grid); + + invKHatSquared(k2Inv); + zmSub(k2Inv); + + out = in*k2Inv; + } + + template + void Photon::StochasticWeight(GaugeLinkField &weight) + { + auto *grid = dynamic_cast(weight._grid); + const unsigned int nd = grid->_ndimension; + std::vector latt_size = grid->_fdimensions; + + Integer vol = 1; + for(int d = 0; d < nd; d++) + { + vol = vol * latt_size[d]; + } + invKHatSquared(weight); + weight = sqrt(vol*real(weight)); + zmSub(weight); + } + + template + void Photon::StochasticField(GaugeField 
&out, GridParallelRNG &rng) + { + auto *grid = dynamic_cast(out._grid); + GaugeLinkField weight(grid); + + StochasticWeight(weight); + StochasticField(out, rng, weight); + } + + template + void Photon::StochasticField(GaugeField &out, GridParallelRNG &rng, + const GaugeLinkField &weight) + { + auto *grid = dynamic_cast(out._grid); + const unsigned int nd = grid->_ndimension; + GaugeLinkField r(grid); + GaugeField aTilde(grid); + FFT fft(grid); + + for(int mu = 0; mu < nd; mu++) + { + gaussian(rng, r); + r = weight*r; + pokeLorentz(aTilde, r, mu); + } + fft.FFT_all_dim(out, aTilde, FFT::backward); + + out = real(out); + } +// template +// void Photon::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, +// const GaugeField &in) +// { +// +// FeynmanGaugeMomentumSpacePropagator_TL(out,in); +// +// GridBase *grid = out._grid; +// LatticeInteger coor(grid); +// GaugeField zz(grid); zz=zero; +// +// // xyzt +// for(int d = 0; d < grid->_ndimension-1;d++){ +// LatticeCoordinate(coor,d); +// out = where(coor==Integer(0),zz,out); +// } +// } +// +// template +// void Photon::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, +// const GaugeField &in) +// { +// +// // what type LatticeComplex +// GridBase *grid = out._grid; +// int nd = grid->_ndimension; +// +// typedef typename GaugeField::vector_type vector_type; +// typedef typename GaugeField::scalar_type ScalComplex; +// typedef Lattice > LatComplex; +// +// std::vector latt_size = grid->_fdimensions; +// +// LatComplex denom(grid); denom= zero; +// LatComplex one(grid); one = ScalComplex(1.0,0.0); +// LatComplex kmu(grid); +// +// ScalComplex ci(0.0,1.0); +// // momphase = n * 2pi / L +// for(int mu=0;mu zero_mode(nd,0); +// TComplexD Tone = ComplexD(1.0,0.0); +// TComplexD Tzero= ComplexD(0.0,0.0); +// +// pokeSite(Tone,denom,zero_mode); +// +// denom= one/denom; +// +// pokeSite(Tzero,denom,zero_mode); +// +// out = zero; +// out = in*denom; +// }; + +}} +#endif diff --git 
a/lib/qcd/action/gauge/WilsonGaugeAction.h b/lib/qcd/action/gauge/WilsonGaugeAction.h index 77c2424c..1ea780b7 100644 --- a/lib/qcd/action/gauge/WilsonGaugeAction.h +++ b/lib/qcd/action/gauge/WilsonGaugeAction.h @@ -71,14 +71,18 @@ class WilsonGaugeAction : public Action { RealD factor = 0.5 * beta / RealD(Nc); - GaugeLinkField Umu(U._grid); + //GaugeLinkField Umu(U._grid); GaugeLinkField dSdU_mu(U._grid); for (int mu = 0; mu < Nd; mu++) { - Umu = PeekIndex(U, mu); + //Umu = PeekIndex(U, mu); // Staple in direction mu - WilsonLoops::Staple(dSdU_mu, U, mu); - dSdU_mu = Ta(Umu * dSdU_mu) * factor; + //WilsonLoops::Staple(dSdU_mu, U, mu); + //dSdU_mu = Ta(Umu * dSdU_mu) * factor; + + + WilsonLoops::StapleMult(dSdU_mu, U, mu); + dSdU_mu = Ta(dSdU_mu) * factor; PokeIndex(dSdU, dSdU_mu, mu); } diff --git a/lib/qcd/action/scalar/Scalar.h b/lib/qcd/action/scalar/Scalar.h index e5bea275..485a6765 100644 --- a/lib/qcd/action/scalar/Scalar.h +++ b/lib/qcd/action/scalar/Scalar.h @@ -31,6 +31,7 @@ directory #include #include +#include namespace Grid { namespace QCD { @@ -39,6 +40,10 @@ namespace QCD { typedef ScalarAction ScalarActionF; typedef ScalarAction ScalarActionD; + template using ScalarAdjActionR = ScalarInteractionAction, Dimensions>; + template using ScalarAdjActionF = ScalarInteractionAction, Dimensions>; + template using ScalarAdjActionD = ScalarInteractionAction, Dimensions>; + } } diff --git a/lib/qcd/action/scalar/ScalarAction.h b/lib/qcd/action/scalar/ScalarAction.h index f10ec9a6..2c82d2e3 100644 --- a/lib/qcd/action/scalar/ScalarAction.h +++ b/lib/qcd/action/scalar/ScalarAction.h @@ -6,10 +6,10 @@ Copyright (C) 2015 -Author: Azusa Yamaguchi -Author: Peter Boyle -Author: neo -Author: paboyle + Author: Azusa Yamaguchi + Author: Peter Boyle + Author: neo + Author: paboyle This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,50 +35,49 @@ directory namespace Grid { // FIXME 
drop the QCD namespace everywhere here - - template - class ScalarAction : public QCD::Action { - public: + +template +class ScalarAction : public QCD::Action { + public: INHERIT_FIELD_TYPES(Impl); - - private: + + private: RealD mass_square; RealD lambda; - - public: - ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; - virtual std::string LogParameters(){ + public: + ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l) {} + + virtual std::string LogParameters() { std::stringstream sstream; sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; return sstream.str(); - } - - virtual std::string action_name(){return "ScalarAction";} - - virtual void refresh(const Field &U, - GridParallelRNG &pRNG){}; // noop as no pseudoferms - + virtual std::string action_name() {return "ScalarAction";} + + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} // noop as no pseudoferms + virtual RealD S(const Field &p) { return (mass_square * 0.5 + QCD::Nd) * ScalarObs::sumphisquared(p) + - (lambda / 24.) * ScalarObs::sumphifourth(p) + - ScalarObs::sumphider(p); + (lambda / 24.) * ScalarObs::sumphifourth(p) + + ScalarObs::sumphider(p); }; - + virtual void deriv(const Field &p, - Field &force) { + Field &force) { Field tmp(p._grid); Field p2(p._grid); ScalarObs::phisquared(p2, p); tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); - - force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) * p2 * p + tmp; - }; - }; - -} // Grid + + force =+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) 
* p2 * p + tmp; + } +}; + + + +} // namespace Grid #endif // SCALAR_ACTION_H diff --git a/lib/qcd/action/scalar/ScalarImpl.h b/lib/qcd/action/scalar/ScalarImpl.h index ee2d2fb8..f85ab840 100644 --- a/lib/qcd/action/scalar/ScalarImpl.h +++ b/lib/qcd/action/scalar/ScalarImpl.h @@ -5,96 +5,158 @@ namespace Grid { //namespace QCD { - template - class ScalarImplTypes { - public: +template +class ScalarImplTypes { + public: typedef S Simd; - + template using iImplField = iScalar > >; - + typedef iImplField SiteField; - + typedef SiteField SitePropagator; + typedef SiteField SiteComplex; typedef Lattice Field; + typedef Field ComplexField; + typedef Field FermionField; + typedef Field PropagatorField; static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ gaussian(pRNG, P); } - + static inline Field projectForce(Field& P){return P;} - - static inline void update_field(Field& P, Field& U, double ep){ + + static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; } - - static inline RealD FieldSquareNorm(Field& U){ + + static inline RealD FieldSquareNorm(Field& U) { return (- sum(trace(U*U))/2.0); } - + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { gaussian(pRNG, U); } - + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { gaussian(pRNG, U); } - + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { U = 1.0; } + static void MomentumSpacePropagator(Field &out, RealD m) + { + GridBase *grid = out._grid; + Field kmu(grid), one(grid); + const unsigned int nd = grid->_ndimension; + std::vector &l = grid->_fdimensions; + + one = Complex(1.0,0.0); + out = m*m; + for(int mu = 0; mu < nd; mu++) + { + Real twoPiL = M_PI*2./l[mu]; + + LatticeCoordinate(kmu,mu); + kmu = 2.*sin(.5*twoPiL*kmu); + out = out + kmu*kmu; + } + out = one/out; + } + + static void FreePropagator(const Field &in, Field &out, + const Field &momKernel) + { + FFT fft((GridCartesian *)in._grid); + Field inFT(in._grid); 
+ + fft.FFT_all_dim(inFT, in, FFT::forward); + inFT = inFT*momKernel; + fft.FFT_all_dim(out, inFT, FFT::backward); + } + + static void FreePropagator(const Field &in, Field &out, RealD m) + { + Field momKernel(in._grid); + + MomentumSpacePropagator(momKernel, m); + FreePropagator(in, out, momKernel); + } + }; template - class ScalarMatrixImplTypes { + class ScalarAdjMatrixImplTypes { public: typedef S Simd; + typedef QCD::SU Group; template - using iImplField = iScalar > >; + using iImplField = iScalar>>; + template + using iImplComplex = iScalar>>; + + typedef iImplField SiteField; + typedef SiteField SitePropagator; + typedef iImplComplex SiteComplex; - typedef iImplField SiteField; - - - typedef Lattice Field; - - static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ - gaussian(pRNG, P); + typedef Lattice Field; + typedef Lattice ComplexField; + typedef Field FermionField; + typedef Field PropagatorField; + + static inline void generate_momenta(Field& P, GridParallelRNG& pRNG) { + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, P); } - - static inline Field projectForce(Field& P){return P;} - - static inline void update_field(Field& P, Field& U, double ep){ + + static inline Field projectForce(Field& P) {return P;} + + static inline void update_field(Field& P, Field& U, double ep) { U += P*ep; } - - static inline RealD FieldSquareNorm(Field& U){ - return (TensorRemove(- sum(trace(U*U))*0.5).real()); + + static inline RealD FieldSquareNorm(Field& U) { + return (TensorRemove(sum(trace(U*U))).real()); } - + static inline void HotConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U); } - + static inline void TepidConfiguration(GridParallelRNG &pRNG, Field &U) { - gaussian(pRNG, U); + Group::GaussianFundamentalLieAlgebraMatrix(pRNG, U, 0.01); } - + static inline void ColdConfiguration(GridParallelRNG &pRNG, Field &U) { - U = 1.0; + U = zero; } - + }; - - + + typedef ScalarImplTypes 
ScalarImplR; typedef ScalarImplTypes ScalarImplF; typedef ScalarImplTypes ScalarImplD; + typedef ScalarImplTypes ScalarImplCR; + typedef ScalarImplTypes ScalarImplCF; + typedef ScalarImplTypes ScalarImplCD; + + // Hardcoding here the size of the matrices + typedef ScalarAdjMatrixImplTypes ScalarAdjImplR; + typedef ScalarAdjMatrixImplTypes ScalarAdjImplF; + typedef ScalarAdjMatrixImplTypes ScalarAdjImplD; + + template using ScalarNxNAdjImplR = ScalarAdjMatrixImplTypes; + template using ScalarNxNAdjImplF = ScalarAdjMatrixImplTypes; + template using ScalarNxNAdjImplD = ScalarAdjMatrixImplTypes; - //} -} + //} +} #endif diff --git a/lib/qcd/action/scalar/ScalarInteractionAction.h b/lib/qcd/action/scalar/ScalarInteractionAction.h index bd54a010..4d189352 100644 --- a/lib/qcd/action/scalar/ScalarInteractionAction.h +++ b/lib/qcd/action/scalar/ScalarInteractionAction.h @@ -6,10 +6,7 @@ Copyright (C) 2015 -Author: Azusa Yamaguchi -Author: Peter Boyle -Author: neo -Author: paboyle + Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,55 +27,122 @@ directory *************************************************************************************/ /* END LEGAL */ -#ifndef SCALAR_ACTION_H -#define SCALAR_ACTION_H +#ifndef SCALAR_INT_ACTION_H +#define SCALAR_INT_ACTION_H + + +// Note: this action can completely absorb the ScalarAction for real float fields +// use the scalarObjs to generalise the structure namespace Grid { // FIXME drop the QCD namespace everywhere here - - template + + template class ScalarInteractionAction : public QCD::Action { public: INHERIT_FIELD_TYPES(Impl); - private: RealD mass_square; RealD lambda; - - public: - ScalarAction(RealD ms, RealD l) : mass_square(ms), lambda(l){}; - virtual std::string LogParameters(){ + + typedef typename Field::vector_object vobj; + typedef CartesianStencil Stencil; + + SimpleCompressor compressor; + int npoint = 
2*Ndim; + std::vector directions;// = {0,1,2,3,0,1,2,3}; // forcing 4 dimensions + std::vector displacements;// = {1,1,1,1, -1,-1,-1,-1}; + + + public: + + ScalarInteractionAction(RealD ms, RealD l) : mass_square(ms), lambda(l), displacements(2*Ndim,0), directions(2*Ndim,0){ + for (int mu = 0 ; mu < Ndim; mu++){ + directions[mu] = mu; directions[mu+Ndim] = mu; + displacements[mu] = 1; displacements[mu+Ndim] = -1; + } + } + + virtual std::string LogParameters() { std::stringstream sstream; sstream << GridLogMessage << "[ScalarAction] lambda : " << lambda << std::endl; sstream << GridLogMessage << "[ScalarAction] mass_square : " << mass_square << std::endl; return sstream.str(); - } - - virtual std::string action_name(){return "ScalarAction";} - - virtual void refresh(const Field &U, - GridParallelRNG &pRNG){}; // noop as no pseudoferms - + + virtual std::string action_name() {return "ScalarAction";} + + virtual void refresh(const Field &U, GridParallelRNG &pRNG) {} + virtual RealD S(const Field &p) { - return (mass_square * 0.5 + QCD::Nd) * ScalarObs::sumphisquared(p) + - (lambda / 24.) 
* ScalarObs::sumphifourth(p) + - ScalarObs::sumphider(p); + assert(p._grid->Nd() == Ndim); + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); + Field action(p._grid), pshift(p._grid), phisquared(p._grid); + phisquared = p*p; + action = (2.0*Ndim + mass_square)*phisquared - lambda/24.*phisquared*phisquared; + for (int mu = 0; mu < Ndim; mu++) { + // pshift = Cshift(p, mu, +1); // not efficient, implement with stencils + parallel_for (int i = 0; i < p._grid->oSites(); i++) { + int permute_type; + StencilEntry *SE; + vobj temp2; + const vobj *temp, *t_p; + + SE = phiStencil.GetEntry(permute_type, mu, i); + t_p = &p._odata[i]; + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + action._odata[i] -= temp2*(*t_p) + (*t_p)*temp2; + } else { + action._odata[i] -= (*temp)*(*t_p) + (*t_p)*(*temp); + } + } else { + action._odata[i] -= phiStencil.CommBuf()[SE->_offset]*(*t_p) + (*t_p)*phiStencil.CommBuf()[SE->_offset]; + } + } + // action -= pshift*p + p*pshift; + } + // NB the trace in the algebra is normalised to 1/2 + // minus sign coming from the antihermitian fields + return -(TensorRemove(sum(trace(action)))).real(); }; - - virtual void deriv(const Field &p, - Field &force) { - Field tmp(p._grid); - Field p2(p._grid); - ScalarObs::phisquared(p2, p); - tmp = -(Cshift(p, 0, -1) + Cshift(p, 0, 1)); - for (int mu = 1; mu < QCD::Nd; mu++) tmp -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + + virtual void deriv(const Field &p, Field &force) { + assert(p._grid->Nd() == Ndim); + force = (2.0*Ndim + mass_square)*p - lambda/12.*p*p*p; + // move this outside + static Stencil phiStencil(p._grid, npoint, 0, directions, displacements); + phiStencil.HaloExchange(p, compressor); - force=+(mass_square + 2. * QCD::Nd) * p + (lambda / 6.) 
* p2 * p + tmp; - }; + //for (int mu = 0; mu < QCD::Nd; mu++) force -= Cshift(p, mu, -1) + Cshift(p, mu, 1); + for (int point = 0; point < npoint; point++) { + parallel_for (int i = 0; i < p._grid->oSites(); i++) { + const vobj *temp; + vobj temp2; + int permute_type; + StencilEntry *SE; + SE = phiStencil.GetEntry(permute_type, point, i); + + if ( SE->_is_local ) { + temp = &p._odata[SE->_offset]; + if ( SE->_permute ) { + permute(temp2, *temp, permute_type); + force._odata[i] -= temp2; + } else { + force._odata[i] -= *temp; + } + } else { + force._odata[i] -= phiStencil.CommBuf()[SE->_offset]; + } + } + } + } }; -} // Grid +} // namespace Grid -#endif // SCALAR_ACTION_H +#endif // SCALAR_INT_ACTION_H diff --git a/lib/qcd/hmc/GenericHMCrunner.h b/lib/qcd/hmc/GenericHMCrunner.h index c0c5079e..4f6c1af0 100644 --- a/lib/qcd/hmc/GenericHMCrunner.h +++ b/lib/qcd/hmc/GenericHMCrunner.h @@ -207,6 +207,12 @@ using GenericHMCRunnerTemplate = HMCWrapperTemplate ScalarGenericHMCRunner; +typedef HMCWrapperTemplate + ScalarAdjGenericHMCRunner; + +template +using ScalarNxNAdjGenericHMCRunner = HMCWrapperTemplate < ScalarNxNAdjImplR, MinimumNorm2, ScalarNxNMatrixFields >; + } // namespace QCD } // namespace Grid diff --git a/lib/qcd/hmc/HMC.h b/lib/qcd/hmc/HMC.h index ac690b60..5688bb24 100644 --- a/lib/qcd/hmc/HMC.h +++ b/lib/qcd/hmc/HMC.h @@ -76,7 +76,7 @@ struct HMCparameters: Serializable { template < class ReaderClass > void initialize(Reader &TheReader){ - std::cout << "Reading HMC\n"; + std::cout << GridLogMessage << "Reading HMC\n"; read(TheReader, "HMC", *this); } diff --git a/lib/qcd/hmc/HMCResourceManager.h b/lib/qcd/hmc/HMCResourceManager.h index 9f4c99a9..cf0000ed 100644 --- a/lib/qcd/hmc/HMCResourceManager.h +++ b/lib/qcd/hmc/HMCResourceManager.h @@ -253,6 +253,7 @@ class HMCResourceManager { template void AddObservable(Types&&... 
Args){ ObservablesList.push_back(std::unique_ptr(new T(std::forward(Args)...))); + ObservablesList.back()->print_parameters(); } std::vector* > GetObservables(){ @@ -297,4 +298,4 @@ private: } } -#endif // HMC_RESOURCE_MANAGER_H \ No newline at end of file +#endif // HMC_RESOURCE_MANAGER_H diff --git a/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h b/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h index 251ed042..59d655ad 100644 --- a/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/BinaryCheckpointer.h @@ -62,36 +62,50 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer { fout.close(); } - void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, - GridParallelRNG &pRNG) { + void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { + if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - BinaryIO::BinarySimpleUnmunger munge; + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + + BinarySimpleUnmunger munge; truncate(rng); - BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); + BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); truncate(config); - uint32_t csum = BinaryIO::writeObjectParallel( - U, config, munge, 0, Params.format); + + BinaryIO::writeLatticeObject(U, config, munge, 0, Params.format, + nersc_csum,scidac_csuma,scidac_csumb); std::cout << GridLogMessage << "Written Binary Configuration " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum <<"/" + << scidac_csuma <<"/" + << scidac_csumb + << std::dec << std::endl; } + }; - void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, - GridParallelRNG &pRNG) { + void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - BinaryIO::BinarySimpleMunger 
munge; - BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = BinaryIO::readObjectParallel( - U, config, munge, 0, Params.format); + BinarySimpleMunger munge; + uint32_t nersc_csum; + uint32_t scidac_csuma; + uint32_t scidac_csumb; + BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + BinaryIO::readLatticeObject(U, config, munge, 0, Params.format, + nersc_csum,scidac_csuma,scidac_csumb); + std::cout << GridLogMessage << "Read Binary Configuration " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksums " << std::hex << nersc_csum<<"/"< { // check here that the format is valid int ieee32big = (Params.format == std::string("IEEE32BIG")); - int ieee32 = (Params.format == std::string("IEEE32")); + int ieee32 = (Params.format == std::string("IEEE32")); int ieee64big = (Params.format == std::string("IEEE64BIG")); - int ieee64 = (Params.format == std::string("IEEE64")); + int ieee64 = (Params.format == std::string("IEEE64")); if (!(ieee64big || ieee32 || ieee32big || ieee64)) { std::cout << GridLogError << "Unrecognized file format " << Params.format @@ -74,13 +74,20 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { if ((traj % Params.saveInterval) == 0) { std::string config, rng; this->build_filenames(traj, Params, config, rng); - - ILDGIO IO(config, ILDGwrite); - BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = IO.writeConfiguration(U, Params.format); + + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + IldgWriter _IldgWriter; + _IldgWriter.open(config); + _IldgWriter.writeConfiguration(U, traj, config, config); + _IldgWriter.close(); std::cout << GridLogMessage << "Written ILDG Configuration on " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; } }; @@ 
-89,12 +96,21 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - ILDGIO IO(config, ILDGread); - BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); - uint32_t csum = IO.readConfiguration(U); // format from the header + uint32_t nersc_csum,scidac_csuma,scidac_csumb; + BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); + + FieldMetaData header; + IldgReader _IldgReader; + _IldgReader.open(config); + _IldgReader.readConfiguration(U,header); // format from the header + _IldgReader.close(); std::cout << GridLogMessage << "Read ILDG Configuration from " << config - << " checksum " << std::hex << csum << std::dec << std::endl; + << " checksum " << std::hex + << nersc_csum<<"/" + << scidac_csuma<<"/" + << scidac_csumb + << std::dec << std::endl; }; }; } diff --git a/lib/qcd/hmc/checkpointers/NerscCheckpointer.h b/lib/qcd/hmc/checkpointers/NerscCheckpointer.h index 395369a0..a4b1b480 100644 --- a/lib/qcd/hmc/checkpointers/NerscCheckpointer.h +++ b/lib/qcd/hmc/checkpointers/NerscCheckpointer.h @@ -70,7 +70,7 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer { std::string config, rng; this->build_filenames(traj, Params, config, rng); - NerscField header; + FieldMetaData header; NerscIO::readRNGState(sRNG, pRNG, header, rng); NerscIO::readConfiguration(U, header, config); }; diff --git a/lib/qcd/representations/hmc_types.h b/lib/qcd/representations/hmc_types.h index 3701c9b2..3fee377e 100644 --- a/lib/qcd/representations/hmc_types.h +++ b/lib/qcd/representations/hmc_types.h @@ -62,7 +62,10 @@ class Representations { typedef Representations NoHirep; typedef Representations > ScalarFields; - //typedef Representations > ScalarMatrixFields; +typedef Representations > ScalarMatrixFields; + +template < int Colours> +using ScalarNxNMatrixFields = Representations::Field> >; // Helper classes to access the elements // Strips the first N parameters from the tuple diff --git 
a/lib/qcd/smearing/WilsonFlow.h b/lib/qcd/smearing/WilsonFlow.h index 8b8f9a81..4f5c0d43 100644 --- a/lib/qcd/smearing/WilsonFlow.h +++ b/lib/qcd/smearing/WilsonFlow.h @@ -36,20 +36,23 @@ namespace QCD { template class WilsonFlow: public Smear{ unsigned int Nstep; - RealD epsilon; + unsigned int measure_interval; + mutable RealD epsilon, taus; + mutable WilsonGaugeAction SG; void evolve_step(typename Gimpl::GaugeField&) const; + void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD); RealD tau(unsigned int t)const {return epsilon*(t+1.0); } - public: INHERIT_GIMPL_TYPES(Gimpl) - explicit WilsonFlow(unsigned int Nstep, RealD epsilon): + explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1): Nstep(Nstep), epsilon(epsilon), + measure_interval(interval), SG(WilsonGaugeAction(3.0)) { // WilsonGaugeAction with beta 3.0 assert(epsilon > 0.0); @@ -72,7 +75,9 @@ class WilsonFlow: public Smear{ // undefined for WilsonFlow } + void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau); RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const; + RealD energyDensityPlaquette(const GaugeField& U) const; }; @@ -98,23 +103,110 @@ void WilsonFlow::evolve_step(typename Gimpl::GaugeField &U) const{ Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2 } +template +void WilsonFlow::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) { + if (maxTau - taus < epsilon){ + epsilon = maxTau-taus; + } + //std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; + GaugeField Z(U._grid); + GaugeField Zprime(U._grid); + GaugeField tmp(U._grid), Uprime(U._grid); + Uprime = U; + SG.deriv(U, Z); + Zprime = -Z; + Z *= 0.25; // Z0 = 1/4 * F(U) + Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0 + + Z *= -17.0/8.0; + SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1 + Zprime += 2.0*tmp; + Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1 + Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = 
exp(ep*Z)*W1 + + + Z *= -4.0/3.0; + SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2 + Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2 + Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2 + + // Ramos + Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0 + // Compute distance as norm^2 of the difference + GaugeField diffU = U - Uprime; + RealD diff = norm2(diffU); + // adjust integration step + + taus += epsilon; + //std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl; + + epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.); + //std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl; + +} + template RealD WilsonFlow::energyDensityPlaquette(unsigned int step, const GaugeField& U) const { RealD td = tau(step); return 2.0 * td * td * SG.S(U)/U._grid->gSites(); } +template +RealD WilsonFlow::energyDensityPlaquette(const GaugeField& U) const { + return 2.0 * taus * taus * SG.S(U)/U._grid->gSites(); +} + + +//#define WF_TIMING + + + template void WilsonFlow::smear(GaugeField& out, const GaugeField& in) const { out = in; - for (unsigned int step = 0; step < Nstep; step++) { + for (unsigned int step = 1; step <= Nstep; step++) { + auto start = std::chrono::high_resolution_clock::now(); evolve_step(out); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + #ifdef WF_TIMING + std::cout << "Time to evolve " << diff.count() << " s\n"; + #endif std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " - << step << " " + << step << " " << energyDensityPlaquette(step,out) << std::endl; + if( step % measure_interval == 0){ + std::cout << GridLogMessage << "[WilsonFlow] Top. 
charge : " + << step << " " + << WilsonLoops::TopologicalCharge(out) << std::endl; + } } } +template +void WilsonFlow::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){ + out = in; + taus = epsilon; + unsigned int step = 0; + do{ + step++; + //std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; + evolve_step_adaptive(out, maxTau); + std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " + << step << " " + << energyDensityPlaquette(out) << std::endl; + if( step % measure_interval == 0){ + std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " + << step << " " + << WilsonLoops::TopologicalCharge(out) << std::endl; + } + } while (taus < maxTau); + + + +} + + } // namespace QCD } // namespace Grid diff --git a/lib/qcd/utils/GaugeFix.h b/lib/qcd/utils/GaugeFix.h new file mode 100644 index 00000000..4ff216e4 --- /dev/null +++ b/lib/qcd/utils/GaugeFix.h @@ -0,0 +1,188 @@ + /************************************************************************************* + + grid` physics library, www.github.com/paboyle/Grid + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +//#include + +using namespace Grid; +using namespace Grid::QCD; + +template +class FourierAcceleratedGaugeFixer : public Gimpl { + public: + INHERIT_GIMPL_TYPES(Gimpl); + + typedef typename Gimpl::GaugeLinkField GaugeMat; + typedef typename Gimpl::GaugeField GaugeLorentz; + + static void GaugeLinkToLieAlgebraField(const std::vector &U,std::vector &A) { + for(int mu=0;mu &A,GaugeMat &dmuAmu) { + dmuAmu=zero; + for(int mu=0;mu::avgPlaquette(Umu); + Real org_link_trace=WilsonLoops::linkTrace(Umu); + Real old_trace = org_link_trace; + Real trG; + + std::vector U(Nd,grid); + GaugeMat dmuAmu(grid); + + for(int i=0;i(Umu,mu); + if ( Fourier==false ) { + trG = SteepestDescentStep(U,alpha,dmuAmu); + } else { + trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); + } + for(int mu=0;mu(Umu,U[mu],mu); + // Monitor progress and convergence test + // infrequently to minimise cost overhead + if ( i %20 == 0 ) { + Real plaq =WilsonLoops::avgPlaquette(Umu); + Real link_trace=WilsonLoops::linkTrace(Umu); + + if (Fourier) + std::cout << GridLogMessage << "Fourier Iteration "< &U,Real & alpha, GaugeMat & dmuAmu) { + GridBase *grid = U[0]._grid; + + std::vector A(Nd,grid); + GaugeMat g(grid); + + GaugeLinkToLieAlgebraField(U,A); + ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); + + + Real vol = grid->gSites(); + Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; + + SU::GaugeTransform(U,g); + + return trG; + } + + static Real FourierAccelSteepestDescentStep(std::vector &U,Real & alpha, GaugeMat & dmuAmu) { + + GridBase *grid = U[0]._grid; + + Real vol = grid->gSites(); + + FFT theFFT((GridCartesian *)grid); + + LatticeComplex Fp(grid); + LatticeComplex psq(grid); psq=zero; + LatticeComplex pmu(grid); + LatticeComplex one(grid); one = Complex(1.0,0.0); + + GaugeMat g(grid); + GaugeMat dmuAmu_p(grid); + 
std::vector A(Nd,grid); + + GaugeLinkToLieAlgebraField(U,A); + + DmuAmu(A,dmuAmu); + + theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); + + ////////////////////////////////// + // Work out Fp = psq_max/ psq... + ////////////////////////////////// + std::vector latt_size = grid->GlobalDimensions(); + std::vector coor(grid->_ndimension,0); + for(int mu=0;mu::taExp(ciadmam,g); + + Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; + + SU::GaugeTransform(U,g); + + return trG; + } + + static void ExpiAlphaDmuAmu(const std::vector &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { + GridBase *grid = g._grid; + Complex cialpha(0.0,-alpha); + GaugeMat ciadmam(grid); + DmuAmu(A,dmuAmu); + ciadmam = dmuAmu*cialpha; + SU::taExp(ciadmam,g); + } +}; + diff --git a/lib/qcd/utils/SUn.h b/lib/qcd/utils/SUn.h index 99a620bc..8f0c0a7b 100644 --- a/lib/qcd/utils/SUn.h +++ b/lib/qcd/utils/SUn.h @@ -716,8 +716,7 @@ template for (int a = 0; a < AdjointDimension; a++) { generator(a, Ta); - auto tmp = - 2.0 * (trace(timesI(Ta) * in)) * scale;// 2.0 for the normalization of the trace in the fundamental rep - pokeColour(h_out, tmp, a); + pokeColour(h_out, - 2.0 * (trace(timesI(Ta) * in)) * scale, a); } } diff --git a/lib/qcd/utils/Utils.h b/lib/qcd/utils/Utils.h index 61c81cb5..1786db54 100644 --- a/lib/qcd/utils/Utils.h +++ b/lib/qcd/utils/Utils.h @@ -12,7 +12,4 @@ #include #include - - - #endif diff --git a/lib/qcd/utils/WilsonLoops.h b/lib/qcd/utils/WilsonLoops.h index 5382882e..90d905d3 100644 --- a/lib/qcd/utils/WilsonLoops.h +++ b/lib/qcd/utils/WilsonLoops.h @@ -73,7 +73,7 @@ public: ////////////////////////////////////////////////// // trace of directed plaquette oriented in mu,nu plane ////////////////////////////////////////////////// - static void traceDirPlaquette(LatticeComplex &plaq, + static void traceDirPlaquette(ComplexField &plaq, const std::vector &U, const int mu, const int nu) { GaugeMat sp(U[0]._grid); @@ -83,9 +83,9 @@ public: 
////////////////////////////////////////////////// // sum over all planes of plaquette ////////////////////////////////////////////////// - static void sitePlaquette(LatticeComplex &Plaq, + static void sitePlaquette(ComplexField &Plaq, const std::vector &U) { - LatticeComplex sitePlaq(U[0]._grid); + ComplexField sitePlaq(U[0]._grid); Plaq = zero; for (int mu = 1; mu < Nd; mu++) { for (int nu = 0; nu < mu; nu++) { @@ -104,11 +104,11 @@ public: U[mu] = PeekIndex(Umu, mu); } - LatticeComplex Plaq(Umu._grid); + ComplexField Plaq(Umu._grid); sitePlaquette(Plaq, U); - TComplex Tp = sum(Plaq); - Complex p = TensorRemove(Tp); + auto Tp = sum(Plaq); + auto p = TensorRemove(Tp); return p.real(); } @@ -129,15 +129,15 @@ public: static RealD linkTrace(const GaugeLorentz &Umu) { std::vector U(Nd, Umu._grid); - LatticeComplex Tr(Umu._grid); + ComplexField Tr(Umu._grid); Tr = zero; for (int mu = 0; mu < Nd; mu++) { U[mu] = PeekIndex(Umu, mu); Tr = Tr + trace(U[mu]); } - TComplex Tp = sum(Tr); - Complex p = TensorRemove(Tp); + auto Tp = sum(Tr); + auto p = TensorRemove(Tp); double vol = Umu._grid->gSites(); @@ -188,6 +188,32 @@ public: } } + +// For the force term +static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { + GridBase *grid = Umu._grid; + std::vector U(Nd, grid); + for (int d = 0; d < Nd; d++) { + // this operation is taking too much time + U[d] = PeekIndex(Umu, d); + } + staple = zero; + GaugeMat tmp1(grid); + GaugeMat tmp2(grid); + + for (int nu = 0; nu < Nd; nu++) { + if (nu != mu) { + // this is ~10% faster than the Staple + tmp1 = Cshift(U[nu], mu, 1); + tmp2 = Cshift(U[mu], nu, 1); + staple += tmp1* adj(U[nu]*tmp2); + tmp2 = adj(U[mu]*tmp1)*U[nu]; + staple += Cshift(tmp2, nu, -1); + } + } + staple = U[mu]*staple; +} + ////////////////////////////////////////////////// // the sum over all staples on each site ////////////////////////////////////////////////// @@ -200,7 +226,6 @@ public: U[d] = PeekIndex(Umu, d); } staple = zero; - GaugeMat 
tmp(grid); for (int nu = 0; nu < Nd; nu++) { @@ -214,7 +239,7 @@ public: // | // __| // - + staple += Gimpl::ShiftStaple( Gimpl::CovShiftForward( U[nu], nu, @@ -227,6 +252,7 @@ public: // |__ // // + staple += Gimpl::ShiftStaple( Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); @@ -289,8 +315,7 @@ public: // staple = Gimpl::ShiftStaple( Gimpl::CovShiftBackward(U[nu], nu, - Gimpl::CovShiftBackward(U[mu], mu, U[nu])), - mu); + Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); } } @@ -307,10 +332,10 @@ public: GaugeMat Vup(Umu._grid), Vdn(Umu._grid); StapleUpper(Vup, Umu, mu, nu); StapleLower(Vdn, Umu, mu, nu); - GaugeMat v = adj(Vup) - adj(Vdn); + GaugeMat v = Vup - Vdn; GaugeMat u = PeekIndex(Umu, mu); // some redundant copies GaugeMat vu = v*u; - FS = 0.25*Ta(u*v + Cshift(vu, mu, +1)); + FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); } static Real TopologicalCharge(GaugeLorentz &U){ @@ -330,8 +355,8 @@ public: double coeff = 8.0/(32.0*M_PI*M_PI); - LatticeComplex qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); - TComplex Tq = sum(qfield); + ComplexField qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); + auto Tq = sum(qfield); return TensorRemove(Tq).real(); } @@ -350,16 +375,16 @@ public: adj(Gimpl::CovShiftForward( U[nu], nu, Gimpl::CovShiftForward(U[nu], nu, U[mu]))); } - static void traceDirRectangle(LatticeComplex &rect, + static void traceDirRectangle(ComplexField &rect, const std::vector &U, const int mu, const int nu) { GaugeMat sp(U[0]._grid); dirRectangle(sp, U, mu, nu); rect = trace(sp); } - static void siteRectangle(LatticeComplex &Rect, + static void siteRectangle(ComplexField &Rect, const std::vector &U) { - LatticeComplex siteRect(U[0]._grid); + ComplexField siteRect(U[0]._grid); Rect = zero; for (int mu = 1; mu < Nd; mu++) { for (int nu = 0; nu < mu; nu++) { @@ -379,12 +404,12 @@ public: U[mu] = PeekIndex(Umu, mu); } - LatticeComplex Rect(Umu._grid); + ComplexField Rect(Umu._grid); siteRectangle(Rect, U); - TComplex Tp = 
sum(Rect); - Complex p = TensorRemove(Tp); + auto Tp = sum(Rect); + auto p = TensorRemove(Tp); return p.real(); } ////////////////////////////////////////////////// diff --git a/lib/serialisation/Hdf5IO.cc b/lib/serialisation/Hdf5IO.cc index b9bb0b87..1fb7be0c 100644 --- a/lib/serialisation/Hdf5IO.cc +++ b/lib/serialisation/Hdf5IO.cc @@ -65,10 +65,12 @@ Hdf5Reader::Hdf5Reader(const std::string &fileName) Hdf5Type::type()); } -void Hdf5Reader::push(const std::string &s) +bool Hdf5Reader::push(const std::string &s) { group_ = group_.openGroup(s); path_.push_back(s); + + return true; } void Hdf5Reader::pop(void) diff --git a/lib/serialisation/Hdf5IO.h b/lib/serialisation/Hdf5IO.h index 2f891cd4..94ad9736 100644 --- a/lib/serialisation/Hdf5IO.h +++ b/lib/serialisation/Hdf5IO.h @@ -54,7 +54,7 @@ namespace Grid public: Hdf5Reader(const std::string &fileName); virtual ~Hdf5Reader(void) = default; - void push(const std::string &s); + bool push(const std::string &s); void pop(void); template void readDefault(const std::string &s, U &output); diff --git a/lib/serialisation/MacroMagic.h b/lib/serialisation/MacroMagic.h index a864989c..774c947f 100644 --- a/lib/serialisation/MacroMagic.h +++ b/lib/serialisation/MacroMagic.h @@ -110,11 +110,12 @@ THE SOFTWARE. #define GRID_MACRO_MEMBER(A,B) A B; #define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B)); -#define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. 
B <<" ; " <\ static inline void write(Writer &WR,const std::string &s, const cname &obj){ \ diff --git a/lib/serialisation/XmlIO.cc b/lib/serialisation/XmlIO.cc index b04263c9..a132a2f0 100644 --- a/lib/serialisation/XmlIO.cc +++ b/lib/serialisation/XmlIO.cc @@ -32,16 +32,21 @@ using namespace Grid; using namespace std; // Writer implementation /////////////////////////////////////////////////////// -XmlWriter::XmlWriter(const string &fileName) -: fileName_(fileName) +XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName) { - node_ = doc_.append_child(); - node_.set_name("grid"); + if ( toplev == std::string("") ) { + node_=doc_; + } else { + node_=doc_.append_child(); + node_.set_name(toplev.c_str()); + } } XmlWriter::~XmlWriter(void) { - doc_.save_file(fileName_.c_str(), " "); + if ( fileName_ != std::string("") ) { + doc_.save_file(fileName_.c_str(), " "); + } } void XmlWriter::push(const string &s) @@ -53,21 +58,44 @@ void XmlWriter::pop(void) { node_ = node_.parent(); } - -// Reader implementation /////////////////////////////////////////////////////// -XmlReader::XmlReader(const string &fileName) -: fileName_(fileName) +std::string XmlWriter::XmlString(void) { - pugi::xml_parse_result result = doc_.load_file(fileName_.c_str()); - - if ( !result ) - { + std::ostringstream oss; + doc_.save(oss); + return oss.str(); +} + +XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") +{ + pugi::xml_parse_result result; + result = doc_.load_string(xmlstring); + if ( !result ) { cerr << "XML error description: " << result.description() << "\n"; cerr << "XML error offset : " << result.offset << "\n"; abort(); } - - node_ = doc_.child("grid"); + if ( toplev == std::string("") ) { + node_ = doc_; + } else { + node_ = doc_.child(toplev.c_str()); + } +} + +// Reader implementation /////////////////////////////////////////////////////// +XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) +{ + 
pugi::xml_parse_result result; + result = doc_.load_file(fileName_.c_str()); + if ( !result ) { + cerr << "XML error description: " << result.description() << "\n"; + cerr << "XML error offset : " << result.offset << "\n"; + abort(); + } + if ( toplev == std::string("") ) { + node_ = doc_; + } else { + node_ = doc_.child(toplev.c_str()); + } } bool XmlReader::push(const string &s) diff --git a/lib/serialisation/XmlIO.h b/lib/serialisation/XmlIO.h index f333b9aa..fcdbf1e4 100644 --- a/lib/serialisation/XmlIO.h +++ b/lib/serialisation/XmlIO.h @@ -44,10 +44,9 @@ namespace Grid { class XmlWriter: public Writer - { - + { public: - XmlWriter(const std::string &fileName); + XmlWriter(const std::string &fileName,std::string toplev = std::string("grid") ); virtual ~XmlWriter(void); void push(const std::string &s); void pop(void); @@ -55,6 +54,7 @@ namespace Grid void writeDefault(const std::string &s, const U &x); template void writeDefault(const std::string &s, const std::vector &x); + std::string XmlString(void); private: pugi::xml_document doc_; pugi::xml_node node_; @@ -64,7 +64,8 @@ namespace Grid class XmlReader: public Reader { public: - XmlReader(const std::string &fileName); + XmlReader(const char *xmlstring,std::string toplev = std::string("grid") ); + XmlReader(const std::string &fileName,std::string toplev = std::string("grid") ); virtual ~XmlReader(void) = default; bool push(const std::string &s); void pop(void); @@ -118,7 +119,7 @@ namespace Grid std::string buf; readDefault(s, buf); - std::cout << s << " " << buf << std::endl; + // std::cout << s << " " << buf << std::endl; fromString(output, buf); } diff --git a/lib/simd/Grid_avx.h b/lib/simd/Grid_avx.h index 52be9c05..f4634432 100644 --- a/lib/simd/Grid_avx.h +++ b/lib/simd/Grid_avx.h @@ -701,9 +701,28 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m256i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + 
__m128i ret; +#if defined (AVX2) + // AVX2 horizontal adds within upper and lower halves of register; use + // SSE to add upper and lower halves for result. + __m256i v1, v2; + __m128i u1, u2; + v1 = _mm256_hadd_epi32(in, in); + v2 = _mm256_hadd_epi32(v1, v1); + u1 = _mm256_castsi256_si128(v2); // upper half + u2 = _mm256_extracti128_si256(v2, 1); // lower half + ret = _mm_add_epi32(u1, u2); +#else + // No AVX horizontal add; extract upper and lower halves of register & use + // SSE intrinsics. + __m128i u1, u2, u3; + u1 = _mm256_extractf128_si256(in, 0); // upper half + u2 = _mm256_extractf128_si256(in, 1); // lower half + u3 = _mm_add_epi32(u1, u2); + u1 = _mm_hadd_epi32(u3, u3); + ret = _mm_hadd_epi32(u1, u1); +#endif + return _mm_cvtsi128_si32(ret); } } diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index ba054665..85d27421 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -543,6 +543,24 @@ namespace Optimization { u512d conv; conv.v = v1; return conv.f[0]; } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(__m512i in){ + // No full vector reduce, use AVX to add upper and lower halves of register + // and perform AVX reduction. 
+ __m256i v1, v2, v3; + __m128i u1, u2, ret; + v1 = _mm512_castsi512_si256(in); // upper half + v2 = _mm512_extracti32x8_epi32(in, 1); // lower half + v3 = _mm256_add_epi32(v1, v2); + v1 = _mm256_hadd_epi32(v3, v3); + v2 = _mm256_hadd_epi32(v1, v1); + u1 = _mm256_castsi256_si128(v2) // upper half + u2 = _mm256_extracti128_si256(v2, 1); // lower half + ret = _mm_add_epi32(u1, u2); + return _mm_cvtsi128_si32(ret); + } #else //Complex float Reduce template<> @@ -570,9 +588,7 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m512i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + return _mm512_reduce_add_epi32(in); } #endif diff --git a/lib/simd/Grid_imci.h b/lib/simd/Grid_imci.h index 173e57d8..a1dae565 100644 --- a/lib/simd/Grid_imci.h +++ b/lib/simd/Grid_imci.h @@ -401,9 +401,7 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m512i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + return _mm512_reduce_add_epi32(in); } diff --git a/lib/simd/Grid_neon.h b/lib/simd/Grid_neon.h index 7c1ad443..d6eb9c5a 100644 --- a/lib/simd/Grid_neon.h +++ b/lib/simd/Grid_neon.h @@ -1,13 +1,14 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid + Grid physics library, www.github.com/paboyle/Grid Source file: ./lib/simd/Grid_neon.h Copyright (C) 2015 -Author: Peter Boyle -Author: neo + Author: Nils Meyer + Author: Peter Boyle + Author: neo This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,19 +27,25 @@ Author: neo See the full license in the file "LICENSE" in the top level distribution directory 
*************************************************************************************/ /* END LEGAL */ -//---------------------------------------------------------------------- -/*! @file Grid_sse4.h - @brief Optimization libraries for NEON (ARM) instructions set ARMv8 - Experimental - Using intrinsics - DEVELOPING! +/* + + ARMv8 NEON intrinsics layer by + + Nils Meyer , + University of Regensburg, Germany + SFB/TRR55 + */ -// Time-stamp: <2015-07-10 17:45:09 neo> -//---------------------------------------------------------------------- +#ifndef GEN_SIMD_WIDTH +#define GEN_SIMD_WIDTH 16u +#endif + +#include "Grid_generic_types.h" #include -// ARMv8 supports double precision - +namespace Grid { namespace Optimization { template @@ -46,16 +53,20 @@ namespace Optimization { float32x4_t f; vtype v; }; - union u128f { float32x4_t v; float f[4]; }; union u128d { float64x2_t v; - double f[4]; + double f[2]; }; - + // half precision + union u128h { + float16x8_t v; + uint16_t f[8]; + }; + struct Vsplat{ //Complex float inline float32x4_t operator()(float a, float b){ @@ -64,31 +75,31 @@ namespace Optimization { } // Real float inline float32x4_t operator()(float a){ - return vld1q_dup_f32(&a); + return vdupq_n_f32(a); } //Complex double - inline float32x4_t operator()(double a, double b){ - float tmp[4]={(float)a,(float)b,(float)a,(float)b}; - return vld1q_f32(tmp); + inline float64x2_t operator()(double a, double b){ + double tmp[2]={a,b}; + return vld1q_f64(tmp); } - //Real double - inline float32x4_t operator()(double a){ - return vld1q_dup_f32(&a); + //Real double // N:tbc + inline float64x2_t operator()(double a){ + return vdupq_n_f64(a); } - //Integer + //Integer // N:tbc inline uint32x4_t operator()(Integer a){ - return vld1q_dup_u32(&a); + return vdupq_n_u32(a); } }; struct Vstore{ - //Float + //Float inline void operator()(float32x4_t a, float* F){ vst1q_f32(F, a); } //Double - inline void operator()(float32x4_t a, double* D){ - vst1q_f32((float*)D, a); + inline 
void operator()(float64x2_t a, double* D){ + vst1q_f64(D, a); } //Integer inline void operator()(uint32x4_t a, Integer* I){ @@ -97,54 +108,54 @@ namespace Optimization { }; - struct Vstream{ - //Float + struct Vstream{ // N:equivalents to _mm_stream_p* in NEON? + //Float // N:generic inline void operator()(float * a, float32x4_t b){ - + memcpy(a,&b,4*sizeof(float)); } - //Double - inline void operator()(double * a, float32x4_t b){ - + //Double // N:generic + inline void operator()(double * a, float64x2_t b){ + memcpy(a,&b,2*sizeof(double)); } }; + // Nils: Vset untested; not used currently in Grid at all; + // git commit 4a8c4ccfba1d05159348d21a9698028ea847e77b struct Vset{ - // Complex float + // Complex float // N:ok inline float32x4_t operator()(Grid::ComplexF *a){ - float32x4_t foo; - return foo; + float tmp[4]={a[1].imag(),a[1].real(),a[0].imag(),a[0].real()}; + return vld1q_f32(tmp); } - // Complex double - inline float32x4_t operator()(Grid::ComplexD *a){ - float32x4_t foo; - return foo; + // Complex double // N:ok + inline float64x2_t operator()(Grid::ComplexD *a){ + double tmp[2]={a[0].imag(),a[0].real()}; + return vld1q_f64(tmp); } - // Real float + // Real float // N:ok inline float32x4_t operator()(float *a){ - float32x4_t foo; - return foo; + float tmp[4]={a[3],a[2],a[1],a[0]}; + return vld1q_f32(tmp); } - // Real double - inline float32x4_t operator()(double *a){ - float32x4_t foo; - return foo; + // Real double // N:ok + inline float64x2_t operator()(double *a){ + double tmp[2]={a[1],a[0]}; + return vld1q_f64(tmp); } - // Integer + // Integer // N:ok inline uint32x4_t operator()(Integer *a){ - uint32x4_t foo; - return foo; + return vld1q_dup_u32(a); } - - }; + // N:leaving as is template struct Reduce{ //Need templated class to overload output type //General form must generate error if compiled - inline Out_type operator()(In_type in){ + inline Out_type operator()(In_type in){ printf("Error, using wrong Reduce function\n"); exit(1); return 0; @@ 
-184,26 +195,98 @@ namespace Optimization { } }; + struct MultRealPart{ + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + float32x4_t re = vtrn1q_f32(a, a); + return vmulq_f32(re, b); + } + inline float64x2_t operator()(float64x2_t a, float64x2_t b){ + float64x2_t re = vzip1q_f64(a, a); + return vmulq_f64(re, b); + } + }; + + struct MaddRealPart{ + inline float32x4_t operator()(float32x4_t a, float32x4_t b, float32x4_t c){ + float32x4_t re = vtrn1q_f32(a, a); + return vfmaq_f32(c, re, b); + } + inline float64x2_t operator()(float64x2_t a, float64x2_t b, float64x2_t c){ + float64x2_t re = vzip1q_f64(a, a); + return vfmaq_f64(c, re, b); + } + }; + + struct Div{ + // Real float + inline float32x4_t operator()(float32x4_t a, float32x4_t b){ + return vdivq_f32(a, b); + } + // Real double + inline float64x2_t operator()(float64x2_t a, float64x2_t b){ + return vdivq_f64(a, b); + } + }; + struct MultComplex{ // Complex float inline float32x4_t operator()(float32x4_t a, float32x4_t b){ - float32x4_t foo; - return foo; + + float32x4_t r0, r1, r2, r3, r4; + + // a = ar ai Ar Ai + // b = br bi Br Bi + // collect real/imag part, negate bi and Bi + r0 = vtrn1q_f32(b, b); // br br Br Br + r1 = vnegq_f32(b); // -br -bi -Br -Bi + r2 = vtrn2q_f32(b, r1); // bi -bi Bi -Bi + + // the fun part + r3 = vmulq_f32(r2, a); // bi*ar -bi*ai ... + r4 = vrev64q_f32(r3); // -bi*ai bi*ar ... + + // fma(a,b,c) = a+b*c + return vfmaq_f32(r4, r0, a); // ar*br-ai*bi ai*br+ar*bi ... 
+ + // no fma, use mul and add + //float32x4_t r5; + //r5 = vmulq_f32(r0, a); + //return vaddq_f32(r4, r5); } // Complex double inline float64x2_t operator()(float64x2_t a, float64x2_t b){ - float32x4_t foo; - return foo; + + float64x2_t r0, r1, r2, r3, r4; + + // b = br bi + // collect real/imag part, negate bi + r0 = vtrn1q_f64(b, b); // br br + r1 = vnegq_f64(b); // -br -bi + r2 = vtrn2q_f64(b, r1); // bi -bi + + // the fun part + r3 = vmulq_f64(r2, a); // bi*ar -bi*ai + r4 = vextq_f64(r3,r3,1); // -bi*ai bi*ar + + // fma(a,b,c) = a+b*c + return vfmaq_f64(r4, r0, a); // ar*br-ai*bi ai*br+ar*bi + + // no fma, use mul and add + //float64x2_t r5; + //r5 = vmulq_f64(r0, a); + //return vaddq_f64(r4, r5); } }; struct Mult{ // Real float inline float32x4_t mac(float32x4_t a, float32x4_t b, float32x4_t c){ - return vaddq_f32(vmulq_f32(b,c),a); + //return vaddq_f32(vmulq_f32(b,c),a); + return vfmaq_f32(a, b, c); } inline float64x2_t mac(float64x2_t a, float64x2_t b, float64x2_t c){ - return vaddq_f64(vmulq_f64(b,c),a); + //return vaddq_f64(vmulq_f64(b,c),a); + return vfmaq_f64(a, b, c); } inline float32x4_t operator()(float32x4_t a, float32x4_t b){ return vmulq_f32(a,b); @@ -221,89 +304,275 @@ namespace Optimization { struct Conj{ // Complex single inline float32x4_t operator()(float32x4_t in){ - return in; + // ar ai br bi -> ar -ai br -bi + float32x4_t r0, r1; + r0 = vnegq_f32(in); // -ar -ai -br -bi + r1 = vrev64q_f32(r0); // -ai -ar -bi -br + return vtrn1q_f32(in, r1); // ar -ai br -bi } // Complex double - //inline float32x4_t operator()(float32x4_t in){ - // return 0; - //} + inline float64x2_t operator()(float64x2_t in){ + + float64x2_t r0, r1; + r0 = vextq_f64(in, in, 1); // ai ar + r1 = vnegq_f64(r0); // -ai -ar + return vextq_f64(r0, r1, 1); // ar -ai + } // do not define for integer input }; struct TimesMinusI{ //Complex single inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ - return in; + // ar ai br bi -> ai -ar ai -br + float32x4_t r0, r1; + 
r0 = vnegq_f32(in); // -ar -ai -br -bi + r1 = vrev64q_f32(in); // ai ar bi br + return vtrn1q_f32(r1, r0); // ar -ai br -bi } //Complex double - //inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ - // return in; - //} - - + inline float64x2_t operator()(float64x2_t in, float64x2_t ret){ + // a ib -> b -ia + float64x2_t tmp; + tmp = vnegq_f64(in); + return vextq_f64(in, tmp, 1); + } }; struct TimesI{ //Complex single inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ - //need shuffle - return in; + // ar ai br bi -> -ai ar -bi br + float32x4_t r0, r1; + r0 = vnegq_f32(in); // -ar -ai -br -bi + r1 = vrev64q_f32(r0); // -ai -ar -bi -br + return vtrn1q_f32(r1, in); // -ai ar -bi br } //Complex double - //inline float32x4_t operator()(float32x4_t in, float32x4_t ret){ - // return 0; - //} + inline float64x2_t operator()(float64x2_t in, float64x2_t ret){ + // a ib -> -b ia + float64x2_t tmp; + tmp = vnegq_f64(in); + return vextq_f64(tmp, in, 1); + } + }; + + struct Permute{ + + static inline float32x4_t Permute0(float32x4_t in){ // N:ok + // AB CD -> CD AB + return vextq_f32(in, in, 2); + }; + static inline float32x4_t Permute1(float32x4_t in){ // N:ok + // AB CD -> BA DC + return vrev64q_f32(in); + }; + static inline float32x4_t Permute2(float32x4_t in){ // N:not used by Boyle + return in; + }; + static inline float32x4_t Permute3(float32x4_t in){ // N:not used by Boyle + return in; + }; + + static inline float64x2_t Permute0(float64x2_t in){ // N:ok + // AB -> BA + return vextq_f64(in, in, 1); + }; + static inline float64x2_t Permute1(float64x2_t in){ // N:not used by Boyle + return in; + }; + static inline float64x2_t Permute2(float64x2_t in){ // N:not used by Boyle + return in; + }; + static inline float64x2_t Permute3(float64x2_t in){ // N:not used by Boyle + return in; + }; + + }; + + struct Rotate{ + + static inline float32x4_t rotate(float32x4_t in,int n){ // N:ok + switch(n){ + case 0: // AB CD -> AB CD + return tRotate<0>(in); + 
break; + case 1: // AB CD -> BC DA + return tRotate<1>(in); + break; + case 2: // AB CD -> CD AB + return tRotate<2>(in); + break; + case 3: // AB CD -> DA BC + return tRotate<3>(in); + break; + default: assert(0); + } + } + static inline float64x2_t rotate(float64x2_t in,int n){ // N:ok + switch(n){ + case 0: // AB -> AB + return tRotate<0>(in); + break; + case 1: // AB -> BA + return tRotate<1>(in); + break; + default: assert(0); + } + } + +// working, but no restriction on n +// template static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n); }; +// template static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n); }; + +// restriction on n + template static inline float32x4_t tRotate(float32x4_t in){ return vextq_f32(in,in,n%4); }; + template static inline float64x2_t tRotate(float64x2_t in){ return vextq_f64(in,in,n%2); }; + + }; + + struct PrecisionChange { + + static inline float16x8_t StoH (const float32x4_t &a,const float32x4_t &b) { + float16x4_t h = vcvt_f16_f32(a); + return vcvt_high_f16_f32(h, b); + } + static inline void HtoS (float16x8_t h,float32x4_t &sa,float32x4_t &sb) { + sb = vcvt_high_f32_f16(h); + // there is no direct conversion from lower float32x4_t to float64x2_t + // vextq_f16 not supported by clang 3.8 / 4.0 / arm clang + //float16x8_t h1 = vextq_f16(h, h, 4); // correct, but not supported by clang + // workaround for clang + uint32x4_t h1u = reinterpret_cast(h); + float16x8_t h1 = reinterpret_cast(vextq_u32(h1u, h1u, 2)); + sa = vcvt_high_f32_f16(h1); + } + static inline float32x4_t DtoS (float64x2_t a,float64x2_t b) { + float32x2_t s = vcvt_f32_f64(a); + return vcvt_high_f32_f64(s, b); + + } + static inline void StoD (float32x4_t s,float64x2_t &a,float64x2_t &b) { + b = vcvt_high_f64_f32(s); + // there is no direct conversion from lower float32x4_t to float64x2_t + float32x4_t s1 = vextq_f32(s, s, 2); + a = vcvt_high_f64_f32(s1); + + } + static inline float16x8_t DtoH (float64x2_t a,float64x2_t 
b,float64x2_t c,float64x2_t d) { + float32x4_t s1 = DtoS(a, b); + float32x4_t s2 = DtoS(c, d); + return StoH(s1, s2); + } + static inline void HtoD (float16x8_t h,float64x2_t &a,float64x2_t &b,float64x2_t &c,float64x2_t &d) { + float32x4_t s1, s2; + HtoS(h, s1, s2); + StoD(s1, a, b); + StoD(s2, c, d); + } + }; + + ////////////////////////////////////////////// + // Exchange support + + struct Exchange{ + static inline void Exchange0(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){ + // in1: ABCD -> out1: ABEF + // in2: EFGH -> out2: CDGH + + // z: CDAB + float32x4_t z = vextq_f32(in1, in1, 2); + // out1: ABEF + out1 = vextq_f32(z, in2, 2); + + // z: GHEF + z = vextq_f32(in2, in2, 2); + // out2: CDGH + out2 = vextq_f32(in1, z, 2); + }; + + static inline void Exchange1(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){ + // in1: ABCD -> out1: AECG + // in2: EFGH -> out2: BFDH + out1 = vtrn1q_f32(in1, in2); + out2 = vtrn2q_f32(in1, in2); + }; + static inline void Exchange2(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){ + assert(0); + return; + }; + static inline void Exchange3(float32x4_t &out1,float32x4_t &out2,float32x4_t in1,float32x4_t in2){ + assert(0); + return; + }; + // double precision + static inline void Exchange0(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){ + // in1: AB -> out1: AC + // in2: CD -> out2: BD + out1 = vzip1q_f64(in1, in2); + out2 = vzip2q_f64(in1, in2); + }; + static inline void Exchange1(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){ + assert(0); + return; + }; + static inline void Exchange2(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){ + assert(0); + return; + }; + static inline void Exchange3(float64x2_t &out1,float64x2_t &out2,float64x2_t in1,float64x2_t in2){ + assert(0); + return; + }; }; ////////////////////////////////////////////// // Some Template specialization - template < typename vtype > - void 
permute(vtype &a, vtype b, int perm) { - }; //Complex float Reduce template<> inline Grid::ComplexF Reduce::operator()(float32x4_t in){ - return 0; + float32x4_t v1; // two complex + v1 = Optimization::Permute::Permute0(in); + v1 = vaddq_f32(v1,in); + u128f conv; conv.v=v1; + return Grid::ComplexF(conv.f[0],conv.f[1]); } //Real float Reduce template<> inline Grid::RealF Reduce::operator()(float32x4_t in){ - float32x2_t high = vget_high_f32(in); - float32x2_t low = vget_low_f32(in); - float32x2_t tmp = vadd_f32(low, high); - float32x2_t sum = vpadd_f32(tmp, tmp); - return vget_lane_f32(sum,0); + return vaddvq_f32(in); } - - + + //Complex double Reduce - template<> + template<> // N:by Boyle inline Grid::ComplexD Reduce::operator()(float64x2_t in){ - return 0; + u128d conv; conv.v = in; + return Grid::ComplexD(conv.f[0],conv.f[1]); } - + //Real double Reduce template<> inline Grid::RealD Reduce::operator()(float64x2_t in){ - float64x2_t sum = vpaddq_f64(in, in); - return vgetq_lane_f64(sum,0); + return vaddvq_f64(in); } //Integer Reduce template<> inline Integer Reduce::operator()(uint32x4_t in){ // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); + printf("Reduce : Missing integer implementation -> FIX\n"); assert(0); } } ////////////////////////////////////////////////////////////////////////////////////// -// Here assign types -namespace Grid { +// Here assign types +// typedef Optimization::vech SIMD_Htype; // Reduced precision type + typedef float16x8_t SIMD_Htype; // Half precision type typedef float32x4_t SIMD_Ftype; // Single precision type typedef float64x2_t SIMD_Dtype; // Double precision type typedef uint32x4_t SIMD_Itype; // Integer type @@ -312,13 +581,6 @@ namespace Grid { inline void prefetch_HINT_T0(const char *ptr){}; - // Gpermute function - template < typename VectorSIMD > - inline void Gpermute(VectorSIMD &y,const VectorSIMD &b, int perm ) { - Optimization::permute(y.v,b.v,perm); - } - - // Function name aliases 
typedef Optimization::Vsplat VsplatSIMD; typedef Optimization::Vstore VstoreSIMD; @@ -326,16 +588,19 @@ namespace Grid { typedef Optimization::Vstream VstreamSIMD; template using ReduceSIMD = Optimization::Reduce; - + // Arithmetic operations typedef Optimization::Sum SumSIMD; typedef Optimization::Sub SubSIMD; + typedef Optimization::Div DivSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; + typedef Optimization::MultRealPart MultRealPartSIMD; + typedef Optimization::MaddRealPart MaddRealPartSIMD; typedef Optimization::Conj ConjSIMD; typedef Optimization::TimesMinusI TimesMinusISIMD; typedef Optimization::TimesI TimesISIMD; -} +} \ No newline at end of file diff --git a/lib/simd/Grid_qpx.h b/lib/simd/Grid_qpx.h index cbca9118..8de7bde8 100644 --- a/lib/simd/Grid_qpx.h +++ b/lib/simd/Grid_qpx.h @@ -374,6 +374,84 @@ namespace Optimization { // Complex float FLOAT_WRAP_2(operator(), inline) }; +#define USE_FP16 + struct PrecisionChange { + static inline vech StoH (const vector4float &a, const vector4float &b) { + vech ret; + std::cout << GridLogError << "QPX single to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoS (vech h, vector4float &sa, vector4float &sb) { + std::cout << GridLogError << "QPX half to single precision conversion not yet supported." << std::endl; + assert(0); + } + static inline vector4float DtoS (vector4double a, vector4double b) { + vector4float ret; + std::cout << GridLogError << "QPX double to single precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void StoD (vector4float s, vector4double &a, vector4double &b) { + std::cout << GridLogError << "QPX single to double precision conversion not yet supported." 
<< std::endl; + assert(0); + } + static inline vech DtoH (vector4double a, vector4double b, + vector4double c, vector4double d) { + vech ret; + std::cout << GridLogError << "QPX double to half precision conversion not yet supported." << std::endl; + assert(0); + return ret; + } + static inline void HtoD (vech h, vector4double &a, vector4double &b, + vector4double &c, vector4double &d) { + std::cout << GridLogError << "QPX half to double precision conversion not yet supported." << std::endl; + assert(0); + } + }; + + ////////////////////////////////////////////// + // Exchange support +#define FLOAT_WRAP_EXCHANGE(fn) \ + static inline void fn(vector4float &out1, vector4float &out2, \ + vector4float in1, vector4float in2) \ + { \ + vector4double out1d, out2d, in1d, in2d; \ + in1d = Vset()(in1); \ + in2d = Vset()(in2); \ + fn(out1d, out2d, in1d, in2d); \ + Vstore()(out1d, out1); \ + Vstore()(out2d, out2); \ + } + + struct Exchange{ + + // double precision + static inline void Exchange0(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0145)); + out2 = vec_perm(in1, in2, vec_gpci(02367)); + } + static inline void Exchange1(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + out1 = vec_perm(in1, in2, vec_gpci(0426)); + out2 = vec_perm(in1, in2, vec_gpci(01537)); + } + static inline void Exchange2(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } + static inline void Exchange3(vector4double &out1, vector4double &out2, + vector4double in1, vector4double in2) { + assert(0); + } + + // single precision + FLOAT_WRAP_EXCHANGE(Exchange0); + FLOAT_WRAP_EXCHANGE(Exchange1); + FLOAT_WRAP_EXCHANGE(Exchange2); + FLOAT_WRAP_EXCHANGE(Exchange3); + }; struct Permute{ //Complex double @@ -497,15 +575,19 @@ namespace Optimization { //Integer Reduce template<> - inline Integer Reduce::operator()(int in){ - // FIXME unimplemented - 
printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + inline Integer Reduce::operator()(veci in){ + Integer a = 0; + for (unsigned int i = 0; i < W::r; ++i) + { + a += in.v[i]; + } + return a; } } //////////////////////////////////////////////////////////////////////////////// // Here assign types +typedef Optimization::vech SIMD_Htype; // Half precision type typedef Optimization::vector4float SIMD_Ftype; // Single precision type typedef vector4double SIMD_Dtype; // Double precision type typedef Optimization::veci SIMD_Itype; // Integer type diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index 2fb2df76..0b1f9ffb 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -570,9 +570,9 @@ namespace Optimization { //Integer Reduce template<> inline Integer Reduce::operator()(__m128i in){ - // FIXME unimplemented - printf("Reduce : Missing integer implementation -> FIX\n"); - assert(0); + __m128i v1 = _mm_hadd_epi32(in, in); + __m128i v2 = _mm_hadd_epi32(v1, v1); + return _mm_cvtsi128_si32(v2); } } diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 0048382f..27585547 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -53,7 +53,7 @@ directory #if defined IMCI #include "Grid_imci.h" #endif -#ifdef NEONv8 +#ifdef NEONV8 #include "Grid_neon.h" #endif #if defined QPX @@ -327,10 +327,6 @@ class Grid_simd { // provides support /////////////////////////////////////// - //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) - //#pragma GCC push_options - //#pragma GCC optimize ("O0") - //#endif template friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { Grid_simd ret; @@ -364,9 +360,6 @@ class Grid_simd { ret.v = cx.v; return ret; } - //#if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) - //#pragma GCC pop_options - //#endif /////////////////////// // Exchange // Al Ah , Bl Bh -> Al Bl Ah,Bh @@ -428,7 +421,6 @@ class Grid_simd { 
}; // end of Grid_simd class definition - inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; } inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; } inline void permute(RealD &y,RealD b, int perm) { y=b; } @@ -759,8 +751,8 @@ inline Grid_simd, V> toComplex(const Grid_simd &in) { conv.v = in.v; for (int i = 0; i < Rsimd::Nsimd(); i += 2) { - assert(conv.s[i + 1] == - conv.s[i]); // trap any cases where real was not duplicated + assert(conv.s[i + 1] == conv.s[i]); + // trap any cases where real was not duplicated // indicating the SIMD grids of real and imag assignment did not correctly // match conv.s[i + 1] = 0.0; // zero imaginary parts @@ -838,8 +830,6 @@ inline void precisionChange(vComplexD *out,vComplexF *in,int nvec){ precisionCha inline void precisionChange(vComplexD *out,vComplexH *in,int nvec){ precisionChange((vRealD *)out,(vRealH *)in,nvec);} inline void precisionChange(vComplexF *out,vComplexH *in,int nvec){ precisionChange((vRealF *)out,(vRealH *)in,nvec);} - - // Check our vector types are of an appropriate size. 
#if defined QPX static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); @@ -854,21 +844,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc ///////////////////////////////////////// template struct is_simd : public std::false_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; -template <> -struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; +template <> struct is_simd : public std::true_type {}; -template -using IfSimd = Invoke::value, int> >; -template -using IfNotSimd = Invoke::value, unsigned> >; +template using IfSimd = Invoke::value, int> >; +template using IfNotSimd = Invoke::value, unsigned> >; } #endif diff --git a/lib/simd/Grid_vector_unops.h b/lib/simd/Grid_vector_unops.h index 2afac190..2244566f 100644 --- a/lib/simd/Grid_vector_unops.h +++ b/lib/simd/Grid_vector_unops.h @@ -179,13 +179,6 @@ inline Grid_simd div(const Grid_simd &r, Integer y) { //////////////////////////////////////////////////////////////////////////// // Allows us to assign into **conformable** real vectors from complex //////////////////////////////////////////////////////////////////////////// -// template < class S, class V > -// inline auto ComplexRemove(const Grid_simd &c) -> -// Grid_simd::Real,V> { -// Grid_simd::Real,V> ret; -// ret.v = c.v; -// return ret; -// } template struct AndFunctor { scalar operator()(const scalar &x, const scalar &y) const { return x & y; } diff --git a/lib/stencil/Lebesgue.cc b/lib/stencil/Lebesgue.cc index 4551878c..2880e4b6 100644 --- a/lib/stencil/Lebesgue.cc +++ 
b/lib/stencil/Lebesgue.cc @@ -32,8 +32,11 @@ Author: paboyle namespace Grid { int LebesgueOrder::UseLebesgueOrder; +#ifdef KNL std::vector LebesgueOrder::Block({8,2,2,2}); - +#else +std::vector LebesgueOrder::Block({2,2,2,2}); +#endif LebesgueOrder::IndexInteger LebesgueOrder::alignup(IndexInteger n){ n--; // 1000 0011 --> 1000 0010 n |= n >> 1; // 1000 0010 | 0100 0001 = 1100 0011 @@ -51,8 +54,31 @@ LebesgueOrder::LebesgueOrder(GridBase *_grid) if ( Block[0]==0) ZGraph(); else if ( Block[1]==0) NoBlocking(); else CartesianBlocking(); -} + if (0) { + std::cout << "Thread Interleaving"< reorder = _LebesgueReorder; + std::vector throrder; + int vol = _LebesgueReorder.size(); + int threads = GridThread::GetThreads(); + int blockbits=3; + int blocklen = 8; + int msk = 0x7; + + for(int t=0;t> blockbits) % threads == t ) { + throrder.push_back(reorder[ss]); + } + } + } + _LebesgueReorder = throrder; +} void LebesgueOrder::NoBlocking(void) { std::cout< & xi, std::vector &dims); + void ThreadInterleave(void); + private: std::vector _LebesgueReorder; diff --git a/lib/stencil/Stencil.h b/lib/stencil/Stencil.h index 05a531fe..2894778a 100644 --- a/lib/stencil/Stencil.h +++ b/lib/stencil/Stencil.h @@ -285,7 +285,7 @@ class CartesianStencil { // Stencil runs along coordinate axes only; NO diagonal { int dimension = _directions[point]; int displacement = _distances[point]; - + int fd = _grid->_fdimensions[dimension]; int rd = _grid->_rdimensions[dimension]; diff --git a/lib/tensors/Tensor_class.h b/lib/tensors/Tensor_class.h index cb90da6c..c7f868db 100644 --- a/lib/tensors/Tensor_class.h +++ b/lib/tensors/Tensor_class.h @@ -156,11 +156,18 @@ class iScalar { // convert from a something to a scalar via constructor of something arg template ::value, T>::type * = nullptr> - strong_inline iScalar operator=(T arg) { + strong_inline iScalar operator=(T arg) { _internal = arg; return *this; } + // Convert elements + template + strong_inline iScalar operator=(iScalar &&arg) { + 
_internal = arg._internal; + return *this; + } + friend std::ostream &operator<<(std::ostream &stream,const iScalar &o) { stream << "S {" << o._internal << "}"; return stream; diff --git a/lib/tensors/Tensor_exp.h b/lib/tensors/Tensor_exp.h index e18fed70..f7eee8f0 100644 --- a/lib/tensors/Tensor_exp.h +++ b/lib/tensors/Tensor_exp.h @@ -80,8 +80,11 @@ template inline iVector Exponentiate(const iVector mat iQ2 = arg*arg*alpha*alpha; mat iQ3 = arg*iQ2*alpha; // sign in c0 from the conventions on the Ta - c0 = -imag( trace(iQ3) ) * one_over_three; - c1 = -real( trace(iQ2) ) * one_over_two; + scalar imQ3, reQ2; + imQ3 = imag( trace(iQ3) ); + reQ2 = real( trace(iQ2) ); + c0 = -imQ3 * one_over_three; + c1 = -reQ2 * one_over_two; // Cayley Hamilton checks to machine precision, tested tmp = c1 * one_over_three; diff --git a/lib/tensors/Tensor_index.h b/lib/tensors/Tensor_index.h index 7f34f3ac..f114baf8 100644 --- a/lib/tensors/Tensor_index.h +++ b/lib/tensors/Tensor_index.h @@ -47,6 +47,28 @@ template class TensorIndexRecursion { public: + + //////////////////////////////////////////////////// + // Type Queries + //////////////////////////////////////////////////// + template static inline int indexRank(const iScalar tmp) { return TensorIndexRecursion::indexRank(tmp._internal); } + template static inline int indexRank(const iVector tmp){ return TensorIndexRecursion::indexRank(tmp._internal[0]); } + template static inline int indexRank(const iMatrix tmp){ return TensorIndexRecursion::indexRank(tmp._internal[0][0]); } + + template static inline int isScalar(const iScalar tmp) { return TensorIndexRecursion::isScalar(tmp._internal); } + template static inline int isScalar(const iVector tmp){ return TensorIndexRecursion::isScalar(tmp._internal[0]); } + template static inline int isScalar(const iMatrix tmp){ return TensorIndexRecursion::isScalar(tmp._internal[0][0]); } + + template static inline int isVector(const iScalar tmp) { return 
TensorIndexRecursion::isVector(tmp._internal); } + template static inline int isVector(const iVector tmp){ return TensorIndexRecursion::isVector(tmp._internal[0]); } + template static inline int isVector(const iMatrix tmp){ return TensorIndexRecursion::isVector(tmp._internal[0][0]); } + + template static inline int isMatrix(const iScalar tmp) { return TensorIndexRecursion::isMatrix(tmp._internal); } + template static inline int isMatrix(const iVector tmp){ return TensorIndexRecursion::isMatrix(tmp._internal[0]); } + template static inline int isMatrix(const iMatrix tmp){ return TensorIndexRecursion::isMatrix(tmp._internal[0][0]); } + //////////////////////////////////////////////////// + // Trace + //////////////////////////////////////////////////// template static auto traceIndex(const iScalar arg) -> iScalar::traceIndex(arg._internal))> { @@ -215,6 +237,24 @@ class TensorIndexRecursion { template<> class TensorIndexRecursion<0> { public: + //////////////////////////////////////////////////// + // Type Queries + //////////////////////////////////////////////////// + template static inline int indexRank(const iScalar tmp) { return 1; } + template static inline int indexRank(const iVector tmp){ return N; } + template static inline int indexRank(const iMatrix tmp){ return N; } + + template static inline int isScalar(const iScalar tmp) { return true;} + template static inline int isScalar(const iVector tmp){ return false;} + template static inline int isScalar(const iMatrix tmp){ return false;} + + template static inline int isVector(const iScalar tmp) { return false;} + template static inline int isVector(const iVector tmp){ return true;} + template static inline int isVector(const iMatrix tmp){ return false;} + + template static inline int isMatrix(const iScalar tmp) { return false;} + template static inline int isMatrix(const iVector tmp){ return false;} + template static inline int isMatrix(const iMatrix tmp){ return true;} 
///////////////////////////////////////// // Ends recursion for trace (scalar/vector/matrix) @@ -302,6 +342,26 @@ class TensorIndexRecursion<0> { //////////////////////////////////////////////////////////////////////////////////////////////////////// // External wrappers //////////////////////////////////////////////////////////////////////////////////////////////////////// +template inline int indexRank(void) +{ + vtype tmp; + return TensorIndexRecursion::indexRank(tmp); +} +template inline int isScalar(void) +{ + vtype tmp; + return TensorIndexRecursion::isScalar(tmp); +} +template inline int isVector(void) +{ + vtype tmp; + return TensorIndexRecursion::isVector(tmp); +} +template inline int isMatrix(void) +{ + vtype tmp; + return TensorIndexRecursion::isMatrix(tmp); +} template inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion::traceIndex(arg)) { diff --git a/lib/tensors/Tensor_traits.h b/lib/tensors/Tensor_traits.h index ab20b807..c1ef397a 100644 --- a/lib/tensors/Tensor_traits.h +++ b/lib/tensors/Tensor_traits.h @@ -281,8 +281,8 @@ namespace Grid { template class getPrecision{ public: - typedef typename getVectorType::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. if fundamental or grid Tensor) - + //get the vector_obj (i.e. a grid Tensor) if its a Lattice, do nothing otherwise (i.e. if fundamental or grid Tensor) + typedef typename getVectorType::type vector_obj; typedef typename GridTypeMapper::scalar_type scalar_type; //get the associated scalar type. 
Works on fundamental and tensor types typedef typename GridTypeMapper::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type diff --git a/tests/IO/Test_ildg_io.cc b/tests/IO/Test_ildg_io.cc new file mode 100644 index 00000000..6aac2e38 --- /dev/null +++ b/tests/IO/Test_ildg_io.cc @@ -0,0 +1,101 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./tests/Test_nersc_io.cc + + Copyright (C) 2015 + +Author: Azusa Yamaguchi +Author: Peter Boyle +Author: paboyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ +#ifdef HAVE_LIME + Grid_init(&argc,&argv); + + std::cout < simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + //std::vector latt_size ({48,48,48,96}); + //std::vector latt_size ({32,32,32,32}); + std::vector latt_size ({16,16,16,32}); + std::vector clatt_size ({4,4,4,8}); + int orthodir=3; + int orthosz =latt_size[orthodir]; + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + GridCartesian Coarse(clatt_size,simd_layout,mpi_layout); + + + GridParallelRNG pRNGa(&Fine); + GridParallelRNG pRNGb(&Fine); + GridSerialRNG sRNGa; + GridSerialRNG sRNGb; + + std::cout <({45,12,81,9})); + sRNGa.SeedFixedIntegers(std::vector({45,12,81,9})); + std::cout < U(4,&Fine); + + SU3::HotConfiguration(pRNGa,Umu); + + + FieldMetaData header; + + std::cout < + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + + +int main (int argc, char ** argv) +{ +#ifdef HAVE_LIME + Grid_init(&argc,&argv); + + + std::vector simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + std::vector latt_size = GridDefaultLatt(); + int orthodir=3; + int orthosz =latt_size[orthodir]; + + GridCartesian Fine(latt_size,simd_layout,mpi_layout); + + LatticeGaugeField Umu(&Fine); + std::vector U(4,&Fine); + + FieldMetaData header; + std::string file("./ildg.file"); + IldgReader IR; + IR.open(file); + IR.readConfiguration(Umu,header); + IR.close(); + + for(int mu=0;mu(Umu,mu); + } + + // Painful ; fix syntactical niceness + LatticeComplex LinkTrace(&Fine); + LinkTrace=zero; + for(int mu=0;mu Plaq_T(orthosz); + sliceSum(Plaq,Plaq_T,Nd-1); + int Nt = Plaq_T.size(); + + TComplex 
Plaq_T_sum; + Plaq_T_sum=zero; + for(int t=0;t simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); std::vector mpi_layout = GridDefaultMpi(); - std::vector latt_size ({16,16,16,16}); + //std::vector latt_size ({48,48,48,96}); + //std::vector latt_size ({32,32,32,32}); + std::vector latt_size ({16,16,16,32}); std::vector clatt_size ({4,4,4,8}); int orthodir=3; int orthosz =latt_size[orthodir]; @@ -49,30 +52,32 @@ int main (int argc, char ** argv) GridCartesian Fine(latt_size,simd_layout,mpi_layout); GridCartesian Coarse(clatt_size,simd_layout,mpi_layout); + GridParallelRNG pRNGa(&Fine); GridParallelRNG pRNGb(&Fine); GridSerialRNG sRNGa; GridSerialRNG sRNGb; + std::cout <({45,12,81,9})); sRNGa.SeedFixedIntegers(std::vector({45,12,81,9})); - + std::cout < U(4,&Fine); - SU3::ColdConfiguration(pRNGa,Umu); + SU3::HotConfiguration(pRNGa,Umu); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); int precision32 = 0; int tworow = 0; NerscIO::writeConfiguration(Umu,file,tworow,precision32); + Umu_saved = Umu; NerscIO::readConfiguration(Umu,header,file); + Umu_diff = Umu - Umu_saved; + //std::cout << "Umu_save "< Plaq_T(orthosz); sliceSum(Plaq,Plaq_T,Nd-1); @@ -139,7 +148,6 @@ int main (int argc, char ** argv) Complex p = TensorRemove(Tp); std::cout< U(4,&Fine); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/IO/Test_serialisation.cc b/tests/IO/Test_serialisation.cc index 7d911dfd..d5b52044 100644 --- a/tests/IO/Test_serialisation.cc +++ b/tests/IO/Test_serialisation.cc @@ -31,6 +31,7 @@ Author: Peter Boyle using namespace Grid; +using namespace Grid::QCD; GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); @@ -44,8 +45,8 @@ public: double, y, bool , b, std::vector, array, - std::vector>, twodimarray, - std::vector>>, cmplx3darray + std::vector >, twodimarray, + std::vector > >, cmplx3darray ); myclass() {} myclass(int i) @@ -237,7 
+238,7 @@ int main(int argc,char **argv) std::cout << "Loaded (JSON) -----------------" << std::endl; std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; } - + /* // This is still work in progress { diff --git a/tests/Test_simd.cc b/tests/Test_simd.cc index c0bbef1d..b2e8d68e 100644 --- a/tests/Test_simd.cc +++ b/tests/Test_simd.cc @@ -183,8 +183,6 @@ void IntTester(const functor &func) { typedef Integer scal; typedef vInteger vec; - GridSerialRNG sRNG; - sRNG.SeedFixedIntegers(std::vector({45,12,81,9})); int Nsimd = vec::Nsimd(); @@ -287,6 +285,50 @@ void ReductionTester(const functor &func) } +template +void IntReductionTester(const functor &func) +{ + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + reduced result(0); + reduced reference(0); + reduced tmp; + + std::vector > buf(3); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + + func.template vfunc(result,v_input1,v_input2); + + for(int i=0;i(tmp,input1[i],input2[i]); + reference+=tmp; + } + + std::cout<(funcReduce()); std::cout< mpi_layout = GridDefaultMpi(); double volume = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3]; - + GridCartesian Fine(latt_size,simd_layout,mpi_layout); GridRedBlackCartesian rbFine(latt_size,simd_layout,mpi_layout); GridParallelRNG fRNG(&Fine); @@ -55,14 +54,14 @@ int main (int argc, char ** argv) // fRNG.SeedFixedIntegers(std::vector({45,12,81,9}); std::vector seeds({1,2,3,4}); fRNG.SeedFixedIntegers(seeds); - + Field Foo(&Fine); Field Bar(&Fine); Field Check(&Fine); Field Diff(&Fine); LatticeComplex lex(&Fine); - lex = zero; + lex = zero; random(fRNG,Foo); gaussian(fRNG,Bar); @@ -98,7 +97,7 @@ int main (int argc, char ** argv) Fine.oCoorFromOindex(ocoor,o); ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir]; } - + SimpleCompressor compress; myStencil.HaloExchange(Foo,compress); @@ -147,7 +146,7 @@ int main (int argc, char ** argv) <<") " < 1.0e-4) { @@ -187,16 
+186,15 @@ int main (int argc, char ** argv) Fine.oCoorFromOindex(ocoor,o); ocoor[dir]=(ocoor[dir]+disp)%Fine._rdimensions[dir]; } - + SimpleCompressor compress; - Bar = Cshift(Foo,dir,disp); if ( disp & 0x1 ) { ECheck.checkerboard = Even; OCheck.checkerboard = Odd; - } else { + } else { ECheck.checkerboard = Odd; OCheck.checkerboard = Even; } @@ -213,7 +211,7 @@ int main (int argc, char ** argv) permute(OCheck._odata[i],EFoo._odata[SE->_offset],permute_type); else if (SE->_is_local) OCheck._odata[i] = EFoo._odata[SE->_offset]; - else + else OCheck._odata[i] = EStencil.CommBuf()[SE->_offset]; } OStencil.HaloExchange(OFoo,compress); @@ -222,18 +220,18 @@ int main (int argc, char ** argv) StencilEntry *SE; SE = OStencil.GetEntry(permute_type,0,i); // std::cout << "ODD source "<< i<<" -> " <_offset << " "<< SE->_is_local<_is_local && SE->_permute ) permute(ECheck._odata[i],OFoo._odata[SE->_offset],permute_type); else if (SE->_is_local) ECheck._odata[i] = OFoo._odata[SE->_offset]; - else + else ECheck._odata[i] = OStencil.CommBuf()[SE->_offset]; } - + setCheckerboard(Check,ECheck); setCheckerboard(Check,OCheck); - + Real nrmC = norm2(Check); Real nrmB = norm2(Bar); Diff = Check-Bar; @@ -256,10 +254,10 @@ int main (int argc, char ** argv) diff =norm2(ddiff); if ( diff > 0){ std::cout <<"Coor (" << coor[0]<<","< 1.0e-4) exit(-1); diff --git a/tests/core/Test_GaugeAction.cc b/tests/core/Test_GaugeAction.cc index 2f0535f1..572f19fb 100644 --- a/tests/core/Test_GaugeAction.cc +++ b/tests/core/Test_GaugeAction.cc @@ -73,7 +73,7 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); - NerscField header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/core/Test_RectPlaq.cc b/tests/core/Test_RectPlaq.cc index 9154f879..2e9cc832 100644 --- a/tests/core/Test_RectPlaq.cc +++ b/tests/core/Test_RectPlaq.cc @@ -90,7 +90,7 @@ int main (int argc, char ** argv) std::vector U(4,&Fine); - NerscField 
header; + FieldMetaData header; std::string file("./ckpoint_lat.4000"); NerscIO::readConfiguration(Umu,header,file); diff --git a/tests/core/Test_fft_gfix.cc b/tests/core/Test_fft_gfix.cc index 7938241e..9732eb85 100644 --- a/tests/core/Test_fft_gfix.cc +++ b/tests/core/Test_fft_gfix.cc @@ -28,212 +28,6 @@ Author: Peter Boyle /* END LEGAL */ #include -using namespace Grid; -using namespace Grid::QCD; - -template -class FourierAcceleratedGaugeFixer : public Gimpl { - public: - INHERIT_GIMPL_TYPES(Gimpl); - - typedef typename Gimpl::GaugeLinkField GaugeMat; - typedef typename Gimpl::GaugeField GaugeLorentz; - - static void GaugeLinkToLieAlgebraField(const std::vector &U,std::vector &A) { - for(int mu=0;mu &A,GaugeMat &dmuAmu) { - dmuAmu=zero; - for(int mu=0;mu::avgPlaquette(Umu); - Real org_link_trace=WilsonLoops::linkTrace(Umu); - Real old_trace = org_link_trace; - Real trG; - - std::vector U(Nd,grid); - GaugeMat dmuAmu(grid); - - for(int i=0;i(Umu,mu); - //trG = SteepestDescentStep(U,alpha,dmuAmu); - trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); - for(int mu=0;mu(Umu,U[mu],mu); - // Monitor progress and convergence test - // infrequently to minimise cost overhead - if ( i %20 == 0 ) { - Real plaq =WilsonLoops::avgPlaquette(Umu); - Real link_trace=WilsonLoops::linkTrace(Umu); - - std::cout << GridLogMessage << " Iteration "< &U,Real & alpha, GaugeMat & dmuAmu) { - GridBase *grid = U[0]._grid; - - std::vector A(Nd,grid); - GaugeMat g(grid); - - GaugeLinkToLieAlgebraField(U,A); - ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); - - - Real vol = grid->gSites(); - Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; - - SU::GaugeTransform(U,g); - - return trG; - } - - static Real FourierAccelSteepestDescentStep(std::vector &U,Real & alpha, GaugeMat & dmuAmu) { - - GridBase *grid = U[0]._grid; - - Real vol = grid->gSites(); - - FFT theFFT((GridCartesian *)grid); - - LatticeComplex Fp(grid); - LatticeComplex psq(grid); psq=zero; - LatticeComplex pmu(grid); - LatticeComplex 
one(grid); one = Complex(1.0,0.0); - - GaugeMat g(grid); - GaugeMat dmuAmu_p(grid); - std::vector A(Nd,grid); - - GaugeLinkToLieAlgebraField(U,A); - - DmuAmu(A,dmuAmu); - - theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); - - ////////////////////////////////// - // Work out Fp = psq_max/ psq... - ////////////////////////////////// - std::vector latt_size = grid->GlobalDimensions(); - std::vector coor(grid->_ndimension,0); - for(int mu=0;mu::taExp(ciadmam,g); - - Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; - - SU::GaugeTransform(U,g); - - return trG; - } - - static void ExpiAlphaDmuAmu(const std::vector &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { - GridBase *grid = g._grid; - Complex cialpha(0.0,-alpha); - GaugeMat ciadmam(grid); - DmuAmu(A,dmuAmu); - ciadmam = dmuAmu*cialpha; - SU::taExp(ciadmam,g); - } -/* - //////////////////////////////////////////////////////////////// - // NB The FT for fields living on links has an extra phase in it - // Could add these to the FFT class as a later task since this code - // might be reused elsewhere ???? 
- //////////////////////////////////////////////////////////////// - static void InverseFourierTransformAmu(FFT &theFFT,const std::vector &Ap,std::vector &Ax) { - GridBase * grid = theFFT.Grid(); - std::vector latt_size = grid->GlobalDimensions(); - - ComplexField pmu(grid); - ComplexField pha(grid); - GaugeMat Apha(grid); - - Complex ci(0.0,1.0); - - for(int mu=0;mu &Ax,std::vector &Ap) { - GridBase * grid = theFFT.Grid(); - std::vector latt_size = grid->GlobalDimensions(); - - ComplexField pmu(grid); - ComplexField pha(grid); - Complex ci(0.0,1.0); - - // Sign convention for FFTW calls: - // A(x)= Sum_p e^ipx A(p) / V - // A(p)= Sum_p e^-ipx A(x) - - for(int mu=0;mu seeds({1,2,3,4}); @@ -264,22 +58,24 @@ int main (int argc, char ** argv) std::cout<< "*****************************************************************" <::avgPlaquette(Umu); std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10); + Umu = Urnd; + FourierAcceleratedGaugeFixer::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,false); plaq=WilsonLoops::avgPlaquette(Umu); std::cout << " Final plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); - // std::cout<< "*****************************************************************" <::avgPlaquette(Umu); + std::cout << " Final plaquette "<::avgPlaquette(Umu); + std::cout << " Initial plaquette "<::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); + + plaq=WilsonLoops::avgPlaquette(Umu); + std::cout << " Final plaquette "< IRL(HermOp,X,Nk,Nm,eresid,Nit); - ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); + ImplicitlyRestartedLanczos ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); LatticeComplex src(grid); gaussian(RNG,src); { diff --git a/tests/forces/Test_contfrac_force.cc b/tests/forces/Test_contfrac_force.cc index 227ad5a0..2afb4dde 100644 --- a/tests/forces/Test_contfrac_force.cc +++ 
b/tests/forces/Test_contfrac_force.cc @@ -139,7 +139,7 @@ int main (int argc, char ** argv) } - Complex dSpred = sum(dS); + ComplexD dSpred = sum(dS); std::cout << GridLogMessage << " S "< - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution - directory. - *******************************************************************************/ - -#include - -using namespace Grid; -using namespace Hadrons; - -/******************************************************************************* - * Macros to reduce code duplication. - ******************************************************************************/ -// Useful definitions -#define ZERO_MOM "0. 0. 0. 0." 
-#define INIT_INDEX(s, n) (std::string(s) + "_" + std::to_string(n)) -#define ADD_INDEX(s, n) (s + "_" + std::to_string(n)) -#define LABEL_3PT(s, t1, t2) ADD_INDEX(INIT_INDEX(s, t1), t2) -#define LABEL_4PT(s, t1, t2, t3) ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3) -#define LABEL_4PT_NOISE(s, t1, t2, t3, nn) ADD_INDEX(ADD_INDEX(ADD_INDEX(INIT_INDEX(s, t1), t2), t3), nn) - -// Wall source/sink macros -#define NAME_3MOM_WALL_SOURCE(t, mom) ("wall_" + std::to_string(t) + "_" + mom) -#define NAME_WALL_SOURCE(t) NAME_3MOM_WALL_SOURCE(t, ZERO_MOM) -#define NAME_POINT_SOURCE(pos) ("point_" + pos) - -#define MAKE_3MOM_WALL_PROP(tW, mom, propName, solver)\ -{\ - std::string srcName = NAME_3MOM_WALL_SOURCE(tW, mom);\ - makeWallSource(application, srcName, tW, mom);\ - makePropagator(application, propName, srcName, solver);\ -} - -#define MAKE_WALL_PROP(tW, propName, solver)\ - MAKE_3MOM_WALL_PROP(tW, ZERO_MOM, propName, solver) - -// Sequential source macros -#define MAKE_SEQUENTIAL_PROP(tS, qSrc, mom, propName, solver)\ -{\ - std::string srcName = ADD_INDEX(qSrc + "_seq", tS);\ - makeSequentialSource(application, srcName, qSrc, tS, mom);\ - makePropagator(application, propName, srcName, solver);\ -} - -// Point source macros -#define MAKE_POINT_PROP(pos, propName, solver)\ -{\ - std::string srcName = NAME_POINT_SOURCE(pos);\ - makePointSource(application, srcName, pos);\ - makePropagator(application, propName, srcName, solver);\ -} - -/******************************************************************************* - * Functions for propagator construction. - ******************************************************************************/ - -/******************************************************************************* - * Name: makePointSource - * Purpose: Construct point source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * pos - Position of point source. 
- * Returns: None. - ******************************************************************************/ -inline void makePointSource(Application &application, std::string srcName, - std::string pos) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::Point::Par pointPar; - pointPar.position = pos; - application.createModule(srcName, pointPar); - } -} - -/******************************************************************************* - * Name: makeSequentialSource - * Purpose: Construct sequential source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * qSrc - Input quark for sequential inversion. - * tS - sequential source timeslice. - * mom - momentum insertion (default is zero). - * Returns: None. - ******************************************************************************/ -inline void makeSequentialSource(Application &application, std::string srcName, - std::string qSrc, unsigned int tS, - std::string mom = ZERO_MOM) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::SeqGamma::Par seqPar; - seqPar.q = qSrc; - seqPar.tA = tS; - seqPar.tB = tS; - seqPar.mom = mom; - seqPar.gamma = Gamma::Algebra::GammaT; - application.createModule(srcName, seqPar); - } -} - -/******************************************************************************* - * Name: makeWallSource - * Purpose: Construct wall source and add to application module. - * Parameters: application - main application that stores modules. - * srcName - name of source module to create. - * tW - wall source timeslice. - * mom - momentum insertion (default is zero). - * Returns: None. 
- ******************************************************************************/ -inline void makeWallSource(Application &application, std::string srcName, - unsigned int tW, std::string mom = ZERO_MOM) -{ - // If the source already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(srcName))) - { - MSource::Wall::Par wallPar; - wallPar.tW = tW; - wallPar.mom = mom; - application.createModule(srcName, wallPar); - } -} - -/******************************************************************************* - * Name: makeWallSink - * Purpose: Wall sink smearing of a propagator. - * Parameters: application - main application that stores modules. - * propName - name of input propagator. - * wallName - name of smeared propagator. - * mom - momentum insertion (default is zero). - * Returns: None. - ******************************************************************************/ -inline void makeWallSink(Application &application, std::string propName, - std::string wallName, std::string mom = ZERO_MOM) -{ - // If the propagator has already been smeared, don't smear it again. - // Temporarily removed, strategy for sink smearing likely to change. - /*if (!(Environment::getInstance().hasModule(wallName))) - { - MSink::Wall::Par wallPar; - wallPar.q = propName; - wallPar.mom = mom; - application.createModule(wallName, wallPar); - }*/ -} - -/******************************************************************************* - * Name: makePropagator - * Purpose: Construct source and propagator then add to application module. - * Parameters: application - main application that stores modules. - * propName - name of propagator module to create. - * srcName - name of source module to use. - * solver - solver to use (default is CG). - * Returns: None. 
- ******************************************************************************/ -inline void makePropagator(Application &application, std::string &propName, - std::string &srcName, std::string &solver) -{ - // If the propagator already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(propName))) - { - Quark::Par quarkPar; - quarkPar.source = srcName; - quarkPar.solver = solver; - application.createModule(propName, quarkPar); - } -} - -/******************************************************************************* - * Name: makeLoop - * Purpose: Use noise source and inversion result to make loop propagator, then - * add to application module. - * Parameters: application - main application that stores modules. - * propName - name of propagator module to create. - * srcName - name of noise source module to use. - * resName - name of inversion result on given noise source. - * Returns: None. - ******************************************************************************/ -inline void makeLoop(Application &application, std::string &propName, - std::string &srcName, std::string &resName) -{ - // If the loop propagator already exists, don't make the module again. - if (!(Environment::getInstance().hasModule(propName))) - { - MLoop::NoiseLoop::Par loopPar; - loopPar.q = resName; - loopPar.eta = srcName; - application.createModule(propName, loopPar); - } -} - -/******************************************************************************* - * Contraction module creation. - ******************************************************************************/ - -/******************************************************************************* - * Name: mesonContraction - * Purpose: Create meson contraction module and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. 
- * label - unique label to construct module name. - * mom - momentum to project (default is zero) - * gammas - gamma insertions at source and sink. - * Returns: None. - ******************************************************************************/ -inline void mesonContraction(Application &application, unsigned int npt, - std::string &q1, std::string &q2, - std::string &label, - std::string mom = ZERO_MOM, - std::string gammas = "") -{ - std::string modName = std::to_string(npt) + "pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::Meson::Par mesPar; - mesPar.output = std::to_string(npt) + "pt/" + label; - mesPar.q1 = q1; - mesPar.q2 = q2; - mesPar.mom = mom; - mesPar.gammas = gammas; - application.createModule(modName, mesPar); - } - } - -/******************************************************************************* - * Name: gamma3ptContraction - * Purpose: Create gamma3pt contraction module and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * label - unique label to construct module name. - * gamma - gamma insertions between q2 and q3. - * Returns: None. 
- ******************************************************************************/ -inline void gamma3ptContraction(Application &application, unsigned int npt, - std::string &q1, std::string &q2, - std::string &q3, std::string &label, - Gamma::Algebra gamma = Gamma::Algebra::Identity) -{ - std::string modName = std::to_string(npt) + "pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::Gamma3pt::Par gamma3ptPar; - gamma3ptPar.output = std::to_string(npt) + "pt/" + label; - gamma3ptPar.q1 = q1; - gamma3ptPar.q2 = q2; - gamma3ptPar.q3 = q3; - gamma3ptPar.gamma = gamma; - application.createModule(modName, gamma3ptPar); - } - } - -/******************************************************************************* - * Name: weakContraction[Eye,NonEye] - * Purpose: Create Weak Hamiltonian contraction module for Eye/NonEye topology - * and add to application module. - * Parameters: application - main application that stores modules. - * npt - specify n-point correlator (for labelling). - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * q4 - quark propagator 4. - * label - unique label to construct module name. - * Returns: None. 
- ******************************************************************************/ -#define HW_CONTRACTION(top) \ -inline void weakContraction##top(Application &application, unsigned int npt,\ - std::string &q1, std::string &q2, \ - std::string &q3, std::string &q4, \ - std::string &label)\ -{\ - std::string modName = std::to_string(npt) + "pt_" + label;\ - if (!(Environment::getInstance().hasModule(modName)))\ - {\ - MContraction::WeakHamiltonian##top::Par weakPar;\ - weakPar.output = std::to_string(npt) + "pt/" + label;\ - weakPar.q1 = q1;\ - weakPar.q2 = q2;\ - weakPar.q3 = q3;\ - weakPar.q4 = q4;\ - application.createModule(modName, weakPar);\ - }\ -} -HW_CONTRACTION(Eye) // weakContractionEye -HW_CONTRACTION(NonEye) // weakContractionNonEye - -/******************************************************************************* - * Name: disc0Contraction - * Purpose: Create contraction module for 4pt Weak Hamiltonian + current - * disconnected topology for neutral mesons and add to application - * module. - * Parameters: application - main application that stores modules. - * q1 - quark propagator 1. - * q2 - quark propagator 2. - * q3 - quark propagator 3. - * q4 - quark propagator 4. - * label - unique label to construct module name. - * Returns: None. 
- ******************************************************************************/ -inline void disc0Contraction(Application &application, - std::string &q1, std::string &q2, - std::string &q3, std::string &q4, - std::string &label) -{ - std::string modName = "4pt_" + label; - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::WeakNeutral4ptDisc::Par disc0Par; - disc0Par.output = "4pt/" + label; - disc0Par.q1 = q1; - disc0Par.q2 = q2; - disc0Par.q3 = q3; - disc0Par.q4 = q4; - application.createModule(modName, disc0Par); - } - } - -/******************************************************************************* - * Name: discLoopContraction - * Purpose: Create contraction module for disconnected loop and add to - * application module. - * Parameters: application - main application that stores modules. - * q_loop - loop quark propagator. - * modName - unique module name. - * gamma - gamma matrix to use in contraction. - * Returns: None. - ******************************************************************************/ -inline void discLoopContraction(Application &application, - std::string &q_loop, std::string &modName, - Gamma::Algebra gamma = Gamma::Algebra::Identity) -{ - if (!(Environment::getInstance().hasModule(modName))) - { - MContraction::DiscLoop::Par discPar; - discPar.output = "disc/" + modName; - discPar.q_loop = q_loop; - discPar.gamma = gamma; - application.createModule(modName, discPar); - } - } diff --git a/tests/hadrons/Test_hadrons_meson_3pt.cc b/tests/hadrons/Test_hadrons_meson_3pt.cc index efef6931..382c39d4 100644 --- a/tests/hadrons/Test_hadrons_meson_3pt.cc +++ b/tests/hadrons/Test_hadrons_meson_3pt.cc @@ -61,6 +61,14 @@ int main(int argc, char *argv[]) // gauge field application.createModule("gauge"); + + // set fermion boundary conditions to be periodic space, antiperiodic time. 
+ std::string boundary = "1 1 1 -1"; + + // sink + MSink::Point::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); for (unsigned int i = 0; i < flavour.size(); ++i) { // actions @@ -69,6 +77,7 @@ int main(int argc, char *argv[]) actionPar.Ls = 12; actionPar.M5 = 1.8; actionPar.mass = mass[i]; + actionPar.boundary = boundary; application.createModule("DWF_" + flavour[i], actionPar); // solvers @@ -110,15 +119,15 @@ int main(int argc, char *argv[]) } // propagators - Quark::Par quarkPar; + MFermion::GaugeProp::Par quarkPar; quarkPar.solver = "CG_" + flavour[i]; quarkPar.source = srcName; - application.createModule(qName[i], quarkPar); + application.createModule(qName[i], quarkPar); for (unsigned int mu = 0; mu < Nd; ++mu) { quarkPar.source = seqName[i][mu]; seqName[i][mu] = "Q_" + flavour[i] + "-" + seqName[i][mu]; - application.createModule(seqName[i][mu], quarkPar); + application.createModule(seqName[i][mu], quarkPar); } } @@ -131,7 +140,7 @@ int main(int argc, char *argv[]) mesPar.q1 = qName[i]; mesPar.q2 = qName[j]; mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 0."; + mesPar.sink = "sink"; application.createModule("meson_Z2_" + std::to_string(t) + "_" @@ -150,7 +159,7 @@ int main(int argc, char *argv[]) mesPar.q1 = qName[i]; mesPar.q2 = seqName[j][mu]; mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 
0."; + mesPar.sink = "sink"; application.createModule("3pt_Z2_" + std::to_string(t) + "_" diff --git a/tests/hadrons/Test_hadrons_rarekaon.cc b/tests/hadrons/Test_hadrons_rarekaon.cc deleted file mode 100644 index 89d7d501..00000000 --- a/tests/hadrons/Test_hadrons_rarekaon.cc +++ /dev/null @@ -1,337 +0,0 @@ -/******************************************************************************* - Grid physics library, www.github.com/paboyle/Grid - - Source file: tests/hadrons/Test_hadrons_rarekaon.cc - - Copyright (C) 2017 - - Author: Andrew Lawson - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution - directory. 
- *******************************************************************************/ - -#include "Test_hadrons.hpp" - -using namespace Grid; -using namespace Hadrons; - -enum quarks -{ - light = 0, - strange = 1, - charm = 2 -}; - -int main(int argc, char *argv[]) -{ - // parse command line ////////////////////////////////////////////////////// - std::string configStem; - - if (argc < 2) - { - std::cerr << "usage: " << argv[0] << " [Grid options]"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - configStem = argv[1]; - - // initialization ////////////////////////////////////////////////////////// - Grid_init(&argc, &argv); - HadronsLogError.Active(GridLogError.isActive()); - HadronsLogWarning.Active(GridLogWarning.isActive()); - HadronsLogMessage.Active(GridLogMessage.isActive()); - HadronsLogIterative.Active(GridLogIterative.isActive()); - HadronsLogDebug.Active(GridLogDebug.isActive()); - LOG(Message) << "Grid initialized" << std::endl; - - // run setup /////////////////////////////////////////////////////////////// - Application application; - std::vector mass = {.01, .04, .2}; - std::vector flavour = {"l", "s", "c"}; - std::vector solvers = {"CG_l", "CG_s", "CG_c"}; - std::string kmom = "0. 0. 0. 0."; - std::string pmom = "1. 0. 0. 0."; - std::string qmom = "-1. 0. 0. 0."; - std::string mqmom = "1. 0. 0. 0."; - std::vector tKs = {0}; - unsigned int dt_pi = 16; - std::vector tJs = {8}; - unsigned int n_noise = 1; - unsigned int nt = 32; - bool do_disconnected(false); - - // Global parameters. 
- Application::GlobalPar globalPar; - globalPar.trajCounter.start = 1500; - globalPar.trajCounter.end = 1520; - globalPar.trajCounter.step = 20; - globalPar.seed = "1 2 3 4"; - globalPar.genetic.maxGen = 1000; - globalPar.genetic.maxCstGen = 200; - globalPar.genetic.popSize = 20; - globalPar.genetic.mutationRate = .1; - application.setPar(globalPar); - - // gauge field - if (configStem == "None") - { - application.createModule("gauge"); - } - else - { - MGauge::Load::Par gaugePar; - gaugePar.file = configStem; - application.createModule("gauge", gaugePar); - } - for (unsigned int i = 0; i < flavour.size(); ++i) - { - // actions - MAction::DWF::Par actionPar; - actionPar.gauge = "gauge"; - actionPar.Ls = 16; - actionPar.M5 = 1.8; - actionPar.mass = mass[i]; - application.createModule("DWF_" + flavour[i], actionPar); - - // solvers - // RBPrecCG -> CG - MSolver::RBPrecCG::Par solverPar; - solverPar.action = "DWF_" + flavour[i]; - solverPar.residual = 1.0e-8; - application.createModule(solvers[i], - solverPar); - } - - // Create noise propagators for loops. - std::vector noiseSrcs; - std::vector> noiseRes; - std::vector> noiseProps; - if (n_noise > 0) - { - MSource::Z2::Par noisePar; - noisePar.tA = 0; - noisePar.tB = nt - 1; - std::string loop_stem = "loop_"; - - noiseRes.resize(flavour.size()); - noiseProps.resize(flavour.size()); - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string eta = INIT_INDEX("noise", nn); - application.createModule(eta, noisePar); - noiseSrcs.push_back(eta); - - for (unsigned int f = 0; f < flavour.size(); ++f) - { - std::string loop_prop = INIT_INDEX(loop_stem + flavour[f], nn); - std::string loop_res = loop_prop + "_res"; - makePropagator(application, loop_res, eta, solvers[f]); - makeLoop(application, loop_prop, eta, loop_res); - noiseRes[f].push_back(loop_res); - noiseProps[f].push_back(loop_prop); - } - } - } - - // Translate rare kaon decay across specified timeslices. 
- for (unsigned int i = 0; i < tKs.size(); ++i) - { - // Zero-momentum wall source propagators for kaon and pion. - unsigned int tK = tKs[i]; - unsigned int tpi = (tK + dt_pi) % nt; - std::string q_Kl_0 = INIT_INDEX("Q_l_0", tK); - std::string q_pil_0 = INIT_INDEX("Q_l_0", tpi); - MAKE_WALL_PROP(tK, q_Kl_0, solvers[light]); - MAKE_WALL_PROP(tpi, q_pil_0, solvers[light]); - - // Wall sources for kaon and pion with momentum insertion. If either - // p or k are zero, or p = k, re-use the existing name to avoid - // duplicating a propagator. - std::string q_Ks_k = INIT_INDEX("Q_Ks_k", tK); - std::string q_Ks_p = INIT_INDEX((kmom == pmom) ? "Q_Ks_k" : "Q_Ks_p", tK); - std::string q_pil_k = INIT_INDEX((kmom == ZERO_MOM) ? "Q_l_0" : "Q_l_k", tpi); - std::string q_pil_p = INIT_INDEX((pmom == kmom) ? q_pil_k : ((pmom == ZERO_MOM) ? "Q_l_0" : "Q_l_p"), tpi); - MAKE_3MOM_WALL_PROP(tK, kmom, q_Ks_k, solvers[strange]); - MAKE_3MOM_WALL_PROP(tK, pmom, q_Ks_p, solvers[strange]); - MAKE_3MOM_WALL_PROP(tpi, kmom, q_pil_k, solvers[light]); - MAKE_3MOM_WALL_PROP(tpi, pmom, q_pil_p, solvers[light]); - - /*********************************************************************** - * CONTRACTIONS: pi and K 2pt contractions with mom = p, k. - **********************************************************************/ - // Wall-Point - std::string PW_K_k = INIT_INDEX("PW_K_k", tK); - std::string PW_K_p = INIT_INDEX("PW_K_p", tK); - std::string PW_pi_k = INIT_INDEX("PW_pi_k", tpi); - std::string PW_pi_p = INIT_INDEX("PW_pi_p", tpi); - mesonContraction(application, 2, q_Kl_0, q_Ks_k, PW_K_k, kmom); - mesonContraction(application, 2, q_Kl_0, q_Ks_p, PW_K_p, pmom); - mesonContraction(application, 2, q_pil_k, q_pil_0, PW_pi_k, kmom); - mesonContraction(application, 2, q_pil_p, q_pil_0, PW_pi_p, pmom); - // Wall-Wall, to be done - requires modification of meson module. 
- - /*********************************************************************** - * CONTRACTIONS: 3pt Weak Hamiltonian, C & W (non-Eye type) classes. - **********************************************************************/ - std::string HW_CW_k = LABEL_3PT("HW_CW_k", tK, tpi); - std::string HW_CW_p = LABEL_3PT("HW_CW_p", tK, tpi); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, q_pil_0, HW_CW_k); - weakContractionNonEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, q_pil_0, HW_CW_p); - - /*********************************************************************** - * CONTRACTIONS: 3pt sd insertion. - **********************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. - std::string sd_k = LABEL_3PT("sd_k", tK, tpi); - std::string sd_p = LABEL_3PT("sd_p", tK, tpi); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_k, q_pil_k, sd_k); - gamma3ptContraction(application, 3, q_Kl_0, q_Ks_p, q_pil_p, sd_p); - - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - /******************************************************************* - * CONTRACTIONS: 3pt Weak Hamiltonian, S and E (Eye type) classes. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0 instead. - for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string HW_SE_k = LABEL_3PT("HW_SE_k_" + flavour[f], tK, tpi); - std::string HW_SE_p = LABEL_3PT("HW_SE_p_" + flavour[f], tK, tpi); - std::string loop_q = noiseProps[f][nn]; - weakContractionEye(application, 3, q_Kl_0, q_Ks_k, q_pil_k, loop_q, HW_CW_k); - weakContractionEye(application, 3, q_Kl_0, q_Ks_p, q_pil_p, loop_q, HW_CW_p); - } - } - } - - // Perform separate contractions for each t_J position. - for (unsigned int j = 0; j < tJs.size(); ++j) - { - // Sequential sources for current insertions. Local for now, - // gamma_0 only. 
- unsigned int tJ = (tJs[j] + tK) % nt; - MSource::SeqGamma::Par seqPar; - std::string q_KlCl_q = LABEL_3PT("Q_KlCl_q", tK, tJ); - std::string q_KsCs_mq = LABEL_3PT("Q_KsCs_mq", tK, tJ); - std::string q_pilCl_q = LABEL_3PT("Q_pilCl_q", tpi, tJ); - std::string q_pilCl_mq = LABEL_3PT("Q_pilCl_mq", tpi, tJ); - MAKE_SEQUENTIAL_PROP(tJ, q_Kl_0, qmom, q_KlCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_Ks_k, mqmom, q_KsCs_mq, solvers[strange]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_p, qmom, q_pilCl_q, solvers[light]); - MAKE_SEQUENTIAL_PROP(tJ, q_pil_0, mqmom, q_pilCl_mq, solvers[light]); - - /******************************************************************* - * CONTRACTIONS: pi and K 3pt contractions with current insertion. - ******************************************************************/ - // Wall-Point - std::string C_PW_Kl = LABEL_3PT("C_PW_Kl", tK, tJ); - std::string C_PW_Ksb = LABEL_3PT("C_PW_Ksb", tK, tJ); - std::string C_PW_pilb = LABEL_3PT("C_PW_pilb", tK, tJ); - std::string C_PW_pil = LABEL_3PT("C_PW_pil", tK, tJ); - mesonContraction(application, 3, q_KlCl_q, q_Ks_k, C_PW_Kl, pmom); - mesonContraction(application, 3, q_Kl_0, q_KsCs_mq, C_PW_Ksb, pmom); - mesonContraction(application, 3, q_pil_0, q_pilCl_q, C_PW_pilb, kmom); - mesonContraction(application, 3, q_pilCl_mq, q_pil_p, C_PW_pil, kmom); - // Wall-Wall, to be done. - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, C & W classes. 
- ******************************************************************/ - std::string CW_Kl = LABEL_4PT("CW_Kl", tK, tJ, tpi); - std::string CW_Ksb = LABEL_4PT("CW_Ksb", tK, tJ, tpi); - std::string CW_pilb = LABEL_4PT("CW_pilb", tK, tJ, tpi); - std::string CW_pil = LABEL_4PT("CW_pil", tK, tJ, tpi); - weakContractionNonEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, q_pil_0, CW_Kl); - weakContractionNonEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, q_pil_0, CW_Ksb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, q_pil_0, CW_pilb); - weakContractionNonEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, q_pilCl_mq, CW_pil); - - /******************************************************************* - * CONTRACTIONS: 4pt contractions, sd insertions. - ******************************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. - std::string sd_Kl = LABEL_4PT("sd_Kl", tK, tJ, tpi); - std::string sd_Ksb = LABEL_4PT("sd_Ksb", tK, tJ, tpi); - std::string sd_pilb = LABEL_4PT("sd_pilb", tK, tJ, tpi); - gamma3ptContraction(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, sd_Kl); - gamma3ptContraction(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, sd_Ksb); - gamma3ptContraction(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, sd_pilb); - - // Sequential sources for each noise propagator. - for (unsigned int nn = 0; nn < n_noise; ++nn) - { - std::string loop_stem = "loop_"; - - // Contraction required for each quark flavour - alternatively - // drop the strange loop if not performing disconnected - // contractions or neglecting H_W operators Q_3 -> Q_10. 
- for (unsigned int f = 0; f < flavour.size(); ++f) - { - if ((f != strange) || do_disconnected) - { - std::string eta = noiseSrcs[nn]; - std::string loop_q = noiseProps[f][nn]; - std::string loop_qCq = LABEL_3PT(loop_stem + flavour[f], tJ, nn); - std::string loop_qCq_res = loop_qCq + "_res"; - MAKE_SEQUENTIAL_PROP(tJ, noiseRes[f][nn], qmom, - loop_qCq_res, solvers[f]); - makeLoop(application, loop_qCq, eta, loop_qCq_res); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, S & E classes. - ******************************************************/ - // Note: eventually will use wall sink smeared q_Kl_0/q_KlCl_q instead. - std::string SE_Kl = LABEL_4PT_NOISE("SE_Kl", tK, tJ, tpi, nn); - std::string SE_Ksb = LABEL_4PT_NOISE("SE_Ksb", tK, tJ, tpi, nn); - std::string SE_pilb = LABEL_4PT_NOISE("SE_pilb", tK, tJ, tpi, nn); - std::string SE_loop = LABEL_4PT_NOISE("SE_loop", tK, tJ, tpi, nn); - weakContractionEye(application, 4, q_KlCl_q, q_Ks_k, q_pil_p, loop_q, SE_Kl); - weakContractionEye(application, 4, q_Kl_0, q_KsCs_mq, q_pil_p, loop_q, SE_Ksb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, SE_pilb); - weakContractionEye(application, 4, q_Kl_0, q_Ks_k, q_pil_p, loop_qCq, SE_loop); - - /******************************************************* - * CONTRACTIONS: 4pt contractions, pi0 disconnected - * loop. - ******************************************************/ - std::string disc0 = LABEL_4PT_NOISE("disc0", tK, tJ, tpi, nn); - disc0Contraction(application, q_Kl_0, q_Ks_k, q_pilCl_q, loop_q, disc0); - - /******************************************************* - * CONTRACTIONS: Disconnected loop. 
- ******************************************************/ - std::string discLoop = "disc_" + loop_qCq; - discLoopContraction(application, loop_qCq, discLoop); - } - } - } - } - } - // execution - std::string par_file_name = "rarekaon_000_100_tK0_tpi16_tJ8_noloop_mc0.2.xml"; - application.saveParameterFile(par_file_name); - application.run(); - - // epilogue - LOG(Message) << "Grid is finalizing now" << std::endl; - Grid_finalize(); - - return EXIT_SUCCESS; -} diff --git a/tests/hadrons/Test_hadrons_spectrum.cc b/tests/hadrons/Test_hadrons_spectrum.cc index 2d731ff4..801674f7 100644 --- a/tests/hadrons/Test_hadrons_spectrum.cc +++ b/tests/hadrons/Test_hadrons_spectrum.cc @@ -63,6 +63,14 @@ int main(int argc, char *argv[]) MSource::Point::Par ptPar; ptPar.position = "0 0 0 0"; application.createModule("pt", ptPar); + // sink + MSink::Point::Par sinkPar; + sinkPar.mom = "0 0 0"; + application.createModule("sink", sinkPar); + + // set fermion boundary conditions to be periodic space, antiperiodic time. 
+ std::string boundary = "1 1 1 -1"; + for (unsigned int i = 0; i < flavour.size(); ++i) { // actions @@ -71,6 +79,7 @@ int main(int argc, char *argv[]) actionPar.Ls = 12; actionPar.M5 = 1.8; actionPar.mass = mass[i]; + actionPar.boundary = boundary; application.createModule("DWF_" + flavour[i], actionPar); // solvers @@ -81,31 +90,31 @@ int main(int argc, char *argv[]) solverPar); // propagators - Quark::Par quarkPar; + MFermion::GaugeProp::Par quarkPar; quarkPar.solver = "CG_" + flavour[i]; quarkPar.source = "pt"; - application.createModule("Qpt_" + flavour[i], quarkPar); + application.createModule("Qpt_" + flavour[i], quarkPar); quarkPar.source = "z2"; - application.createModule("QZ2_" + flavour[i], quarkPar); + application.createModule("QZ2_" + flavour[i], quarkPar); } for (unsigned int i = 0; i < flavour.size(); ++i) for (unsigned int j = i; j < flavour.size(); ++j) { MContraction::Meson::Par mesPar; - mesPar.output = "mesons/pt_" + flavour[i] + flavour[j]; - mesPar.q1 = "Qpt_" + flavour[i]; - mesPar.q2 = "Qpt_" + flavour[j]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 0."; + mesPar.output = "mesons/pt_" + flavour[i] + flavour[j]; + mesPar.q1 = "Qpt_" + flavour[i]; + mesPar.q2 = "Qpt_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; application.createModule("meson_pt_" + flavour[i] + flavour[j], mesPar); - mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; - mesPar.q1 = "QZ2_" + flavour[i]; - mesPar.q2 = "QZ2_" + flavour[j]; - mesPar.gammas = "all"; - mesPar.mom = "0. 0. 0. 
0."; + mesPar.output = "mesons/Z2_" + flavour[i] + flavour[j]; + mesPar.q1 = "QZ2_" + flavour[i]; + mesPar.q2 = "QZ2_" + flavour[j]; + mesPar.gammas = "all"; + mesPar.sink = "sink"; application.createModule("meson_Z2_" + flavour[i] + flavour[j], mesPar); diff --git a/tests/hmc/Test_hmc_ScalarActionNxN.cc b/tests/hmc/Test_hmc_ScalarActionNxN.cc new file mode 100644 index 00000000..a4dad1a3 --- /dev/null +++ b/tests/hmc/Test_hmc_ScalarActionNxN.cc @@ -0,0 +1,193 @@ +/************************************************************************************* + +Grid physics library, www.github.com/paboyle/Grid + +Source file: ./tests/Test_hmc_WilsonFermionGauge.cc + +Copyright (C) 2016 + +Author: Guido Cossu + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +See the full license in the file "LICENSE" in the top level distribution directory +*************************************************************************************/ +/* END LEGAL */ +#include +namespace Grid { +class ScalarActionParameters : Serializable { + public: + GRID_SERIALIZABLE_CLASS_MEMBERS(ScalarActionParameters, + double, mass_squared, + double, lambda); + + template + ScalarActionParameters(Reader& Reader){ + read(Reader, "ScalarAction", *this); + } + +}; +} + +using namespace Grid; +using namespace Grid::QCD; + +template +class MagMeas : public HmcObservable { +public: + typedef typename Impl::Field Field; + typedef typename Impl::Simd::scalar_type Trace; + + void TrajectoryComplete(int traj, + Field &U, + GridSerialRNG &sRNG, + GridParallelRNG &pRNG) { + + int def_prec = std::cout.precision(); + + std::cout << std::setprecision(std::numeric_limits::digits10 + 1); + std::cout << GridLogMessage + << "m= " << TensorRemove(trace(sum(U))) << std::endl; + std::cout << GridLogMessage + << "m^2= " << TensorRemove(trace(sum(U)*sum(U))) << std::endl; + std::cout << GridLogMessage + << "phi^2= " << TensorRemove(sum(trace(U*U))) << std::endl; + std::cout.precision(def_prec); + + } +private: + +}; + +template +class MagMod: public ObservableModule, NoParameters>{ + typedef ObservableModule, NoParameters> ObsBase; + using ObsBase::ObsBase; // for constructors + + // acquire resource + virtual void initialize(){ + this->ObservablePtr.reset(new MagMeas()); + } +public: + MagMod(): ObsBase(NoParameters()){} +}; + +int main(int argc, char **argv) { + typedef Grid::JSONReader Serialiser; + + Grid_init(&argc, &argv); + int threads = GridThread::GetThreads(); + // here make a routine to print all the relevant information on the run + std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl; + + // Typedefs to simplify notation + constexpr int Ncolours = 2; + constexpr int Ndimensions = 3; + typedef ScalarNxNAdjGenericHMCRunner 
HMCWrapper; // Uses the default minimum norm, real scalar fields + typedef ScalarAdjActionR ScalarAction; + //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + HMCWrapper TheHMC; + TheHMC.ReadCommandLine(argc, argv); + + if (TheHMC.ParameterFile.empty()){ + std::cout << "Input file not specified." + << "Use --ParameterFile option in the command line.\nAborting" + << std::endl; + exit(1); + } + Serialiser Reader(TheHMC.ParameterFile); + + // Grid from the command line + GridModule ScalarGrid; + if (GridDefaultLatt().size() != Ndimensions){ + std::cout << "Incorrect dimension of the grid\n. Expected dim="<< Ndimensions << std::endl; + exit(1); + } + if (GridDefaultMpi().size() != Ndimensions){ + std::cout << "Incorrect dimension of the mpi grid\n. Expected dim="<< Ndimensions << std::endl; + exit(1); + } + ScalarGrid.set_full(new GridCartesian(GridDefaultLatt(),GridDefaultSimd(Ndimensions, vComplex::Nsimd()),GridDefaultMpi())); + ScalarGrid.set_rb(new GridRedBlackCartesian(ScalarGrid.get_full())); + TheHMC.Resources.AddGrid("scalar", ScalarGrid); + std::cout << "Lattice size : " << GridDefaultLatt() << std::endl; + + // Checkpointer definition + CheckpointerParameters CPparams(Reader); + TheHMC.Resources.LoadBinaryCheckpointer(CPparams); + + RNGModuleParameters RNGpar(Reader); + TheHMC.Resources.SetRNGSeeds(RNGpar); + + // Construct observables + typedef MagMod MagObs; + TheHMC.Resources.AddObservable(); + + ///////////////////////////////////////////////////////////// + // Collect actions, here use more encapsulation + + // Scalar action in adjoint representation + ScalarActionParameters SPar(Reader); + ScalarAction Saction(SPar.mass_squared, SPar.lambda); + + // Collect actions + ActionLevel> Level1(1); + Level1.push_back(&Saction); + TheHMC.TheAction.push_back(Level1); + ///////////////////////////////////////////////////////////// + TheHMC.Parameters.initialize(Reader); + + TheHMC.Run(); + + Grid_finalize(); +} // main + +/* Examples for 
input files + +JSON + +{ + "Checkpointer": { + "config_prefix": "ckpoint_scalar_lat", + "rng_prefix": "ckpoint_scalar_rng", + "saveInterval": 1, + "format": "IEEE64BIG" + }, + "RandomNumberGenerator": { + "serial_seeds": "1 2 3 4 6", + "parallel_seeds": "6 7 8 9 11" + }, + "ScalarAction":{ + "mass_squared": 0.5, + "lambda": 0.1 + }, + "HMC":{ + "StartTrajectory": 0, + "Trajectories": 100, + "MetropolisTest": true, + "NoMetropolisUntil": 10, + "StartingType": "HotStart", + "MD":{ + "name": "MinimumNorm2", + "MDsteps": 15, + "trajL": 2.0 + } + } +} + + +XML example not provided yet + +*/ diff --git a/tests/smearing/Test_WilsonFlow.cc b/tests/smearing/Test_WilsonFlow.cc index 4e6bd0af..5db00d5d 100644 --- a/tests/smearing/Test_WilsonFlow.cc +++ b/tests/smearing/Test_WilsonFlow.cc @@ -28,6 +28,38 @@ directory /* END LEGAL */ #include +namespace Grid{ + struct WFParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, + int, steps, + double, step_size, + int, meas_interval, + double, maxTau); // for the adaptive algorithm + + + template + WFParameters(Reader& Reader){ + read(Reader, "WilsonFlow", *this); + } + + }; + + struct ConfParameters: Serializable { + GRID_SERIALIZABLE_CLASS_MEMBERS(ConfParameters, + std::string, conf_prefix, + std::string, rng_prefix, + int, StartConfiguration, + int, EndConfiguration, + int, Skip); + + template + ConfParameters(Reader& Reader){ + read(Reader, "Configurations", *this); + } + + }; +} + int main(int argc, char **argv) { using namespace Grid; using namespace Grid::QCD; @@ -42,22 +74,38 @@ int main(int argc, char **argv) { GridRedBlackCartesian RBGrid(latt_size, simd_layout, mpi_layout); std::vector seeds({1, 2, 3, 4, 5}); + GridSerialRNG sRNG; GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds); LatticeGaugeField Umu(&Grid), Uflow(&Grid); SU::HotConfiguration(pRNG, Umu); + + typedef Grid::JSONReader Serialiser; + Serialiser Reader("input.json"); + WFParameters WFPar(Reader); + ConfParameters CPar(Reader); + 
CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); + BinaryHmcCheckpointer CPBin(CPPar); + + for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ + + CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG); std::cout << std::setprecision(15); - std::cout << GridLogMessage << "Plaquette: " + std::cout << GridLogMessage << "Initial plaquette: " << WilsonLoops::avgPlaquette(Umu) << std::endl; - WilsonFlow WF(200, 0.01); + WilsonFlow WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval); - WF.smear(Uflow, Umu); + WF.smear_adaptive(Uflow, Umu, WFPar.maxTau); RealD WFlow_plaq = WilsonLoops::avgPlaquette(Uflow); - std::cout << GridLogMessage << "Plaquette: "<< WFlow_plaq << std::endl; + RealD WFlow_TC = WilsonLoops::TopologicalCharge(Uflow); + RealD WFlow_T0 = WF.energyDensityPlaquette(Uflow); + std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; + std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl; + std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl; std::cout<< GridLogMessage << " Admissibility check:\n"; const double sp_adm = 0.067; // admissible threshold @@ -73,6 +121,32 @@ int main(int argc, char **argv) { std::cout<< GridLogMessage << " (sp_admissible = "<< sp_adm <<")\n"; //std::cout<< GridLogMessage << " sp_admissible - sp_max = "< U(4,UGrid); for(int mu=0;mu eval(Nm); - FermionField src(FrbGrid); gaussian(RNG5rb,src); + FermionField src(FrbGrid); + gaussian(RNG5rb,src); std::vector evec(Nm,FrbGrid); for(int i=0;i<1;i++){ - std::cout << i<<" / "<< Nm<< " grid pointer "< HermOp(Ds); ConjugateGradient CG(1.0e-8,10000); - BlockConjugateGradient BCG(1.0e-8,10000); - MultiRHSConjugateGradient mCG(1.0e-8,10000); + int blockDim = 0; + BlockConjugateGradient BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); + BlockConjugateGradient BCG (BlockCG,blockDim,1.0e-8,10000); + BlockConjugateGradient mCG (CGmultiRHS,blockDim,1.0e-8,10000); - 
std::cout << GridLogMessage << "************************************************************************ "< HermOp4d(Ds4d); FermionField src4d(UGrid); random(pRNG,src4d); @@ -111,7 +113,7 @@ int main (int argc, char ** argv) std::cout << GridLogMessage << " Calling Block CG for "<