diff --git a/.gitignore b/.gitignore index 96b54b35..38e3da2d 100644 --- a/.gitignore +++ b/.gitignore @@ -95,6 +95,10 @@ build.sh ################ lib/Eigen/* +# FFTW source # +################ +lib/fftw/* + # libtool macros # ################## m4/lt* diff --git a/.travis.yml b/.travis.yml index 0733d037..de9c54d3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: VERSION=-4.9 - compiler: gcc @@ -35,6 +37,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: VERSION=-5 - compiler: clang @@ -47,6 +51,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz - compiler: clang @@ -59,6 +65,8 @@ matrix: - libmpfr-dev - libgmp-dev - libmpc-dev + - libopenmpi-dev + - openmpi-bin - binutils-dev env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz @@ -69,6 +77,7 @@ before_install: - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi install: @@ -92,3 +101,10 @@ script: - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - make -j4 - ./benchmarks/Benchmark_dwf --threads 1 + - echo make clean + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi + - ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto + - make -j4 + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi + diff --git a/benchmarks/Benchmark_dwf.cc b/benchmarks/Benchmark_dwf.cc index c4a7c29e..bc9ab708 100644 --- a/benchmarks/Benchmark_dwf.cc +++ b/benchmarks/Benchmark_dwf.cc @@ -86,18 +86,6 @@ int main (int argc, char ** argv) LatticeFermion tmp(FGrid); LatticeFermion err(FGrid); - /* src=zero; - std::vector origin(5,0); - SpinColourVector f=zero; - for(int sp=0;sp<4;sp++){ - for(int co=0;co<3;co++){ - f()(sp)(co)=Complex(1.0,0.0); - }} - pokeSite(f,src,origin); - */ - - ColourMatrix cm = Complex(1.0,0.0); - LatticeGaugeField Umu(UGrid); random(RNG4,Umu); @@ -144,10 +132,12 @@ int main (int argc, char ** argv) DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); - std::cout< 1.0e-6 ) { - std::cout << "site "< + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef _GRID_FFT_H_ +#define _GRID_FFT_H_ + +#ifdef HAVE_FFTW +#include +#endif +namespace Grid { + + template struct FFTW { }; + +#ifdef HAVE_FFTW + template<> struct FFTW { + public: + + typedef fftw_complex FFTW_scalar; + typedef fftw_plan FFTW_plan; + + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } + + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftw_flops(p,add,mul,fmas); + } + + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftw_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftw_destroy_plan(p); + } + }; + + template<> struct FFTW { + public: + + typedef fftwf_complex FFTW_scalar; + typedef fftwf_plan FFTW_plan; + + static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany, + FFTW_scalar *in, const int *inembed, + int istride, int idist, + FFTW_scalar *out, const int *onembed, + int ostride, int odist, + int sign, unsigned flags) { + return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags); + } + + static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){ + ::fftwf_flops(p,add,mul,fmas); + } + + inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) { + ::fftwf_execute_dft(p,in,out); + } + inline static void fftw_destroy_plan(const FFTW_plan p) { + ::fftwf_destroy_plan(p); + } + }; + +#endif + +#ifndef FFTW_FORWARD +#define FFTW_FORWARD (-1) +#define FFTW_BACKWARD (+1) +#endif + + class FFT { + private: + + GridCartesian *vgrid; + GridCartesian *sgrid; + + int Nd; + double flops; + double flops_call; + uint64_t usec; + + std::vector dimensions; + std::vector processors; + std::vector processor_coor; + + public: + + static const int forward=FFTW_FORWARD; + static const int backward=FFTW_BACKWARD; + + double Flops(void) {return flops;} + double MFlops(void) {return flops/usec;} + + FFT ( GridCartesian * grid ) : + vgrid(grid), + Nd(grid->_ndimension), + dimensions(grid->_fdimensions), + processors(grid->_processors), + processor_coor(grid->_processor_coor) + { + flops=0; + usec =0; + std::vector layout(Nd,1); + sgrid = new GridCartesian(dimensions,layout,processors); + }; + + ~FFT ( void) { + delete sgrid; + } + + template + void FFT_dim(Lattice &result,const Lattice &source,int dim, int inverse){ + + conformable(result._grid,vgrid); + conformable(source._grid,vgrid); + + int L = vgrid->_ldimensions[dim]; + int G = vgrid->_fdimensions[dim]; + + std::vector layout(Nd,1); + std::vector pencil_gd(vgrid->_fdimensions); + + pencil_gd[dim] = G*processors[dim]; + + // Pencil global vol LxLxGxLxL per node + GridCartesian pencil_g(pencil_gd,layout,processors); + + // Construct pencils + typedef typename vobj::scalar_object sobj; + typedef typename sobj::scalar_type scalar; + + Lattice ssource(vgrid); ssource =source; + Lattice pgsource(&pencil_g); + Lattice pgresult(&pencil_g); pgresult=zero; + +#ifndef HAVE_FFTW + assert(0); +#else + typedef typename FFTW::FFTW_scalar FFTW_scalar; + typedef typename FFTW::FFTW_plan FFTW_plan; + + { + int Ncomp = sizeof(sobj)/sizeof(scalar); + int Nlow = 1; + for(int d=0;d_ldimensions[d]; + } + + int rank = 1; /* 1d transforms */ + int n[] = {G}; /* 1d transforms of length G */ + int howmany = Ncomp; + int odist,idist,istride,ostride; + idist = odist = 1; /* Distance between consecutive FT's */ + istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */ + int *inembed = n, *onembed = n; + + + int sign = FFTW_FORWARD; + if (inverse) sign = FFTW_BACKWARD; + + FFTW_plan p; + { + FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0]; + FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0]; + p = FFTW::fftw_plan_many_dft(rank,n,howmany, + in,inembed, + istride,idist, + out,onembed, + ostride, odist, + sign,FFTW_ESTIMATE); + } + + double add,mul,fma; + FFTW::fftw_flops(p,&add,&mul,&fma); + flops_call = add+mul+2.0*fma; + + GridStopWatch timer; + + // Barrel shift and collect global pencil + for(int p=0;plSites();idx++) { + + std::vector lcoor(Nd); + sgrid->LocalIndexToLocalCoor(idx,lcoor); + + sobj s; + + peekLocalSite(s,ssource,lcoor); + + lcoor[dim]+=p*L; + + pokeLocalSite(s,pgsource,lcoor); + } + + ssource = Cshift(ssource,dim,L); + } + + // Loop over orthog coords + int NN=pencil_g.lSites(); + + GridStopWatch Timer; + Timer.Start(); + +PARALLEL_FOR_LOOP + for(int idx=0;idx lcoor(Nd); + pencil_g.LocalIndexToLocalCoor(idx,lcoor); + + if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0 + FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx]; + FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx]; + FFTW::fftw_execute_dft(p,in,out); + } + } + + Timer.Stop(); + usec += Timer.useconds(); + flops+= flops_call*NN; + + int pc = processor_coor[dim]; + for(int idx=0;idxlSites();idx++) { + std::vector lcoor(Nd); + sgrid->LocalIndexToLocalCoor(idx,lcoor); + std::vector gcoor = lcoor; + // extract the result + sobj s; + gcoor[dim] = lcoor[dim]+L*pc; + peekLocalSite(s,pgresult,gcoor); + pokeLocalSite(s,result,lcoor); + } + + FFTW::fftw_destroy_plan(p); + } +#endif + + + } + + }; + + +} + +#endif diff --git a/lib/Grid.h b/lib/Grid.h index 9de8470d..486ee4d3 100644 --- a/lib/Grid.h +++ b/lib/Grid.h @@ -68,6 +68,7 @@ Author: paboyle #include #include #include +#include #include #include #include @@ -78,7 +79,8 @@ Author: paboyle #include #include #include -#include + +#include #include #include diff --git a/lib/Init.cc b/lib/Init.cc index 6543435c..34e503a4 100644 --- a/lib/Init.cc +++ b/lib/Init.cc @@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc, assert(ompthreads.size()==1); GridThread::SetThreads(ompthreads[0]); } + if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ std::vector cores(0); arg= GridCmdOptionPayload(argv,argv+argc,"--cores"); @@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv) GridLogConfigure(logstreams); } - if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ Grid_debug_handler_init(); } diff --git a/lib/Makefile.am b/lib/Makefile.am index a7ef229a..bc752af5 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -17,8 +17,8 @@ endif include Make.inc include Eigen.inc -lib_LTLIBRARIES = libGrid.la +lib_LIBRARIES = libGrid.a -libGrid_la_SOURCES = $(CCFILES) $(extra_sources) -libGrid_ladir = $(pkgincludedir) +libGrid_a_SOURCES = $(CCFILES) $(extra_sources) +libGrid_adir = $(pkgincludedir) nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h diff --git a/lib/Stencil.h b/lib/Stencil.h index 47c1b977..f5056485 100644 --- a/lib/Stencil.h +++ b/lib/Stencil.h @@ -106,7 +106,6 @@ #define SERIAL_SENDS void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ - comms_bytes+=2.0*bytes; #ifdef SEND_IMMEDIATE commtime-=usecond(); _grid->SendToRecvFrom(xmit,to,rcv,from,bytes); @@ -265,7 +264,7 @@ // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); } inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { - _mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); + //_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); local = _entries[ent]._is_local; perm = _entries[ent]._permute; if (perm) ptype = _permute_type[point]; @@ -301,6 +300,39 @@ double gathermtime; double splicetime; double nosplicetime; + double calls; + + void ZeroCounters(void) { + gathertime = 0.; + jointime = 0.; + commtime = 0.; + halogtime = 0.; + mergetime = 0.; + spintime = 0.; + gathermtime = 0.; + splicetime = 0.; + nosplicetime = 0.; + comms_bytes = 0.; + calls = 0.; + }; + + void Report(void) { +#define PRINTIT(A) \ + std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls< 0. ) { + std::cout << GridLogMessage << " Stencil calls "< &distances) : _permute_type(npoints), _comm_buf_size(npoints) { - #ifdef TIMING_HACK - gathertime=0; - jointime=0; - commtime=0; - halogtime=0; - mergetime=0; - spintime=0; - gathermtime=0; - splicetime=0; - nosplicetime=0; - comms_bytes=0; - #endif _npoints = npoints; _grid = grid; _directions = directions; @@ -623,6 +643,7 @@ template void HaloExchange(const Lattice &source,compressor &compress) { + calls++; Mergers.resize(0); Packets.resize(0); HaloGather(source,compress); diff --git a/lib/algorithms/iterative/ConjugateGradient.h b/lib/algorithms/iterative/ConjugateGradient.h index f5102019..f340eb38 100644 --- a/lib/algorithms/iterative/ConjugateGradient.h +++ b/lib/algorithms/iterative/ConjugateGradient.h @@ -1,153 +1,168 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/algorithms/iterative/ConjugateGradient.h +Source file: ./lib/algorithms/iterative/ConjugateGradient.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef GRID_CONJUGATE_GRADIENT_H #define GRID_CONJUGATE_GRADIENT_H namespace Grid { - ///////////////////////////////////////////////////////////// - // Base classes for iterative processes based on operators - // single input vec, single output vec. - ///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +// Base classes for iterative processes based on operators +// single input vec, single output vec. +///////////////////////////////////////////////////////////// - template - class ConjugateGradient : public OperatorFunction { -public: - bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true. - RealD Tolerance; - Integer MaxIterations; - ConjugateGradient(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){ - }; +template +class ConjugateGradient : public OperatorFunction { + public: + bool ErrorOnNoConverge; // throw an assert when the CG fails to converge. + // Defaults true. + RealD Tolerance; + Integer MaxIterations; + ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) + : Tolerance(tol), + MaxIterations(maxit), + ErrorOnNoConverge(err_on_no_conv){}; + void operator()(LinearOperatorBase &Linop, const Field &src, + Field &psi) { + psi.checkerboard = src.checkerboard; + conformable(psi, src); - void operator() (LinearOperatorBase &Linop,const Field &src, Field &psi){ + RealD cp, c, a, d, b, ssq, qq, b_pred; - psi.checkerboard = src.checkerboard; - conformable(psi,src); + Field p(src); + Field mmp(src); + Field r(src); - RealD cp,c,a,d,b,ssq,qq,b_pred; - - Field p(src); - Field mmp(src); - Field r(src); - - //Initial residual computation & set up - RealD guess = norm2(psi); - assert(std::isnan(guess)==0); + // Initial residual computation & set up + RealD guess = norm2(psi); + assert(std::isnan(guess) == 0); - Linop.HermOpAndNorm(psi,mmp,d,b); - - r= src-mmp; - p= r; - - a =norm2(p); - cp =a; - ssq=norm2(src); + + Linop.HermOpAndNorm(psi, mmp, d, b); + - std::cout< + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +/* If is included, use the C99 complex type. Otherwise + define a type bit-compatible with C99 complex */ +#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) +# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C +#else +# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2] +#endif + +#define FFTW_CONCAT(prefix, name) prefix ## name +#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name) +#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name) +#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name) +#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name) + +/* IMPORTANT: for Windows compilers, you should add a line + #define FFTW_DLL + here and in kernel/ifftw.h if you are compiling/using FFTW as a + DLL, in order to do the proper importing/exporting, or + alternatively compile with -DFFTW_DLL or the equivalent + command-line flag. This is not necessary under MinGW/Cygwin, where + libtool does the imports/exports automatically. */ +#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) + /* annoying Windows syntax for shared-library declarations */ +# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */ +# define FFTW_EXTERN extern __declspec(dllexport) +# else /* user is calling FFTW; import symbol */ +# define FFTW_EXTERN extern __declspec(dllimport) +# endif +#else +# define FFTW_EXTERN extern +#endif + +enum fftw_r2r_kind_do_not_use_me { + FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2, + FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6, + FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10 +}; + +struct fftw_iodim_do_not_use_me { + int n; /* dimension size */ + int is; /* input stride */ + int os; /* output stride */ +}; + +#include /* for ptrdiff_t */ +struct fftw_iodim64_do_not_use_me { + ptrdiff_t n; /* dimension size */ + ptrdiff_t is; /* input stride */ + ptrdiff_t os; /* output stride */ +}; + +typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *); +typedef int (*fftw_read_char_func_do_not_use_me)(void *); + +/* + huge second-order macro that defines prototypes for all API + functions. We expand this macro for each supported precision + + X: name-mangling macro + R: real data type + C: complex data type +*/ + +#define FFTW_DEFINE_API(X, R, C) \ + \ +FFTW_DEFINE_COMPLEX(R, C); \ + \ +typedef struct X(plan_s) *X(plan); \ + \ +typedef struct fftw_iodim_do_not_use_me X(iodim); \ +typedef struct fftw_iodim64_do_not_use_me X(iodim64); \ + \ +typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \ + \ +typedef fftw_write_char_func_do_not_use_me X(write_char_func); \ +typedef fftw_read_char_func_do_not_use_me X(read_char_func); \ + \ +FFTW_EXTERN void X(execute)(const X(plan) p); \ + \ +FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \ + C *in, C *out, int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \ + C *in, C *out, int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \ + C *in, C *out, int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \ + int howmany, \ + C *in, const int *inembed, \ + int istride, int idist, \ + C *out, const int *onembed, \ + int ostride, int odist, \ + int sign, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + C *in, C *out, \ + int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *ri, R *ii, R *ro, R *io, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + C *in, C *out, \ + int sign, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *ri, R *ii, R *ro, R *io, \ + unsigned flags); \ + \ +FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \ +FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \ + R *ro, R *io); \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \ + int howmany, \ + R *in, const int *inembed, \ + int istride, int idist, \ + C *out, const int *onembed, \ + int ostride, int odist, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \ + R *in, C *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \ + R *in, C *out, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \ + int n2, \ + R *in, C *out, unsigned flags); \ + \ + \ +FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \ + int howmany, \ + C *in, const int *inembed, \ + int istride, int idist, \ + R *out, const int *onembed, \ + int ostride, int odist, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \ + C *in, R *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \ + C *in, R *out, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \ + int n2, \ + C *in, R *out, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, C *out, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + C *in, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \ + int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, R *ro, R *io, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \ + int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *ri, R *ii, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, C *out, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \ + const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + C *in, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \ + int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, R *ro, R *io, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \ + int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *ri, R *ii, R *out, \ + unsigned flags); \ + \ +FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \ +FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \ + \ +FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \ + R *in, R *ro, R *io); \ +FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \ + R *ri, R *ii, R *out); \ + \ +FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \ + int howmany, \ + R *in, const int *inembed, \ + int istride, int idist, \ + R *out, const int *onembed, \ + int ostride, int odist, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \ + X(r2r_kind) kind, unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \ + X(r2r_kind) kind0, X(r2r_kind) kind1, \ + unsigned flags); \ +FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \ + R *in, R *out, X(r2r_kind) kind0, \ + X(r2r_kind) kind1, X(r2r_kind) kind2, \ + unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \ + int howmany_rank, \ + const X(iodim) *howmany_dims, \ + R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \ + int howmany_rank, \ + const X(iodim64) *howmany_dims, \ + R *in, R *out, \ + const X(r2r_kind) *kind, unsigned flags); \ + \ +FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \ + \ +FFTW_EXTERN void X(destroy_plan)(X(plan) p); \ +FFTW_EXTERN void X(forget_wisdom)(void); \ +FFTW_EXTERN void X(cleanup)(void); \ + \ +FFTW_EXTERN void X(set_timelimit)(double t); \ + \ +FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \ +FFTW_EXTERN int X(init_threads)(void); \ +FFTW_EXTERN void X(cleanup_threads)(void); \ + \ +FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \ +FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \ +FFTW_EXTERN char *X(export_wisdom_to_string)(void); \ +FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \ + void *data); \ +FFTW_EXTERN int X(import_system_wisdom)(void); \ +FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \ +FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \ +FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \ +FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \ + \ +FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \ +FFTW_EXTERN void X(print_plan)(const X(plan) p); \ +FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \ + \ +FFTW_EXTERN void *X(malloc)(size_t n); \ +FFTW_EXTERN R *X(alloc_real)(size_t n); \ +FFTW_EXTERN C *X(alloc_complex)(size_t n); \ +FFTW_EXTERN void X(free)(void *p); \ + \ +FFTW_EXTERN void X(flops)(const X(plan) p, \ + double *add, double *mul, double *fmas); \ +FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \ +FFTW_EXTERN double X(cost)(const X(plan) p); \ + \ +FFTW_EXTERN int X(alignment_of)(R *p); \ +FFTW_EXTERN const char X(version)[]; \ +FFTW_EXTERN const char X(cc)[]; \ +FFTW_EXTERN const char X(codelet_optim)[]; + + +/* end of FFTW_DEFINE_API macro */ + +FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) +FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) +FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) + +/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64 + for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \ + && !(defined(__ICC) || defined(__INTEL_COMPILER)) \ + && (defined(__i386__) || defined(__x86_64__) || defined(__ia64__)) +# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) +/* note: __float128 is a typedef, which is not supported with the _Complex + keyword in gcc, so instead we use this ugly __attribute__ version. + However, we can't simply pass the __attribute__ version to + FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer + types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. */ +# undef FFTW_DEFINE_COMPLEX +# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C +# endif +FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex) +#endif + +#define FFTW_FORWARD (-1) +#define FFTW_BACKWARD (+1) + +#define FFTW_NO_TIMELIMIT (-1.0) + +/* documented flags */ +#define FFTW_MEASURE (0U) +#define FFTW_DESTROY_INPUT (1U << 0) +#define FFTW_UNALIGNED (1U << 1) +#define FFTW_CONSERVE_MEMORY (1U << 2) +#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */ +#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */ +#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */ +#define FFTW_ESTIMATE (1U << 6) +#define FFTW_WISDOM_ONLY (1U << 21) + +/* undocumented beyond-guru flags */ +#define FFTW_ESTIMATE_PATIENT (1U << 7) +#define FFTW_BELIEVE_PCOST (1U << 8) +#define FFTW_NO_DFT_R2HC (1U << 9) +#define FFTW_NO_NONTHREADED (1U << 10) +#define FFTW_NO_BUFFERING (1U << 11) +#define FFTW_NO_INDIRECT_OP (1U << 12) +#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */ +#define FFTW_NO_RANK_SPLITS (1U << 14) +#define FFTW_NO_VRANK_SPLITS (1U << 15) +#define FFTW_NO_VRECURSE (1U << 16) +#define FFTW_NO_SIMD (1U << 17) +#define FFTW_NO_SLOW (1U << 18) +#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19) +#define FFTW_ALLOW_PRUNING (1U << 20) + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* FFTW3_H */ diff --git a/lib/lattice/Lattice_transfer.h b/lib/lattice/Lattice_transfer.h index 2fa72014..cc4617de 100644 --- a/lib/lattice/Lattice_transfer.h +++ b/lib/lattice/Lattice_transfer.h @@ -349,7 +349,7 @@ void localConvert(const Lattice &in,Lattice &out) assert(ig->_ldimensions[d] == og->_ldimensions[d]); } -PARALLEL_FOR_LOOP + //PARALLEL_FOR_LOOP for(int idx=0;idxlSites();idx++){ std::vector lcoor(ni); ig->LocalIndexToLocalCoor(idx,lcoor); @@ -446,6 +446,79 @@ void ExtractSlice(Lattice &lowDim, Lattice & higherDim,int slice, in } + +template +void InsertSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) +{ + typedef typename vobj::scalar_object sobj; + sobj s; + + GridBase *lg = lowDim._grid; + GridBase *hg = higherDim._grid; + int nl = lg->_ndimension; + int nh = hg->_ndimension; + + assert(nl == nh); + assert(orthog=0); + + for(int d=0;d_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } + + // the above should guarantee that the operations are local + //PARALLEL_FOR_LOOP + for(int idx=0;idxlSites();idx++){ + std::vector lcoor(nl); + std::vector hcoor(nh); + lg->LocalIndexToLocalCoor(idx,lcoor); + if( lcoor[orthog] == slice_lo ) { + hcoor=lcoor; + hcoor[orthog] = slice_hi; + peekLocalSite(s,lowDim,lcoor); + pokeLocalSite(s,higherDim,hcoor); + } + } +} + + +template +void ExtractSliceLocal(Lattice &lowDim, Lattice & higherDim,int slice_lo,int slice_hi, int orthog) +{ + typedef typename vobj::scalar_object sobj; + sobj s; + + GridBase *lg = lowDim._grid; + GridBase *hg = higherDim._grid; + int nl = lg->_ndimension; + int nh = hg->_ndimension; + + assert(nl == nh); + assert(orthog=0); + + for(int d=0;d_processors[d] == hg->_processors[d]); + assert(lg->_ldimensions[d] == hg->_ldimensions[d]); + } + + // the above should guarantee that the operations are local + //PARALLEL_FOR_LOOP + for(int idx=0;idxlSites();idx++){ + std::vector lcoor(nl); + std::vector hcoor(nh); + lg->LocalIndexToLocalCoor(idx,lcoor); + if( lcoor[orthog] == slice_lo ) { + hcoor=lcoor; + hcoor[orthog] = slice_hi; + peekLocalSite(s,higherDim,hcoor); + pokeLocalSite(s,lowDim,lcoor); + } + } +} + + template void Replicate(Lattice &coarse,Lattice & fine) { diff --git a/lib/parallelIO/BinaryIO.h b/lib/parallelIO/BinaryIO.h index 184209dc..5eddb57d 100644 --- a/lib/parallelIO/BinaryIO.h +++ b/lib/parallelIO/BinaryIO.h @@ -194,22 +194,22 @@ class BinaryIO { std::vector site({x,y,z,t}); - if ( grid->IsBoss() ) { - fin.read((char *)&file_object,sizeof(file_object)); - bytes += sizeof(file_object); - if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); - if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); + if (grid->IsBoss()) { + fin.read((char *)&file_object, sizeof(file_object)); + bytes += sizeof(file_object); + if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object)); + if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object)); - munge(file_object,munged,csum); + munge(file_object, munged, csum); } // The boss who read the file has their value poked pokeSite(munged,Umu,site); }}}} timer.Stop(); std::cout<IsBoss() ) { - - if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); - if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); - if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); - if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); + + if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); + if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); + if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); + if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); - // NB could gather an xstrip as an optimisation. - fout.write((char *)&file_object,sizeof(file_object)); - bytes+=sizeof(file_object); + // NB could gather an xstrip as an optimisation. + fout.write((char *)&file_object,sizeof(file_object)); + bytes+=sizeof(file_object); } }}}} timer.Stop(); std::cout<ThisRank() ){ - // std::cout << "rank" << rank<<" Getting state for index "<Broadcast(rank,(void *)&saved[0],bytes); if ( grid->IsBoss() ) { - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); - fout.write((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fout.write((char *)&saved[0],bytes); } } @@ -355,14 +355,14 @@ class BinaryIO { int l_idx=parallel.generator_idx(o_idx,i_idx); if ( grid->IsBoss() ) { - fin.read((char *)&saved[0],bytes); - Uint32Checksum((uint32_t *)&saved[0],bytes,csum); + fin.read((char *)&saved[0],bytes); + Uint32Checksum((uint32_t *)&saved[0],bytes,csum); } grid->Broadcast(0,(void *)&saved[0],bytes); if( rank == grid->ThisRank() ){ - parallel.SetState(saved,l_idx); + parallel.SetState(saved,l_idx); } } @@ -415,15 +415,15 @@ class BinaryIO { if ( d == 0 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; } @@ -434,9 +434,9 @@ class BinaryIO { std::cout<< std::dec ; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -463,7 +463,7 @@ class BinaryIO { // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -472,8 +472,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } ///////////////////////// @@ -487,29 +487,29 @@ class BinaryIO { // iorank reads from the seek //////////////////////////////// if (myrank == iorank) { - - fin.seekg(offset+g_idx*sizeof(fileObj)); - fin.read((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); - - if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); - - munge(fileObj,siteObj,csum); + + fin.seekg(offset+g_idx*sizeof(fileObj)); + fin.read((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); + + if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); + + munge(fileObj,siteObj,csum); - } + } // Possibly do transport through pt2pt if ( rank != iorank ) { - if ( (myrank == rank) || (myrank==iorank) ) { - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); + } } // Poke at destination if ( myrank == rank ) { - pokeLocalSite(siteObj,Umu,lsite); + pokeLocalSite(siteObj,Umu,lsite); } grid->Barrier(); // necessary? } @@ -520,7 +520,7 @@ class BinaryIO { timer.Stop(); std::cout<_ndimension-1 ) parallel[d] = 0; if (parallel[d]) { - range[d] = grid->_ldimensions[d]; - start[d] = grid->_processor_coor[d]*range[d]; - ioproc[d]= grid->_processor_coor[d]; + range[d] = grid->_ldimensions[d]; + start[d] = grid->_processor_coor[d]*range[d]; + ioproc[d]= grid->_processor_coor[d]; } else { - range[d] = grid->_gdimensions[d]; - start[d] = 0; - ioproc[d]= 0; + range[d] = grid->_gdimensions[d]; + start[d] = 0; + ioproc[d]= 0; - if ( grid->_processor_coor[d] != 0 ) IOnode = 0; + if ( grid->_processor_coor[d] != 0 ) IOnode = 0; } slice_vol = slice_vol * range[d]; @@ -577,9 +577,9 @@ class BinaryIO { grid->GlobalSum(tmp); std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <_ndimension;d++){ - std::cout<< range[d]; - if( d< grid->_ndimension-1 ) - std::cout<< " x "; + std::cout<< range[d]; + if( d< grid->_ndimension-1 ) + std::cout<< " x "; } std::cout << std::endl; } @@ -610,7 +610,7 @@ class BinaryIO { // should aggregate a whole chunk and then write. // need to implement these loops in Nd independent way with a lexico conversion for(int tlex=0;tlex tsite(nd); // temporary mixed up site std::vector gsite(nd); std::vector lsite(nd); @@ -619,8 +619,8 @@ class BinaryIO { Lexicographic::CoorFromIndex(tsite,tlex,range); for(int d=0;d_ldimensions[d]; // local site - gsite[d] = tsite[d]+start[d]; // global site + lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site + gsite[d] = tsite[d]+start[d]; // global site } @@ -640,26 +640,26 @@ class BinaryIO { // Pair of nodes may need to do pt2pt send if ( rank != iorank ) { // comms is necessary - if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it - // Send to IOrank - grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); - } + if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it + // Send to IOrank + grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); + } } grid->Barrier(); // necessary? if (myrank == iorank) { - - munge(siteObj,fileObj,csum); + + munge(siteObj,fileObj,csum); - if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); - if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); - - fout.seekp(offset+g_idx*sizeof(fileObj)); - fout.write((char *)&fileObj,sizeof(fileObj)); - bytes+=sizeof(fileObj); + if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); + if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); + + fout.seekp(offset+g_idx*sizeof(fileObj)); + fout.write((char *)&fileObj,sizeof(fileObj)); + bytes+=sizeof(fileObj); } } @@ -668,7 +668,7 @@ class BinaryIO { timer.Stop(); std::cout< #include #include #include #include + +// Include representations #include +#include +#include +#include + #include + +#include + #include #include #include -#include + + #endif diff --git a/lib/qcd/action/ActionBase.h b/lib/qcd/action/ActionBase.h index 7c9f6f1c..56d6b8e0 100644 --- a/lib/qcd/action/ActionBase.h +++ b/lib/qcd/action/ActionBase.h @@ -1,87 +1,153 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/ActionBase.h +Source file: ./lib/qcd/action/ActionBase.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: neo - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_ACTION_BASE #define QCD_ACTION_BASE namespace Grid { -namespace QCD{ - -template -class Action { +namespace QCD { +template +class Action { public: bool is_smeared = false; // Boundary conditions? // Heatbath? - virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions - virtual RealD S (const GaugeField &U) = 0; // evaluate the action - virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative - virtual ~Action() {}; + virtual void refresh(const GaugeField& U, + GridParallelRNG& pRNG) = 0; // refresh pseudofermions + virtual RealD S(const GaugeField& U) = 0; // evaluate the action + virtual void deriv(const GaugeField& U, + GaugeField& dSdU) = 0; // evaluate the action derivative + virtual ~Action(){}; +}; + +// Indexing of tuple types +template +struct Index; + +template +struct Index> { + static const std::size_t value = 0; +}; + +template +struct Index> { + static const std::size_t value = 1 + Index>::value; }; -// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh /* -template -class PseudoFermionAction : public Action { +template +struct ActionLevel { public: - FermionField Phi; - GridParallelRNG &pRNG; - GridBase &Grid; + typedef Action* + ActPtr; // now force the same colours as the rest of the code - PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) { - }; + //Add supported representations here - virtual void refresh(const GaugeField &gauge) { - gaussian(Phi,pRNG); - }; -}; -*/ - -template struct ActionLevel{ -public: - - typedef Action* ActPtr; // now force the same colours as the rest of the code - - int multiplier; + unsigned int multiplier; std::vector actions; - ActionLevel(int mul = 1) : multiplier(mul) { - assert (mul > 0); + ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) { + assert(mul >= 1); }; - - void push_back(ActPtr ptr){ - actions.push_back(ptr); + + void push_back(ActPtr ptr) { actions.push_back(ptr); } +}; +*/ + +template +struct ActionLevel { + public: + unsigned int multiplier; + + // Fundamental repr actions separated because of the smearing + typedef Action* ActPtr; + + // construct a tuple of vectors of the actions for the corresponding higher + // representation fields + typedef typename AccessTypes::VectorCollection action_collection; + action_collection actions_hirep; + typedef typename AccessTypes::FieldTypeCollection action_hirep_types; + + std::vector& actions; + + // Temporary conversion between ActionLevel and ActionLevelHirep + //ActionLevelHirep(ActionLevel& AL ):actions(AL.actions), multiplier(AL.multiplier){} + + ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) { + // initialize the hirep vectors to zero. + //apply(this->resize, actions_hirep, 0); //need a working resize + assert(mul >= 1); + }; + + //void push_back(ActPtr ptr) { actions.push_back(ptr); } + + + + template < class Field > + void push_back(Action* ptr) { + // insert only in the correct vector + std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr); + }; + + + + template < class ActPtr> + static void resize(ActPtr ap, unsigned int n){ + ap->resize(n); + } + + //template + //auto getRepresentation(Repr& R)->decltype(std::get(R).U) {return std::get(R).U;} + + // Loop on tuple for a callable function + template + inline typename std::enable_if::value, void>::type apply( + Callable, Repr& R,Args&...) const {} + + template + inline typename std::enable_if::value, void>::type apply( + Callable fn, Repr& R, Args&... arguments) const { + fn(std::get(actions_hirep), std::get(R.rep), arguments...); + apply(fn, R, arguments...); + } + }; -template using ActionSet = std::vector >; +//template +//using ActionSet = std::vector >; -}} +template +using ActionSet = std::vector >; + +} +} #endif diff --git a/lib/qcd/action/Actions.h b/lib/qcd/action/Actions.h index be08cdde..ba6e577d 100644 --- a/lib/qcd/action/Actions.h +++ b/lib/qcd/action/Actions.h @@ -111,17 +111,30 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #define FermOp4dVecTemplateInstantiate(A) \ template class A; \ template class A; \ + template class A; \ + template class A; \ template class A; \ template class A; +#define AdjointFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + +#define TwoIndexFermOpTemplateInstantiate(A) \ + template class A; \ + template class A; + #define FermOp5dVecTemplateInstantiate(A) \ template class A; \ - template class A; + template class A; \ + template class A; \ + template class A; #define FermOpTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \ FermOp5dVecTemplateInstantiate(A) + #define GparityFermOpTemplateInstantiate(A) //////////////////////////////////////////// @@ -138,6 +151,7 @@ typedef SymanzikGaugeAction ConjugateSymanzikGaugeAction #include #include #include +#include #include #include #include @@ -166,6 +180,14 @@ typedef WilsonFermion WilsonFermionR; typedef WilsonFermion WilsonFermionF; typedef WilsonFermion WilsonFermionD; +typedef WilsonFermion WilsonAdjFermionR; +typedef WilsonFermion WilsonAdjFermionF; +typedef WilsonFermion WilsonAdjFermionD; + +typedef WilsonFermion WilsonTwoIndexSymmetricFermionR; +typedef WilsonFermion WilsonTwoIndexSymmetricFermionF; +typedef WilsonFermion WilsonTwoIndexSymmetricFermionD; + typedef WilsonTMFermion WilsonTMFermionR; typedef WilsonTMFermion WilsonTMFermionF; typedef WilsonTMFermion WilsonTMFermionD; @@ -176,6 +198,11 @@ typedef DomainWallFermion DomainWallFermionD; typedef MobiusFermion MobiusFermionR; typedef MobiusFermion MobiusFermionF; typedef MobiusFermion MobiusFermionD; + +typedef ZMobiusFermion ZMobiusFermionR; +typedef ZMobiusFermion ZMobiusFermionF; +typedef ZMobiusFermion ZMobiusFermionD; + typedef ScaledShamirFermion ScaledShamirFermionR; typedef ScaledShamirFermion ScaledShamirFermionF; typedef ScaledShamirFermion ScaledShamirFermionD; diff --git a/lib/qcd/action/fermion/CayleyFermion5D.cc b/lib/qcd/action/fermion/CayleyFermion5D.cc index 066cede4..c4196043 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.cc +++ b/lib/qcd/action/fermion/CayleyFermion5D.cc @@ -54,18 +54,18 @@ template void CayleyFermion5D::M5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag (Ls,1.0); - std::vector upper(Ls,-1.0); upper[Ls-1]=mass; - std::vector lower(Ls,-1.0); lower[0] =mass; + std::vector diag (Ls,1.0); + std::vector upper(Ls,-1.0); upper[Ls-1]=mass; + std::vector lower(Ls,-1.0); lower[0] =mass; M5D(psi,chi,chi,lower,diag,upper); } template void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag = bs; - std::vector upper= cs; - std::vector lower= cs; + std::vector diag = bs; + std::vector upper= cs; + std::vector lower= cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5D(psi,psi,Din,lower,diag,upper); @@ -73,9 +73,9 @@ void CayleyFermion5D::Meooe5D (const FermionField &psi, FermionField &D template void CayleyFermion5D::Meo5D (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = beo; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = beo; + std::vector upper(Ls); + std::vector lower(Ls); for(int i=0;i void CayleyFermion5D::Mooee (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); for(int i=0;i void CayleyFermion5D::MooeeDag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag = bee; - std::vector upper(Ls); - std::vector lower(Ls); + std::vector diag = bee; + std::vector upper(Ls); + std::vector lower(Ls); for (int s=0;s void CayleyFermion5D::M5Ddag (const FermionField &psi, FermionField &chi) { int Ls=this->Ls; - std::vector diag(Ls,1.0); - std::vector upper(Ls,-1.0); - std::vector lower(Ls,-1.0); + std::vector diag(Ls,1.0); + std::vector upper(Ls,-1.0); + std::vector lower(Ls,-1.0); upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5Ddag(psi,chi,chi,lower,diag,upper); @@ -141,9 +141,9 @@ template void CayleyFermion5D::MeooeDag5D (const FermionField &psi, FermionField &Din) { int Ls=this->Ls; - std::vector diag =bs; - std::vector upper=cs; - std::vector lower=cs; + std::vector diag =bs; + std::vector upper=cs; + std::vector lower=cs; upper[Ls-1]=-mass*upper[Ls-1]; lower[0] =-mass*lower[0]; M5Ddag(psi,psi,Din,lower,diag,upper); @@ -273,11 +273,21 @@ void CayleyFermion5D::MeoDeriv(GaugeField &mat,const FermionField &U,const template void CayleyFermion5D::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) { - SetCoefficientsZolotarev(1.0,zdata,b,c); + std::vector gamma(this->Ls); + for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; + SetCoefficientsInternal(1.0,gamma,b,c); } //Zolo template void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) +{ + std::vector gamma(this->Ls); + for(int s=0;sLs;s++) gamma[s] = zdata->gamma[s]; + SetCoefficientsInternal(zolo_hi,gamma,b,c); +} +//Zolo +template +void CayleyFermion5D::SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c) { int Ls=this->Ls; @@ -315,7 +325,7 @@ void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot double bmc = b-c; for(int i=0; i < Ls; i++){ as[i] = 1.0; - omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code + omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code bs[i] = 0.5*(bpc/omega[i] + bmc); cs[i] = 0.5*(bpc/omega[i] - bmc); } @@ -377,7 +387,7 @@ void CayleyFermion5D::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot } { - double delta_d=mass*cee[Ls-1]; + Coeff_t delta_d=mass*cee[Ls-1]; for(int j=0;j &lower, - std::vector &diag, - std::vector &upper); + std::vector &lower, + std::vector &diag, + std::vector &upper); void M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper); + std::vector &lower, + std::vector &diag, + std::vector &upper); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); virtual void Instantiatable(void)=0; @@ -91,23 +91,23 @@ namespace Grid { RealD mass; // Cayley form Moebius (tanh and zolotarev) - std::vector omega; - std::vector bs; // S dependent coeffs - std::vector cs; - std::vector as; + std::vector omega; + std::vector bs; // S dependent coeffs + std::vector cs; + std::vector as; // For preconditioning Cayley form - std::vector bee; - std::vector cee; - std::vector aee; - std::vector beo; - std::vector ceo; - std::vector aeo; + std::vector bee; + std::vector cee; + std::vector aee; + std::vector beo; + std::vector ceo; + std::vector aeo; // LDU factorisation of the eeoo matrix - std::vector lee; - std::vector leem; - std::vector uee; - std::vector ueem; - std::vector dee; + std::vector lee; + std::vector leem; + std::vector uee; + std::vector ueem; + std::vector dee; // Constructors CayleyFermion5D(GaugeField &_Umu, @@ -117,20 +117,19 @@ namespace Grid { GridRedBlackCartesian &FourDimRedBlackGrid, RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); - - protected: void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); + void SetCoefficientsInternal(RealD zolo_hi,std::vector & gamma,RealD b,RealD c); }; } } #define INSTANTIATE_DPERP(A)\ template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ + std::vector &lower,std::vector &diag,std::vector &upper); \ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ - std::vector &lower,std::vector &diag,std::vector &upper); \ + std::vector &lower,std::vector &diag,std::vector &upper); \ template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); diff --git a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc index 4791e7a2..62e91dd4 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dcache.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dcache.cc @@ -43,9 +43,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls =this->Ls; GridBase *grid=psi._grid; @@ -82,9 +82,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls =this->Ls; GridBase *grid=psi._grid; @@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP INSTANTIATE_DPERP(WilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplF); INSTANTIATE_DPERP(GparityWilsonImplD); + INSTANTIATE_DPERP(ZWilsonImplF); + INSTANTIATE_DPERP(ZWilsonImplD); #endif }} diff --git a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc index a2c96d87..ad7daddb 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dssp.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dssp.cc @@ -43,9 +43,9 @@ template void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls=this->Ls; for(int s=0;s void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { int Ls=this->Ls; for(int s=0;s void CayleyFermion5D::M5D(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { GridBase *grid=psi._grid; int Ls = this->Ls; @@ -121,9 +121,9 @@ template void CayleyFermion5D::M5Ddag(const FermionField &psi, const FermionField &phi, FermionField &chi, - std::vector &lower, - std::vector &diag, - std::vector &upper) + std::vector &lower, + std::vector &diag, + std::vector &upper) { GridBase *grid=psi._grid; int Ls = this->Ls; @@ -194,8 +194,8 @@ void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField chi.checkerboard=psi.checkerboard; - Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); - Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); + Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls); + Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls); for(int s=0;s::MooeeInternal(const FermionField &psi, FermionField Pplus (0,Ls-1) = mass*cee[0]; Pminus(Ls-1,0) = mass*cee[Ls-1]; - Eigen::MatrixXd PplusMat ; - Eigen::MatrixXd PminusMat; + Eigen::MatrixXcd PplusMat ; + Eigen::MatrixXcd PminusMat; if ( inv ) { PplusMat =Pplus.inverse(); @@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP INSTANTIATE_DPERP(DomainWallVec5dImplD); INSTANTIATE_DPERP(DomainWallVec5dImplF); +INSTANTIATE_DPERP(ZDomainWallVec5dImplD); +INSTANTIATE_DPERP(ZDomainWallVec5dImplF); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); +template void CayleyFermion5D::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); }} diff --git a/lib/qcd/action/fermion/FermionOperatorImpl.h b/lib/qcd/action/fermion/FermionOperatorImpl.h index 4126d185..910bf488 100644 --- a/lib/qcd/action/fermion/FermionOperatorImpl.h +++ b/lib/qcd/action/fermion/FermionOperatorImpl.h @@ -1,35 +1,36 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h +Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H -#define GRID_QCD_FERMION_OPERATOR_IMPL_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H +#define GRID_QCD_FERMION_OPERATOR_IMPL_H namespace Grid { @@ -99,253 +100,281 @@ namespace Grid { typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::Compressor Compressor; \ - typedef typename Impl::StencilImpl StencilImpl; \ - typedef typename Impl::ImplParams ImplParams; + typedef typename Impl::StencilImpl StencilImpl; \ + typedef typename Impl::ImplParams ImplParams; \ + typedef typename Impl::Coeff_t Coeff_t; #define INHERIT_IMPL_TYPES(Base) \ - INHERIT_GIMPL_TYPES(Base)\ + INHERIT_GIMPL_TYPES(Base) \ INHERIT_FIMPL_TYPES(Base) - + /////// // Single flavour four spinors with colour index /////// - template - class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > { + template + class WilsonImpl + : public PeriodicGaugeImpl > { public: + static const int Dimension = Representation::Dimension; + typedef PeriodicGaugeImpl > Gimpl; + + //Necessary? + constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;} const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; - typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplDoubledGaugeField = iVector >, Nds >; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - - typedef Lattice FermionField; + + template using iImplSpinor = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; + + typedef WilsonCompressor Compressor; typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - + typedef WilsonStencil StencilImpl; + ImplParams Params; - - WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - + + WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; + bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ - mult(&phi(),&U(mu),&chi()); - } - - template - inline void loadLinkElement(Simd & reg,ref &memory){ - reg = memory; - } - inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) - { - conformable(Uds._grid,GaugeGrid); - conformable(Umu._grid,GaugeGrid); - GaugeLinkField U(GaugeGrid); - for(int mu=0;mu(Umu,mu); - PokeIndex(Uds,U,mu); - U = adj(Cshift(U,mu,-1)); - PokeIndex(Uds,U,mu+4); - } + + inline void multLink(SiteHalfSpinor &phi, + const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, + int mu, + StencilEntry *SE, + StencilImpl &St) { + mult(&phi(), &U(mu), &chi()); } + template + inline void loadLinkElement(Simd ®, + ref &memory) { + reg = memory; + } + + inline void DoubleStore(GridBase *GaugeGrid, + DoubledGaugeField &Uds, + const GaugeField &Umu) { + conformable(Uds._grid, GaugeGrid); + conformable(Umu._grid, GaugeGrid); + GaugeLinkField U(GaugeGrid); + for (int mu = 0; mu < Nd; mu++) { + U = PeekIndex(Umu, mu); + PokeIndex(Uds, U, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uds, U, mu + 4); + } + } + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ GaugeLinkField link(mat._grid); link = TraceIndex(outerProduct(Btilde,A)); PokeIndex(mat,link,mu); } - + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - + int Ls=Btilde._grid->_fdimensions[0]; - + GaugeLinkField tmp(mat._grid); tmp = zero; -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - int sU=sss; - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here + PARALLEL_FOR_LOOP + for(int sss=0;sssoSites();sss++){ + int sU=sss; + for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); // ordering here + } } - } PokeIndex(mat,tmp,mu); } - }; - - /////// // Single flavour four spinors with colour index, 5d redblack /////// - template + template class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { public: - + + static const int Dimension = Nrepresentation; const bool LsVectorised=true; - - typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; + typedef _Coeff_t Coeff_t; + typedef PeriodicGaugeImpl > Gimpl; INHERIT_GIMPL_TYPES(Gimpl); - template using iImplSpinor = iScalar, Ns> >; - template using iImplHalfSpinor = iScalar, Nhs> >; - template using iImplDoubledGaugeField = iVector >, Nds >; - template using iImplGaugeField = iVector >, Nd >; - template using iImplGaugeLink = iScalar > >; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef Lattice FermionField; - + template using iImplSpinor = iScalar, Ns> >; + template using iImplHalfSpinor = iScalar, Nhs> >; + template using iImplDoubledGaugeField = iVector >, Nds>; + template using iImplGaugeField = iVector >, Nd>; + template using iImplGaugeLink = iScalar > >; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef Lattice FermionField; + // Make the doubled gauge field a *scalar* - typedef iImplDoubledGaugeField SiteDoubledGaugeField; // This is a scalar - typedef iImplGaugeField SiteScalarGaugeField; // scalar - typedef iImplGaugeLink SiteScalarGaugeLink; // scalar - - typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; + typedef iImplDoubledGaugeField + SiteDoubledGaugeField; // This is a scalar + typedef iImplGaugeField + SiteScalarGaugeField; // scalar + typedef iImplGaugeLink + SiteScalarGaugeLink; // scalar + + typedef Lattice DoubledGaugeField; + + typedef WilsonCompressor Compressor; typedef WilsonImplParams ImplParams; - typedef WilsonStencil StencilImpl; - + typedef WilsonStencil StencilImpl; + ImplParams Params; - - DomainWallVec5dImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - + + DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){}; + bool overlapCommsCompute(void) { return false; }; - - template - inline void loadLinkElement(Simd & reg,ref &memory){ - vsplat(reg,memory); + + template + inline void loadLinkElement(Simd ®, ref &memory) { + vsplat(reg, memory); } - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) - { + inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, int mu, StencilEntry *SE, + StencilImpl &St) { SiteGaugeLink UU; - for(int i=0;i(Umu,mu); - U = adj(Cshift(U,mu,-1)); - PokeIndex(Uadj,U,mu); - } - - for(int lidx=0;lidxlSites();lidx++){ - std::vector lcoor; - GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor); - - peekLocalSite(ScalarUmu,Umu,lcoor); - for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu); - - peekLocalSite(ScalarUmu,Uadj,lcoor); - for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu); - - pokeLocalSite(ScalarUds,Uds,lcoor); - } - - } - inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ - assert(0); - } - - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ + GaugeLinkField U(Umu._grid); + GaugeField Uadj(Umu._grid); + for (int mu = 0; mu < Nd; mu++) { + U = PeekIndex(Umu, mu); + U = adj(Cshift(U, mu, -1)); + PokeIndex(Uadj, U, mu); + } + + for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) { + std::vector lcoor; + GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor); + + peekLocalSite(ScalarUmu, Umu, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu); + + peekLocalSite(ScalarUmu, Uadj, lcoor); + for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu); + + pokeLocalSite(ScalarUds, Uds, lcoor); + } + } + + inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, + FermionField &A, int mu) { + assert(0); + } + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, + FermionField Ã, int mu) { assert(0); } - }; - - + //////////////////////////////////////////////////////////////////////////////////////// // Flavour doubled spinors; is Gparity the only? what about C*? //////////////////////////////////////////////////////////////////////////////////////// - - template - class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes >{ + + template + class GparityWilsonImpl + : public ConjugateGaugeImpl > { public: + static const int Dimension = Nrepresentation; const bool LsVectorised=false; + typedef _Coeff_t Coeff_t; typedef ConjugateGaugeImpl< GaugeImplTypes > Gimpl; - + INHERIT_GIMPL_TYPES(Gimpl); - - template using iImplSpinor = iVector, Ns>, Ngp >; - template using iImplHalfSpinor = iVector, Nhs>, Ngp >; - template using iImplDoubledGaugeField = iVector >, Nds >, Ngp >; - - typedef iImplSpinor SiteSpinor; - typedef iImplHalfSpinor SiteHalfSpinor; - typedef iImplDoubledGaugeField SiteDoubledGaugeField; - - typedef Lattice FermionField; + + template + using iImplSpinor = + iVector, Ns>, Ngp>; + template + using iImplHalfSpinor = + iVector, Nhs>, Ngp>; + template + using iImplDoubledGaugeField = + iVector >, Nds>, Ngp>; + + typedef iImplSpinor SiteSpinor; + typedef iImplHalfSpinor SiteHalfSpinor; + typedef iImplDoubledGaugeField SiteDoubledGaugeField; + + typedef Lattice FermionField; typedef Lattice DoubledGaugeField; - - typedef WilsonCompressor Compressor; - typedef WilsonStencil StencilImpl; + + typedef WilsonCompressor Compressor; + typedef WilsonStencil StencilImpl; typedef GparityWilsonImplParams ImplParams; - + ImplParams Params; - GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; - + + GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){}; + bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; - // provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity - inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ - + // provide the multiply by link that is differentiated between Gparity (with + // flavour index) and non-Gparity + inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U, + const SiteHalfSpinor &chi, int mu, StencilEntry *SE, + StencilImpl &St) { typedef SiteHalfSpinor vobj; typedef typename SiteHalfSpinor::scalar_object sobj; - + vobj vtmp; sobj stmp; GridBase *grid = St._grid; - + const int Nsimd = grid->Nsimd(); - int direction = St._directions[mu]; - int distance = St._distances[mu]; - int ptype = St._permute_type[mu]; - int sl = St._grid->_simd_layout[direction]; - + int direction = St._directions[mu]; + int distance = St._distances[mu]; + int ptype = St._permute_type[mu]; + int sl = St._grid->_simd_layout[direction]; + // Fixme X.Y.Z.T hardcode in stencil - int mmu = mu % Nd; - + int mmu = mu % Nd; + // assert our assumptions - assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code - assert((sl==1)||(sl==2)); + assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code + assert((sl == 1) || (sl == 2)); std::vector icoor; - + if ( SE->_around_the_world && Params.twists[mmu] ) { if ( sl == 2 ) { @@ -386,7 +415,7 @@ PARALLEL_FOR_LOOP mult(&phi(1),&U(1)(mu),&chi(1)); } - } + } inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) { @@ -399,7 +428,7 @@ PARALLEL_FOR_LOOP GaugeLinkField Uconj(GaugeGrid); Lattice > coor(GaugeGrid); - + for(int mu=0;mu(Umu,mu); Uconj = conjugate(U); - + // This phase could come from a simple bc 1,1,-1,1 .. int neglink = GaugeGrid->GlobalDimensions()[mu]-1; if ( Params.twists[mu] ) { Uconj = where(coor==neglink,-Uconj,Uconj); } - -PARALLEL_FOR_LOOP - for(auto ss=U.begin();ss(outerProduct(Btilde[ss],A[ss])); - link[ss]() = ttmp(0,0) + conjugate(ttmp(1,1)) ; - } - PokeIndex(mat,link,mu); + auto tmp = TraceIndex(outerProduct(Btilde, A)); + PARALLEL_FOR_LOOP + for (auto ss = tmp.begin(); ss < tmp.end(); ss++) { + link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1)); + } + PokeIndex(mat, link, mu); return; } - inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){ - - int Ls=Btilde._grid->_fdimensions[0]; - + + inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, + FermionField Ã, int mu) { + int Ls = Btilde._grid->_fdimensions[0]; + GaugeLinkField tmp(mat._grid); tmp = zero; -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - for(int s=0;s(outerProduct(Btilde[sF],Atilde[sF])); - tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1)); + PARALLEL_FOR_LOOP + for (int ss = 0; ss < tmp._grid->oSites(); ss++) { + for (int s = 0; s < Ls; s++) { + int sF = s + Ls * ss; + auto ttmp = traceIndex(outerProduct(Btilde[sF], Atilde[sF])); + tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1)); + } } - } - PokeIndex(mat,tmp,mu); + PokeIndex(mat, tmp, mu); return; } }; - typedef WilsonImpl WilsonImplR; // Real.. whichever prec - typedef WilsonImpl WilsonImplF; // Float - typedef WilsonImpl WilsonImplD; // Double + typedef WilsonImpl WilsonImplR; // Real.. whichever prec + typedef WilsonImpl WilsonImplF; // Float + typedef WilsonImpl WilsonImplD; // Double + + + typedef WilsonImpl ZWilsonImplR; // Real.. whichever prec + typedef WilsonImpl ZWilsonImplF; // Float + typedef WilsonImpl ZWilsonImplD; // Double + + typedef WilsonImpl WilsonAdjImplR; // Real.. whichever prec + typedef WilsonImpl WilsonAdjImplF; // Float + typedef WilsonImpl WilsonAdjImplD; // Double + + typedef WilsonImpl WilsonTwoIndexSymmetricImplR; // Real.. whichever prec + typedef WilsonImpl WilsonTwoIndexSymmetricImplF; // Float + typedef WilsonImpl WilsonTwoIndexSymmetricImplD; // Double typedef DomainWallVec5dImpl DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl DomainWallVec5dImplD; // Double + + typedef DomainWallVec5dImpl ZDomainWallVec5dImplR; // Real.. whichever prec + typedef DomainWallVec5dImpl ZDomainWallVec5dImplF; // Float + typedef DomainWallVec5dImpl ZDomainWallVec5dImplD; // Double - typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec - typedef GparityWilsonImpl GparityWilsonImplF; // Float - typedef GparityWilsonImpl GparityWilsonImplD; // Double - - } + typedef GparityWilsonImpl GparityWilsonImplR; // Real.. whichever prec + typedef GparityWilsonImpl GparityWilsonImplF; // Float + typedef GparityWilsonImpl GparityWilsonImplD; // Double +} } #endif diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 59632409..d887e05b 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -1,319 +1,315 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonFermion.cc +Source file: ./lib/qcd/action/fermion/WilsonFermion.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include namespace Grid { namespace QCD { - const std::vector WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3}); - const std::vector WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1}); - int WilsonFermionStatic::HandOptDslash; +const std::vector WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, + 3}); +const std::vector WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, + -1, -1}); +int WilsonFermionStatic::HandOptDslash; - ///////////////////////////////// - // Constructor and gauge import - ///////////////////////////////// +///////////////////////////////// +// Constructor and gauge import +///////////////////////////////// - template - WilsonFermion::WilsonFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass,const ImplParams &p) : - Kernels(p), - _grid(&Fgrid), - _cbgrid(&Hgrid), - Stencil (&Fgrid,npoint,Even,directions,displacements), - StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even - StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd - mass(_mass), - Lebesgue(_grid), - LebesgueEvenOdd(_cbgrid), - Umu(&Fgrid), - UmuEven(&Hgrid), - UmuOdd (&Hgrid) - { - // Allocate the required comms buffer - ImportGauge(_Umu); +template +WilsonFermion::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p) + : Kernels(p), + _grid(&Fgrid), + _cbgrid(&Hgrid), + Stencil(&Fgrid, npoint, Even, directions, displacements), + StencilEven(&Hgrid, npoint, Even, directions, + displacements), // source is Even + StencilOdd(&Hgrid, npoint, Odd, directions, + displacements), // source is Odd + mass(_mass), + Lebesgue(_grid), + LebesgueEvenOdd(_cbgrid), + Umu(&Fgrid), + UmuEven(&Hgrid), + UmuOdd(&Hgrid) { + // Allocate the required comms buffer + ImportGauge(_Umu); +} + +template +void WilsonFermion::ImportGauge(const GaugeField &_Umu) { + GaugeField HUmu(_Umu._grid); + HUmu = _Umu * (-0.5); + Impl::DoubleStore(GaugeGrid(), Umu, HUmu); + pickCheckerboard(Even, UmuEven, Umu); + pickCheckerboard(Odd, UmuOdd, Umu); +} + +///////////////////////////// +// Implement the interface +///////////////////////////// + +template +RealD WilsonFermion::M(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerNo); + return axpy_norm(out, 4 + mass, in, out); +} + +template +RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Dhop(in, out, DaggerYes); + return axpy_norm(out, 4 + mass, in, out); +} + +template +void WilsonFermion::Meooe(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerNo); + } else { + DhopOE(in, out, DaggerNo); } - - template - void WilsonFermion::ImportGauge(const GaugeField &_Umu) - { - GaugeField HUmu(_Umu._grid); - HUmu = _Umu*(-0.5); - Impl::DoubleStore(GaugeGrid(),Umu,HUmu); - pickCheckerboard(Even,UmuEven,Umu); - pickCheckerboard(Odd ,UmuOdd,Umu); - } - - ///////////////////////////// - // Implement the interface - ///////////////////////////// - - template - RealD WilsonFermion::M(const FermionField &in, FermionField &out) - { - out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerNo); - return axpy_norm(out,4+mass,in,out); +} +template +void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) { + if (in.checkerboard == Odd) { + DhopEO(in, out, DaggerYes); + } else { + DhopOE(in, out, DaggerYes); } +} - template - RealD WilsonFermion::Mdag(const FermionField &in, FermionField &out) - { - out.checkerboard=in.checkerboard; - Dhop(in,out,DaggerYes); - return axpy_norm(out,4+mass,in,out); - } +template +void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + typename FermionField::scalar_type scal(4.0 + mass); + out = scal * in; +} - template - void WilsonFermion::Meooe(const FermionField &in, FermionField &out) - { - if ( in.checkerboard == Odd ) { - DhopEO(in,out,DaggerNo); - } else { - DhopOE(in,out,DaggerNo); - } - } - template - void WilsonFermion::MeooeDag(const FermionField &in, FermionField &out) - { - if ( in.checkerboard == Odd ) { - DhopEO(in,out,DaggerYes); - } else { - DhopOE(in,out,DaggerYes); +template +void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + Mooee(in, out); +} + +template +void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { + out.checkerboard = in.checkerboard; + out = (1.0 / (4.0 + mass)) * in; +} + +template +void WilsonFermion::MooeeInvDag(const FermionField &in, + FermionField &out) { + out.checkerboard = in.checkerboard; + MooeeInv(in, out); +} + +/////////////////////////////////// +// Internal +/////////////////////////////////// + +template +void WilsonFermion::DerivInternal(StencilImpl &st, DoubledGaugeField &U, + GaugeField &mat, const FermionField &A, + const FermionField &B, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor(dag); + + FermionField Btilde(B._grid); + FermionField Atilde(B._grid); + Atilde = A; + + st.HaloExchange(B, compressor); + + for (int mu = 0; mu < Nd; mu++) { + //////////////////////////////////////////////////////////////////////// + // Flip gamma (1+g)<->(1-g) if dag + //////////////////////////////////////////////////////////////////////// + int gamma = mu; + if (!dag) gamma += Nd; + + //////////////////////// + // Call the single hop + //////////////////////// + PARALLEL_FOR_LOOP + for (int sss = 0; sss < B._grid->oSites(); sss++) { + Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu, + gamma); + } + + ////////////////////////////////////////////////// + // spin trace outer product + ////////////////////////////////////////////////// + Impl::InsertForce4D(mat, Btilde, Atilde, mu); + } +} + +template +void WilsonFermion::DhopDeriv(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _grid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + mat.checkerboard = U.checkerboard; + + DerivInternal(Stencil, Umu, mat, U, V, dag); +} + +template +void WilsonFermion::DhopDerivOE(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Even); + assert(U.checkerboard == Odd); + mat.checkerboard = Odd; + + DerivInternal(StencilEven, UmuOdd, mat, U, V, dag); +} + +template +void WilsonFermion::DhopDerivEO(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag) { + conformable(U._grid, _cbgrid); + conformable(U._grid, V._grid); + conformable(U._grid, mat._grid); + + assert(V.checkerboard == Odd); + assert(U.checkerboard == Even); + mat.checkerboard = Even; + + DerivInternal(StencilOdd, UmuEven, mat, U, V, dag); +} + +template +void WilsonFermion::Dhop(const FermionField &in, FermionField &out, + int dag) { + conformable(in._grid, _grid); // verifies full grid + conformable(in._grid, out._grid); + + out.checkerboard = in.checkerboard; + + DhopInternal(Stencil, Lebesgue, Umu, in, out, dag); +} + +template +void WilsonFermion::DhopOE(const FermionField &in, FermionField &out, + int dag) { + conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // drops the cb check + + assert(in.checkerboard == Even); + out.checkerboard = Odd; + + DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag); +} + +template +void WilsonFermion::DhopEO(const FermionField &in, FermionField &out, + int dag) { + conformable(in._grid, _cbgrid); // verifies half grid + conformable(in._grid, out._grid); // drops the cb check + + assert(in.checkerboard == Odd); + out.checkerboard = Even; + + DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag); +} + +template +void WilsonFermion::Mdir(const FermionField &in, FermionField &out, + int dir, int disp) { + DhopDir(in, out, dir, disp); +} + +template +void WilsonFermion::DhopDir(const FermionField &in, FermionField &out, + int dir, int disp) { + int skip = (disp == 1) ? 0 : 1; + int dirdisp = dir + skip * 4; + int gamma = dir + (1 - skip) * 4; + + DhopDirDisp(in, out, dirdisp, gamma, DaggerNo); +}; + +template +void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out, + int dirdisp, int gamma, int dag) { + Compressor compressor(dag); + + Stencil.HaloExchange(in, compressor); + + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out, + dirdisp, gamma); + } +}; + +template +void WilsonFermion::DhopInternal(StencilImpl &st, LebesgueOrder &lo, + DoubledGaugeField &U, + const FermionField &in, + FermionField &out, int dag) { + assert((dag == DaggerNo) || (dag == DaggerYes)); + + Compressor compressor(dag); + st.HaloExchange(in, compressor); + + if (dag == DaggerYes) { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, + out); + } + } else { + PARALLEL_FOR_LOOP + for (int sss = 0; sss < in._grid->oSites(); sss++) { + Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in, + out); } } +}; - template - void WilsonFermion::Mooee(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - typename FermionField::scalar_type scal(4.0+mass); - out = scal*in; - } - - template - void WilsonFermion::MooeeDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - Mooee(in,out); - } - - template - void WilsonFermion::MooeeInv(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - out = (1.0/(4.0+mass))*in; - } - - template - void WilsonFermion::MooeeInvDag(const FermionField &in, FermionField &out) { - out.checkerboard = in.checkerboard; - MooeeInv(in,out); - } - - /////////////////////////////////// - // Internal - /////////////////////////////////// - - template - void WilsonFermion::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B,int dag) { - - assert((dag==DaggerNo) ||(dag==DaggerYes)); - - Compressor compressor(dag); - - FermionField Btilde(B._grid); - FermionField Atilde(B._grid); - Atilde = A; - - st.HaloExchange(B,compressor); - - for(int mu=0;mu(1-g) if dag - //////////////////////////////////////////////////////////////////////// - int gamma = mu; - if ( !dag ) gamma+= Nd; - - //////////////////////// - // Call the single hop - //////////////////////// -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma); - } - - ////////////////////////////////////////////////// - // spin trace outer product - ////////////////////////////////////////////////// - Impl::InsertForce4D(mat,Btilde,Atilde,mu); - - } - } - - template - void WilsonFermion::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_grid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - mat.checkerboard = U.checkerboard; - - DerivInternal(Stencil,Umu,mat,U,V,dag); - } - - template - void WilsonFermion::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_cbgrid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - assert(V.checkerboard==Even); - assert(U.checkerboard==Odd); - mat.checkerboard = Odd; - - DerivInternal(StencilEven,UmuOdd,mat,U,V,dag); - } - - template - void WilsonFermion::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag) - { - conformable(U._grid,_cbgrid); - conformable(U._grid,V._grid); - conformable(U._grid,mat._grid); - - assert(V.checkerboard==Odd); - assert(U.checkerboard==Even); - mat.checkerboard = Even; - - DerivInternal(StencilOdd,UmuEven,mat,U,V,dag); - } - - - template - void WilsonFermion::Dhop(const FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_grid); // verifies full grid - conformable(in._grid,out._grid); - - out.checkerboard = in.checkerboard; - - DhopInternal(Stencil,Lebesgue,Umu,in,out,dag); - } - - template - void WilsonFermion::DhopOE(const FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_cbgrid); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check - - assert(in.checkerboard==Even); - out.checkerboard = Odd; - - DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag); - } - - template - void WilsonFermion::DhopEO(const FermionField &in, FermionField &out,int dag) { - conformable(in._grid,_cbgrid); // verifies half grid - conformable(in._grid,out._grid); // drops the cb check - - assert(in.checkerboard==Odd); - out.checkerboard = Even; - - DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag); - } - - template - void WilsonFermion::Mdir (const FermionField &in, FermionField &out,int dir,int disp) { - DhopDir(in,out,dir,disp); - } - - - template - void WilsonFermion::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){ - - int skip = (disp==1) ? 0 : 1; - int dirdisp = dir+skip*4; - int gamma = dir+(1-skip)*4; - - DhopDirDisp(in,out,dirdisp,gamma,DaggerNo); - - }; - - template - void WilsonFermion::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) { - - Compressor compressor(dag); - - Stencil.HaloExchange(in,compressor); - -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sss,sss,in,out,dirdisp,gamma); - } - - }; - - template - void WilsonFermion::DhopInternal(StencilImpl & st,LebesgueOrder& lo,DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) - { - assert((dag==DaggerNo) ||(dag==DaggerYes)); - - Compressor compressor(dag); - st.HaloExchange(in,compressor); - - if ( dag == DaggerYes ) { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sss,sss,1,1,in,out); - } - } else { -PARALLEL_FOR_LOOP - for(int sss=0;sssoSites();sss++){ - Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sss,sss,1,1,in,out); - } - } - }; - - - FermOpTemplateInstantiate(WilsonFermion); - GparityFermOpTemplateInstantiate(WilsonFermion); - - -}} - - - +FermOpTemplateInstantiate(WilsonFermion); +AdjointFermOpTemplateInstantiate(WilsonFermion); +TwoIndexFermOpTemplateInstantiate(WilsonFermion); +GparityFermOpTemplateInstantiate(WilsonFermion); +} +} diff --git a/lib/qcd/action/fermion/WilsonFermion.h b/lib/qcd/action/fermion/WilsonFermion.h index 3de2cac4..40616073 100644 --- a/lib/qcd/action/fermion/WilsonFermion.h +++ b/lib/qcd/action/fermion/WilsonFermion.h @@ -1,161 +1,155 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonFermion.h +Source file: ./lib/qcd/action/fermion/WilsonFermion.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_WILSON_FERMION_H -#define GRID_QCD_WILSON_FERMION_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_WILSON_FERMION_H +#define GRID_QCD_WILSON_FERMION_H namespace Grid { - namespace QCD { +namespace QCD { - class WilsonFermionStatic { - public: - static int HandOptDslash; // these are a temporary hack - static int MortonOrder; - static const std::vector directions ; - static const std::vector displacements; - static const int npoint=8; - }; +class WilsonFermionStatic { + public: + static int HandOptDslash; // these are a temporary hack + static int MortonOrder; + static const std::vector directions; + static const std::vector displacements; + static const int npoint = 8; +}; - template - class WilsonFermion : public WilsonKernels, public WilsonFermionStatic - { - public: - INHERIT_IMPL_TYPES(Impl); - typedef WilsonKernels Kernels; +template +class WilsonFermion : public WilsonKernels, public WilsonFermionStatic { + public: + INHERIT_IMPL_TYPES(Impl); + typedef WilsonKernels Kernels; - /////////////////////////////////////////////////////////////// - // Implement the abstract base - /////////////////////////////////////////////////////////////// - GridBase *GaugeGrid(void) { return _grid ;} - GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;} - GridBase *FermionGrid(void) { return _grid;} - GridBase *FermionRedBlackGrid(void) { return _cbgrid;} + /////////////////////////////////////////////////////////////// + // Implement the abstract base + /////////////////////////////////////////////////////////////// + GridBase *GaugeGrid(void) { return _grid; } + GridBase *GaugeRedBlackGrid(void) { return _cbgrid; } + GridBase *FermionGrid(void) { return _grid; } + GridBase *FermionRedBlackGrid(void) { return _cbgrid; } - ////////////////////////////////////////////////////////////////// - // override multiply; cut number routines if pass dagger argument - // and also make interface more uniformly consistent - ////////////////////////////////////////////////////////////////// - RealD M(const FermionField &in, FermionField &out); - RealD Mdag(const FermionField &in, FermionField &out); + ////////////////////////////////////////////////////////////////// + // override multiply; cut number routines if pass dagger argument + // and also make interface more uniformly consistent + ////////////////////////////////////////////////////////////////// + RealD M(const FermionField &in, FermionField &out); + RealD Mdag(const FermionField &in, FermionField &out); - ///////////////////////////////////////////////////////// - // half checkerboard operations - // could remain virtual so we can derive Clover from Wilson base - ///////////////////////////////////////////////////////// - void Meooe(const FermionField &in, FermionField &out) ; - void MeooeDag(const FermionField &in, FermionField &out) ; + ///////////////////////////////////////////////////////// + // half checkerboard operations + // could remain virtual so we can derive Clover from Wilson base + ///////////////////////////////////////////////////////// + void Meooe(const FermionField &in, FermionField &out); + void MeooeDag(const FermionField &in, FermionField &out); - // allow override for twisted mass and clover - virtual void Mooee(const FermionField &in, FermionField &out) ; - virtual void MooeeDag(const FermionField &in, FermionField &out) ; - virtual void MooeeInv(const FermionField &in, FermionField &out) ; - virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; + // allow override for twisted mass and clover + virtual void Mooee(const FermionField &in, FermionField &out); + virtual void MooeeDag(const FermionField &in, FermionField &out); + virtual void MooeeInv(const FermionField &in, FermionField &out); + virtual void MooeeInvDag(const FermionField &in, FermionField &out); - //////////////////////// - // Derivative interface - //////////////////////// - // Interface calls an internal routine - void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); - void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); + //////////////////////// + // Derivative interface + //////////////////////// + // Interface calls an internal routine + void DhopDeriv(GaugeField &mat, const FermionField &U, const FermionField &V, + int dag); + void DhopDerivOE(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag); + void DhopDerivEO(GaugeField &mat, const FermionField &U, + const FermionField &V, int dag); + + /////////////////////////////////////////////////////////////// + // non-hermitian hopping term; half cb or both + /////////////////////////////////////////////////////////////// + void Dhop(const FermionField &in, FermionField &out, int dag); + void DhopOE(const FermionField &in, FermionField &out, int dag); + void DhopEO(const FermionField &in, FermionField &out, int dag); + + /////////////////////////////////////////////////////////////// + // Multigrid assistance; force term uses too + /////////////////////////////////////////////////////////////// + void Mdir(const FermionField &in, FermionField &out, int dir, int disp); + void DhopDir(const FermionField &in, FermionField &out, int dir, int disp); + void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp, + int gamma, int dag); + + /////////////////////////////////////////////////////////////// + // Extra methods added by derived + /////////////////////////////////////////////////////////////// + void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat, + const FermionField &A, const FermionField &B, int dag); + + void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + const FermionField &in, FermionField &out, int dag); + + // Constructor + WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid, + GridRedBlackCartesian &Hgrid, RealD _mass, + const ImplParams &p = ImplParams()); + + // DoubleStore impl dependent + void ImportGauge(const GaugeField &_Umu); + + /////////////////////////////////////////////////////////////// + // Data members require to support the functionality + /////////////////////////////////////////////////////////////// + + // protected: + public: + RealD mass; + + GridBase *_grid; + GridBase *_cbgrid; + + // Defines the stencils for even and odd + StencilImpl Stencil; + StencilImpl StencilEven; + StencilImpl StencilOdd; + + // Copy of the gauge field , with even and odd subsets + DoubledGaugeField Umu; + DoubledGaugeField UmuEven; + DoubledGaugeField UmuOdd; + + LebesgueOrder Lebesgue; + LebesgueOrder LebesgueEvenOdd; +}; + +typedef WilsonFermion WilsonFermionF; +typedef WilsonFermion WilsonFermionD; - /////////////////////////////////////////////////////////////// - // non-hermitian hopping term; half cb or both - /////////////////////////////////////////////////////////////// - void Dhop(const FermionField &in, FermionField &out,int dag) ; - void DhopOE(const FermionField &in, FermionField &out,int dag) ; - void DhopEO(const FermionField &in, FermionField &out,int dag) ; - - /////////////////////////////////////////////////////////////// - // Multigrid assistance; force term uses too - /////////////////////////////////////////////////////////////// - void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ; - void DhopDir(const FermionField &in, FermionField &out,int dir,int disp); - void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ; - - /////////////////////////////////////////////////////////////// - // Extra methods added by derived - /////////////////////////////////////////////////////////////// - void DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag); - - void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) ; - - // Constructor - WilsonFermion(GaugeField &_Umu, - GridCartesian &Fgrid, - GridRedBlackCartesian &Hgrid, - RealD _mass, - const ImplParams &p= ImplParams() - ) ; - - // DoubleStore impl dependent - void ImportGauge(const GaugeField &_Umu); - - /////////////////////////////////////////////////////////////// - // Data members require to support the functionality - /////////////////////////////////////////////////////////////// - - // protected: - public: - - RealD mass; - - GridBase * _grid; - GridBase * _cbgrid; - - //Defines the stencils for even and odd - StencilImpl Stencil; - StencilImpl StencilEven; - StencilImpl StencilOdd; - - // Copy of the gauge field , with even and odd subsets - DoubledGaugeField Umu; - DoubledGaugeField UmuEven; - DoubledGaugeField UmuOdd; - - LebesgueOrder Lebesgue; - LebesgueOrder LebesgueEvenOdd; - - - }; - - typedef WilsonFermion WilsonFermionF; - typedef WilsonFermion WilsonFermionD; - - } +} } #endif diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index ef7e84ab..3ced3443 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -42,11 +42,11 @@ const std::vector WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1 // 5d lattice for DWF. template WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - GridRedBlackCartesian &FourDimRedBlackGrid, - RealD _M5,const ImplParams &p) : + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _M5,const ImplParams &p) : Kernels(p), _FiveDimGrid (&FiveDimGrid), _FiveDimRedBlackGrid(&FiveDimRedBlackGrid), @@ -135,10 +135,10 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, /* template WilsonFermion5D::WilsonFermion5D(int simd,GaugeField &_Umu, - GridCartesian &FiveDimGrid, - GridRedBlackCartesian &FiveDimRedBlackGrid, - GridCartesian &FourDimGrid, - RealD _M5,const ImplParams &p) : + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + RealD _M5,const ImplParams &p) : { int nsimd = Simd::Nsimd(); @@ -175,6 +175,73 @@ WilsonFermion5D::WilsonFermion5D(int simd,GaugeField &_Umu, } */ +template +void WilsonFermion5D::Report(void) +{ + std::vector latt = GridDefaultLatt(); + RealD volume = Ls; for(int mu=0;mu_Nprocessors; + + if ( DhopCalls > 0 ) { + std::cout << GridLogMessage << "#### Dhop calls report " << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime + << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " + << DhopCommTime / DhopCalls << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " + << DhopComputeTime << " us" << std::endl; + std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " + << DhopComputeTime / DhopCalls << " us" << std::endl; + + RealD mflops = 1344*volume*DhopCalls/DhopComputeTime; + std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl; + std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl; + + } + + if ( DerivCalls > 0 ) { + std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl; + std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " < 0 || DhopCalls > 0){ + std::cout << GridLogMessage << "WilsonFermion5D Stencil"< +void WilsonFermion5D::ZeroCounters(void) { + DhopCalls = 0; + DhopCommTime = 0; + DhopComputeTime = 0; + + DerivCalls = 0; + DerivCommTime = 0; + DerivComputeTime = 0; + DerivDhopComputeTime = 0; + + Stencil.ZeroCounters(); + StencilEven.ZeroCounters(); + StencilOdd.ZeroCounters(); +} + + template void WilsonFermion5D::ImportGauge(const GaugeField &_Umu) { @@ -215,12 +282,13 @@ PARALLEL_FOR_LOOP template void WilsonFermion5D::DerivInternal(StencilImpl & st, - DoubledGaugeField & U, - GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + DoubledGaugeField & U, + GaugeField &mat, + const FermionField &A, + const FermionField &B, + int dag) { + DerivCalls++; assert((dag==DaggerNo) ||(dag==DaggerYes)); conformable(st._grid,A._grid); @@ -231,51 +299,53 @@ void WilsonFermion5D::DerivInternal(StencilImpl & st, FermionField Btilde(B._grid); FermionField Atilde(B._grid); + DerivCommTime-=usecond(); st.HaloExchange(B,compressor); + DerivCommTime+=usecond(); Atilde=A; - for(int mu=0;muoSites();sss++){ - for(int s=0;soSites(); sss++) { + for (int s = 0; s < Ls; s++) { + int sU = sss; + int sF = s + Ls * sU; - assert ( sF< B._grid->oSites()); - assert ( sU< U._grid->oSites()); + assert(sF < B._grid->oSites()); + assert(sU < U._grid->oSites()); - Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma); - - //////////////////////////// - // spin trace outer product - //////////////////////////// + Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu, + gamma); + //////////////////////////// + // spin trace outer product + //////////////////////////// } - } - - Impl::InsertForce5D(mat,Btilde,Atilde,mu); - + DerivDhopComputeTime += usecond(); + Impl::InsertForce5D(mat, Btilde, Atilde, mu); } + DerivComputeTime += usecond(); } template void WilsonFermion5D::DhopDeriv( GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { conformable(A._grid,FermionGrid()); conformable(A._grid,B._grid); @@ -288,9 +358,9 @@ void WilsonFermion5D::DhopDeriv( GaugeField &mat, template void WilsonFermion5D::DhopDerivEO(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { conformable(A._grid,FermionRedBlackGrid()); conformable(GaugeRedBlackGrid(),mat._grid); @@ -306,9 +376,9 @@ void WilsonFermion5D::DhopDerivEO(GaugeField &mat, template void WilsonFermion5D::DhopDerivOE(GaugeField &mat, - const FermionField &A, - const FermionField &B, - int dag) + const FermionField &A, + const FermionField &B, + int dag) { conformable(A._grid,FermionRedBlackGrid()); conformable(GaugeRedBlackGrid(),mat._grid); @@ -323,32 +393,39 @@ void WilsonFermion5D::DhopDerivOE(GaugeField &mat, template void WilsonFermion5D::DhopInternal(StencilImpl & st, LebesgueOrder &lo, - DoubledGaugeField & U, - const FermionField &in, FermionField &out,int dag) + DoubledGaugeField & U, + const FermionField &in, FermionField &out,int dag) { + DhopCalls++; // assert((dag==DaggerNo) ||(dag==DaggerYes)); Compressor compressor(dag); int LLs = in._grid->_rdimensions[0]; + DhopCommTime-=usecond(); st.HaloExchange(in,compressor); + DhopCommTime+=usecond(); + DhopComputeTime-=usecond(); // Dhop takes the 4d grid from U, and makes a 5d index for fermion - if ( dag == DaggerYes ) { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - int sF=LLs*sU; - Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); + if (dag == DaggerYes) { + PARALLEL_FOR_LOOP + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, + out); } } else { -PARALLEL_FOR_LOOP - for(int ss=0;ssoSites();ss++){ - int sU=ss; - int sF=LLs*sU; - Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); + PARALLEL_FOR_LOOP + for (int ss = 0; ss < U._grid->oSites(); ss++) { + int sU = ss; + int sF = LLs * sU; + Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in, + out); } } + DhopComputeTime+=usecond(); } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.h b/lib/qcd/action/fermion/WilsonFermion5D.h index 4ff37007..b9c35b7c 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.h +++ b/lib/qcd/action/fermion/WilsonFermion5D.h @@ -61,6 +61,17 @@ namespace Grid { INHERIT_IMPL_TYPES(Impl); typedef WilsonKernels Kernels; + void Report(void); + void ZeroCounters(void); + double DhopCalls; + double DhopCommTime; + double DhopComputeTime; + + double DerivCalls; + double DerivCommTime; + double DerivComputeTime; + double DerivDhopComputeTime; + /////////////////////////////////////////////////////////////// // Implement the abstract base /////////////////////////////////////////////////////////////// diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index d644f6ef..49bd98c5 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -1,98 +1,54 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonKernels.cc +Source file: ./lib/qcd/action/fermion/WilsonKernels.cc - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #include namespace Grid { namespace QCD { - int WilsonKernelsStatic::HandOpt; - int WilsonKernelsStatic::AsmOpt; +int WilsonKernelsStatic::HandOpt; +int WilsonKernelsStatic::AsmOpt; -template -WilsonKernels::WilsonKernels(const ImplParams &p): Base(p) {}; +template +WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; -template -void WilsonKernels::DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) -{ -#ifdef AVX512 - if ( AsmOpt ) { +//////////////////////////////////////////// +// Generic implementation; move to different file? +//////////////////////////////////////////// - WilsonKernels::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); - - } else { -#else - { -#endif - for(int site=0;site::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out); - else WilsonKernels::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out); - sF++; - } - sU++; - } - - } -} - -template -void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) -{ - // No asm implementation yet. - // if ( AsmOpt ) WilsonKernels::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - // else - for(int site=0;site::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - else WilsonKernels::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); - sF++; - } - sU++; - } -} - - - //////////////////////////////////////////// - // Generic implementation; move to different file? - //////////////////////////////////////////// - -template -void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; +template +void WilsonKernels::DiracOptGenericDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, int sF, + int sU, const FermionField &in, FermionField &out) { + SiteHalfSpinor tmp; + SiteHalfSpinor chi; SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; @@ -102,176 +58,175 @@ void WilsonKernels::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrd /////////////////////////// // Xp /////////////////////////// - SE=st.GetEntry(ptype,Xp,sF); + SE = st.GetEntry(ptype, Xp, sF); - if (SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXp(chi,in._odata[SE->_offset]); + spProjXp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); - spReconXp(result,Uchi); - + + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); + spReconXp(result, Uchi); + /////////////////////////// // Yp /////////////////////////// - SE=st.GetEntry(ptype,Yp,sF); + SE = st.GetEntry(ptype, Yp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYp(chi,in._odata[SE->_offset]); + spProjYp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); - accumReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); + accumReconYp(result, Uchi); /////////////////////////// // Zp /////////////////////////// - SE=st.GetEntry(ptype,Zp,sF); + SE = st.GetEntry(ptype, Zp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZp(chi,in._odata[SE->_offset]); + spProjZp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); - accumReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); + accumReconZp(result, Uchi); /////////////////////////// // Tp /////////////////////////// - SE=st.GetEntry(ptype,Tp,sF); + SE = st.GetEntry(ptype, Tp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjTp(chi,in._odata[SE->_offset]); + spProjTp(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); - accumReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); + accumReconTp(result, Uchi); /////////////////////////// // Xm /////////////////////////// - SE=st.GetEntry(ptype,Xm,sF); + SE = st.GetEntry(ptype, Xm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXm(chi,in._odata[SE->_offset]); + spProjXm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); - accumReconXm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); + accumReconXm(result, Uchi); + /////////////////////////// // Ym /////////////////////////// - SE=st.GetEntry(ptype,Ym,sF); + SE = st.GetEntry(ptype, Ym, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYm(chi,in._odata[SE->_offset]); + spProjYm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); - accumReconYm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); + accumReconYm(result, Uchi); + /////////////////////////// // Zm /////////////////////////// - SE=st.GetEntry(ptype,Zm,sF); + SE = st.GetEntry(ptype, Zm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZm(chi,in._odata[SE->_offset]); + spProjZm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); - accumReconZm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); + accumReconZm(result, Uchi); /////////////////////////// // Tm /////////////////////////// - SE=st.GetEntry(ptype,Tm,sF); + SE = st.GetEntry(ptype, Tm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else { - spProjTm(chi,in._odata[SE->_offset]); + if (SE->_permute) { + spProjTm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else { + spProjTm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); - accumReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st); + accumReconTm(result, Uchi); - vstream(out._odata[sF],result); + vstream(out._odata[sF], result); }; - - // Need controls to do interior, exterior, or both -template -void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteHalfSpinor *chi_p; +// Need controls to do interior, exterior, or both +template +void WilsonKernels::DiracOptGenericDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, int sF, + int sU, const FermionField &in, FermionField &out) { + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteHalfSpinor *chi_p; SiteHalfSpinor Uchi; SiteSpinor result; StencilEntry *SE; @@ -280,296 +235,298 @@ void WilsonKernels::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder /////////////////////////// // Xp /////////////////////////// - SE=st.GetEntry(ptype,Xm,sF); + SE = st.GetEntry(ptype, Xm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXp(chi,in._odata[SE->_offset]); + spProjXp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); - spReconXp(result,Uchi); - + + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st); + spReconXp(result, Uchi); + /////////////////////////// // Yp /////////////////////////// - SE=st.GetEntry(ptype,Ym,sF); + SE = st.GetEntry(ptype, Ym, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYp(chi,in._odata[SE->_offset]); + spProjYp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); - accumReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st); + accumReconYp(result, Uchi); /////////////////////////// // Zp /////////////////////////// - SE=st.GetEntry(ptype,Zm,sF); + SE = st.GetEntry(ptype, Zm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZp(chi,in._odata[SE->_offset]); + spProjZp(chi, in._odata[SE->_offset]); } - } else { - chi_p=&buf[SE->_offset]; + } else { + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); - accumReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st); + accumReconZp(result, Uchi); /////////////////////////// // Tp /////////////////////////// - SE=st.GetEntry(ptype,Tm,sF); + SE = st.GetEntry(ptype, Tm, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjTp(chi,in._odata[SE->_offset]); + spProjTp(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); - accumReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st); + accumReconTp(result, Uchi); /////////////////////////// // Xm /////////////////////////// - SE=st.GetEntry(ptype,Xp,sF); + SE = st.GetEntry(ptype, Xp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjXm(chi,in._odata[SE->_offset]); + spProjXm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); - accumReconXm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st); + accumReconXm(result, Uchi); /////////////////////////// // Ym /////////////////////////// - SE=st.GetEntry(ptype,Yp,sF); + SE = st.GetEntry(ptype, Yp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjYm(chi,in._odata[SE->_offset]); + spProjYm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); - accumReconYm(result,Uchi); - + Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st); + accumReconYm(result, Uchi); + /////////////////////////// // Zm /////////////////////////// - SE=st.GetEntry(ptype,Zp,sF); + SE = st.GetEntry(ptype, Zp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); + if (SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); } else { - spProjZm(chi,in._odata[SE->_offset]); + spProjZm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); - accumReconZm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st); + accumReconZm(result, Uchi); /////////////////////////// // Tm /////////////////////////// - SE=st.GetEntry(ptype,Tp,sF); + SE = st.GetEntry(ptype, Tp, sF); - if ( SE->_is_local ) { + if (SE->_is_local) { chi_p = χ - if ( SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else { - spProjTm(chi,in._odata[SE->_offset]); + if (SE->_permute) { + spProjTm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else { + spProjTm(chi, in._odata[SE->_offset]); } } else { - chi_p=&buf[SE->_offset]; + chi_p = &buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); - accumReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st); + accumReconTm(result, Uchi); - vstream(out._odata[sF],result); + vstream(out._odata[sF], result); }; -template -void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma) -{ - SiteHalfSpinor tmp; - SiteHalfSpinor chi; - SiteSpinor result; +template +void WilsonKernels::DiracOptDhopDir( + StencilImpl &st, DoubledGaugeField &U, + std::vector > &buf, int sF, + int sU, const FermionField &in, FermionField &out, int dir, int gamma) { + SiteHalfSpinor tmp; + SiteHalfSpinor chi; + SiteSpinor result; SiteHalfSpinor Uchi; StencilEntry *SE; int ptype; - SE=st.GetEntry(ptype,dir,sF); + SE = st.GetEntry(ptype, dir, sF); // Xp - if(gamma==Xp){ - if ( SE->_is_local && SE->_permute ) { - spProjXp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjXp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Xp) { + if (SE->_is_local && SE->_permute) { + spProjXp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjXp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconXp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconXp(result, Uchi); } // Yp - if ( gamma==Yp ){ - if ( SE->_is_local && SE->_permute ) { - spProjYp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjYp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Yp) { + if (SE->_is_local && SE->_permute) { + spProjYp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjYp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconYp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconYp(result, Uchi); } - + // Zp - if ( gamma ==Zp ){ - if ( SE->_is_local && SE->_permute ) { - spProjZp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjZp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Zp) { + if (SE->_is_local && SE->_permute) { + spProjZp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjZp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconZp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconZp(result, Uchi); } - + // Tp - if ( gamma ==Tp ){ - if ( SE->_is_local && SE->_permute ) { - spProjTp(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjTp(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Tp) { + if (SE->_is_local && SE->_permute) { + spProjTp(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjTp(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconTp(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconTp(result, Uchi); } // Xm - if ( gamma==Xm ){ - if ( SE->_is_local && SE->_permute ) { - spProjXm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjXm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Xm) { + if (SE->_is_local && SE->_permute) { + spProjXm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjXm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconXm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconXm(result, Uchi); } // Ym - if ( gamma == Ym ){ - if ( SE->_is_local && SE->_permute ) { - spProjYm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjYm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Ym) { + if (SE->_is_local && SE->_permute) { + spProjYm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjYm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconYm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconYm(result, Uchi); } // Zm - if ( gamma == Zm ){ - if ( SE->_is_local && SE->_permute ) { - spProjZm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjZm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; + if (gamma == Zm) { + if (SE->_is_local && SE->_permute) { + spProjZm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjZm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconZm(result,Uchi); - } - - // Tm - if ( gamma==Tm ) { - if ( SE->_is_local && SE->_permute ) { - spProjTm(tmp,in._odata[SE->_offset]); - permute(chi,tmp,ptype); - } else if ( SE->_is_local ) { - spProjTm(chi,in._odata[SE->_offset]); - } else { - chi=buf[SE->_offset]; - } - Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); - spReconTm(result,Uchi); + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconZm(result, Uchi); } - vstream(out._odata[sF],result); + // Tm + if (gamma == Tm) { + if (SE->_is_local && SE->_permute) { + spProjTm(tmp, in._odata[SE->_offset]); + permute(chi, tmp, ptype); + } else if (SE->_is_local) { + spProjTm(chi, in._odata[SE->_offset]); + } else { + chi = buf[SE->_offset]; + } + Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st); + spReconTm(result, Uchi); + } + + vstream(out._odata[sF], result); } - - FermOpTemplateInstantiate(WilsonKernels); +FermOpTemplateInstantiate(WilsonKernels); +AdjointFermOpTemplateInstantiate(WilsonKernels); +TwoIndexFermOpTemplateInstantiate(WilsonKernels); }} + diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 231fa293..23c145de 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -1,34 +1,35 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/fermion/WilsonKernels.h +Source file: ./lib/qcd/action/fermion/WilsonKernels.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ -#ifndef GRID_QCD_DHOP_H -#define GRID_QCD_DHOP_H +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ +#ifndef GRID_QCD_DHOP_H +#define GRID_QCD_DHOP_H namespace Grid { @@ -48,51 +49,158 @@ namespace Grid { template class WilsonKernels : public FermionOperator , public WilsonKernelsStatic { public: - INHERIT_IMPL_TYPES(Impl); - typedef FermionOperator Base; + INHERIT_IMPL_TYPES(Impl); + typedef FermionOperator Base; public: - void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out); - - void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out); + template + typename std::enable_if::type + DiracOptDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { +#ifdef AVX512 + if (AsmOpt) { + WilsonKernels::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns, + in, out); + + } else { +#else + { +#endif + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + if (HandOpt) + WilsonKernels::DiracOptHandDhopSite(st, lo, U, buf, sF, sU, + in, out); + else + WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, + in, out); + sF++; + } + sU++; + } + } + } + + template + typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type + DiracOptDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, + out); + sF++; + } + sU++; + } + } + + template + typename std::enable_if::type + DiracOptDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { +#ifdef AVX512 + if (AsmOpt) { + WilsonKernels::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls, + Ns, in, out); + } else { +#else + { +#endif + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + if (HandOpt) + WilsonKernels::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU, + in, out); + else + WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, + sU, in, out); + sF++; + } + sU++; + } + } + } + + template + typename std::enable_if< + (Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, + void>::type + DiracOptDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out) { + for (int site = 0; site < Ns; site++) { + for (int s = 0; s < Ls; s++) { + WilsonKernels::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU, + in, out); + sF++; + } + sU++; + } + } + + void DiracOptDhopDir( + StencilImpl &st, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, + int gamma); + + private: + // Specialised variants + void DiracOptGenericDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptGenericDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptAsmDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out); + + void DiracOptAsmDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, int Ls, int Ns, const FermionField &in, + FermionField &out); + + void DiracOptHandDhopSite( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + void DiracOptHandDhopSiteDag( + StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, + std::vector > &buf, + int sF, int sU, const FermionField &in, FermionField &out); + + public: + WilsonKernels(const ImplParams &p = ImplParams()); + }; + + } + } + + + + - void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma); - - private: - // Specialised variants - void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU, const FermionField &in, FermionField &out); - - void DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in,FermionField &out); - - void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); - - - void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out); - - void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int sF,int sU,const FermionField &in, FermionField &out); - public: - - WilsonKernels(const ImplParams &p= ImplParams()); - - }; - - } -} #endif diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 83871b7b..b09699ef 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -1,4 +1,4 @@ - /************************************************************************************* +/************************************************************************************* Grid physics library, www.github.com/paboyle/Grid @@ -26,59 +26,77 @@ Author: paboyle 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +*************************************************************************************/ +/* END LEGAL */ #include + namespace Grid { -namespace QCD { + namespace QCD { + + /////////////////////////////////////////////////////////// + // Default to no assembler implementation + /////////////////////////////////////////////////////////// + template + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + { + assert(0); + } + template + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + { + assert(0); + } - /////////////////////////////////////////////////////////// - // Default to no assembler implementation - /////////////////////////////////////////////////////////// -template -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) -{ - assert(0); -} #if defined(AVX512) - - - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the single precision routine - /////////////////////////////////////////////////////////// - + + + /////////////////////////////////////////////////////////// + // If we are AVX512 specialise the single precision routine + /////////////////////////////////////////////////////////// + #include #include - -static Vector signs; - -int setupSigns(void ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; -} -static int signInit = setupSigns(); - + + static Vector signs; + + int setupSigns(void ){ + Vector bother(2); + signs = bother; + vrsign(signs[0]); + visign(signs[1]); + return 1; + } + static int signInit = setupSigns(); + #define label(A) ilabel(A) #define ilabel(A) ".globl\n" #A ":\n" - + #define MAYBEPERM(A,perm) if (perm) { A ; } #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + +#undef KERNEL_DAG + template<> + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + +#define KERNEL_DAG + template<> + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + #undef VMOVIDUP #undef VMOVRDUP #undef MAYBEPERM @@ -89,32 +107,43 @@ void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) -template<> -void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) + +#undef KERNEL_DAG + template<> + void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include - + +#define KERNEL_DAG + template<> + void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, + std::vector > &buf, + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) +#include + #endif -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); -}} +#define INSTANTIATE_ASM(A)\ +template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ + std::vector > &buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ +template void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ + std::vector > &buf,\ + int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ + + +INSTANTIATE_ASM(WilsonImplF); +INSTANTIATE_ASM(WilsonImplD); +INSTANTIATE_ASM(ZWilsonImplF); +INSTANTIATE_ASM(ZWilsonImplD); +INSTANTIATE_ASM(GparityWilsonImplF); +INSTANTIATE_ASM(GparityWilsonImplD); +INSTANTIATE_ASM(DomainWallVec5dImplF); +INSTANTIATE_ASM(DomainWallVec5dImplD); +INSTANTIATE_ASM(ZDomainWallVec5dImplF); +INSTANTIATE_ASM(ZDomainWallVec5dImplD); + } +} diff --git a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h index 4f3ef861..d236a774 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsmBody.h +++ b/lib/qcd/action/fermion/WilsonKernelsAsmBody.h @@ -30,7 +30,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_PROJMEM(base); +#else XM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -41,15 +45,22 @@ MULT_2SPIN_DIR_PFXP(Xp,basep); } LOAD64(%r10,isigns); +#ifdef KERNEL_DAG + XP_RECON; +#else XM_RECON; - +#endif //////////////////////////////// // Yp //////////////////////////////// basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_PROJMEM(base); +#else YM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -60,7 +71,11 @@ MULT_2SPIN_DIR_PFYP(Yp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YP_RECON_ACCUM; +#else YM_RECON_ACCUM; +#endif //////////////////////////////// // Zp @@ -68,7 +83,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_PROJMEM(base); +#else ZM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -79,7 +98,11 @@ MULT_2SPIN_DIR_PFZP(Zp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZP_RECON_ACCUM; +#else ZM_RECON_ACCUM; +#endif //////////////////////////////// // Tp @@ -87,7 +110,11 @@ basep = st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_PROJMEM(base); +#else TM_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); @@ -98,7 +125,11 @@ MULT_2SPIN_DIR_PFTP(Tp,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TP_RECON_ACCUM; +#else TM_RECON_ACCUM; +#endif //////////////////////////////// // Xm @@ -107,7 +138,11 @@ // basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_PROJMEM(base); +#else XP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR3,perm); } else { LOAD_CHI(base); @@ -118,7 +153,11 @@ MULT_2SPIN_DIR_PFXM(Xm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + XM_RECON_ACCUM; +#else XP_RECON_ACCUM; +#endif //////////////////////////////// // Ym @@ -126,7 +165,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_PROJMEM(base); +#else YP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR2,perm); } else { LOAD_CHI(base); @@ -137,7 +180,11 @@ MULT_2SPIN_DIR_PFYM(Ym,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + YM_RECON_ACCUM; +#else YP_RECON_ACCUM; +#endif //////////////////////////////// // Zm @@ -145,7 +192,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_PROJMEM(base); +#else ZP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR1,perm); } else { LOAD_CHI(base); @@ -156,7 +207,11 @@ MULT_2SPIN_DIR_PFZM(Zm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + ZM_RECON_ACCUM; +#else ZP_RECON_ACCUM; +#endif //////////////////////////////// // Tm @@ -164,7 +219,11 @@ basep= st.GetPFInfo(nent,plocal); nent++; if ( local ) { LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_PROJMEM(base); +#else TP_PROJMEM(base); +#endif MAYBEPERM(PERMUTE_DIR0,perm); } else { LOAD_CHI(base); @@ -175,7 +234,11 @@ MULT_2SPIN_DIR_PFTM(Tm,basep); } LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit +#ifdef KERNEL_DAG + TM_RECON_ACCUM; +#else TP_RECON_ACCUM; +#endif basep= st.GetPFInfo(nent,plocal); nent++; SAVE_RESULT(base,basep); diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index d073e677..15c8ab56 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -311,8 +311,8 @@ namespace Grid { namespace QCD { -template -void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, + template + void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, std::vector > &buf, int ss,int sU,const FermionField &in, FermionField &out) { @@ -554,8 +554,8 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &l } } -template -void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, + template + void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, std::vector > &buf, int ss,int sU,const FermionField &in, FermionField &out) { @@ -839,46 +839,23 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, ////////////// Wilson ; uses this implementation ///////////////////// // Need Nc=3 though // -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, +#define INSTANTIATE_THEM(A) \ +template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ + std::vector > &buf,\ + int ss,int sU,const FermionField &in, FermionField &out);\ +template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ + std::vector > &buf,\ int ss,int sU,const FermionField &in, FermionField &out); - -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); - - -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); -template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, - int ss,int sU,const FermionField &in, FermionField &out); - +INSTANTIATE_THEM(WilsonImplF); +INSTANTIATE_THEM(WilsonImplD); +INSTANTIATE_THEM(ZWilsonImplF); +INSTANTIATE_THEM(ZWilsonImplD); +INSTANTIATE_THEM(GparityWilsonImplF); +INSTANTIATE_THEM(GparityWilsonImplD); +INSTANTIATE_THEM(DomainWallVec5dImplF); +INSTANTIATE_THEM(DomainWallVec5dImplD); +INSTANTIATE_THEM(ZDomainWallVec5dImplF); +INSTANTIATE_THEM(ZDomainWallVec5dImplD); }} diff --git a/lib/qcd/action/fermion/ZMobiusFermion.h b/lib/qcd/action/fermion/ZMobiusFermion.h new file mode 100644 index 00000000..d0e00657 --- /dev/null +++ b/lib/qcd/action/fermion/ZMobiusFermion.h @@ -0,0 +1,79 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/fermion/MobiusFermion.h + + Copyright (C) 2015 + +Author: Peter Boyle +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#ifndef GRID_QCD_ZMOBIUS_FERMION_H +#define GRID_QCD_ZMOBIUS_FERMION_H + +#include + +namespace Grid { + + namespace QCD { + + template + class ZMobiusFermion : public CayleyFermion5D + { + public: + INHERIT_IMPL_TYPES(Impl); + public: + + virtual void Instantiatable(void) {}; + // Constructors + ZMobiusFermion(GaugeField &_Umu, + GridCartesian &FiveDimGrid, + GridRedBlackCartesian &FiveDimRedBlackGrid, + GridCartesian &FourDimGrid, + GridRedBlackCartesian &FourDimRedBlackGrid, + RealD _mass,RealD _M5, + std::vector &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) : + + CayleyFermion5D(_Umu, + FiveDimGrid, + FiveDimRedBlackGrid, + FourDimGrid, + FourDimRedBlackGrid,_mass,_M5,p) + + { + RealD eps = 1.0; + + std::cout< zgamma(this->Ls); + for(int s=0;sLs;s++){ + zgamma[s] = gamma[s]; + } + + // Call base setter + this->SetCoefficientsInternal(1.0,zgamma,b,c); + } + + }; + + } +} + +#endif diff --git a/lib/qcd/action/gauge/GaugeImpl.h b/lib/qcd/action/gauge/GaugeImpl.h index 691d25f1..400381bb 100644 --- a/lib/qcd/action/gauge/GaugeImpl.h +++ b/lib/qcd/action/gauge/GaugeImpl.h @@ -30,7 +30,6 @@ directory #define GRID_QCD_GAUGE_IMPL_H namespace Grid { - namespace QCD { //////////////////////////////////////////////////////////////////////// @@ -52,7 +51,7 @@ public: typedef S Simd; template - using iImplGaugeLink = iScalar>>; + using iImplGaugeLink = iScalar>>; template using iImplGaugeField = iVector>, Nd>; @@ -64,7 +63,7 @@ public: // ugly typedef Lattice GaugeField; - // Move this elsewhere? + // Move this elsewhere? FIXME static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W, int mu) { // U[mu] += W PARALLEL_FOR_LOOP @@ -174,12 +173,19 @@ typedef GaugeImplTypes GimplTypesR; typedef GaugeImplTypes GimplTypesF; typedef GaugeImplTypes GimplTypesD; +typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesR; +typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesF; +typedef GaugeImplTypes::AdjointDimension> GimplAdjointTypesD; + typedef PeriodicGaugeImpl PeriodicGimplR; // Real.. whichever prec typedef PeriodicGaugeImpl PeriodicGimplF; // Float typedef PeriodicGaugeImpl PeriodicGimplD; // Double -typedef ConjugateGaugeImpl - ConjugateGimplR; // Real.. whichever prec +typedef PeriodicGaugeImpl PeriodicGimplAdjR; // Real.. whichever prec +typedef PeriodicGaugeImpl PeriodicGimplAdjF; // Float +typedef PeriodicGaugeImpl PeriodicGimplAdjD; // Double + +typedef ConjugateGaugeImpl ConjugateGimplR; // Real.. whichever prec typedef ConjugateGaugeImpl ConjugateGimplF; // Float typedef ConjugateGaugeImpl ConjugateGimplD; // Double } diff --git a/lib/qcd/action/pseudofermion/TwoFlavour.h b/lib/qcd/action/pseudofermion/TwoFlavour.h index 21d23853..6b65a95d 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavour.h +++ b/lib/qcd/action/pseudofermion/TwoFlavour.h @@ -1,149 +1,151 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h +Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_H -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - //////////////////////////////////////////////////////////////////////// - // Two flavour pseudofermion action for any dop - //////////////////////////////////////////////////////////////////////// - template - class TwoFlavourPseudoFermionAction : public Action { - public: - INHERIT_IMPL_TYPES(Impl); +//////////////////////////////////////////////////////////////////////// +// Two flavour pseudofermion action for any dop +//////////////////////////////////////////////////////////////////////// +template +class TwoFlavourPseudoFermionAction : public Action { + public: + INHERIT_IMPL_TYPES(Impl); - private: - - FermionOperator & FermOp;// the basic operator + private: + FermionOperator &FermOp; // the basic operator - OperatorFunction &DerivativeSolver; + OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; + OperatorFunction &ActionSolver; - FermionField Phi; // the pseudo fermion field for this trajectory + FermionField Phi; // the pseudo fermion field for this trajectory - public: - ///////////////////////////////////////////////// - // Pass in required objects. - ///////////////////////////////////////////////// - TwoFlavourPseudoFermionAction(FermionOperator &Op, - OperatorFunction & DS, - OperatorFunction & AS - ) : FermOp(Op), DerivativeSolver(DS), ActionSolver(AS), Phi(Op.FermionGrid()) { - }; - - ////////////////////////////////////////////////////////////////////////////////////// - // Push the gauge field in to the dops. Assume any BC's and smearing already applied - ////////////////////////////////////////////////////////////////////////////////////// - virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) { + public: + ///////////////////////////////////////////////// + // Pass in required objects. + ///////////////////////////////////////////////// + TwoFlavourPseudoFermionAction(FermionOperator &Op, + OperatorFunction &DS, + OperatorFunction &AS) + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), + Phi(Op.FermionGrid()){}; - // P(phi) = e^{- phi^dag (MdagM)^-1 phi} - // Phi = Mdag eta - // P(eta) = e^{- eta^dag eta} - // - // e^{x^2/2 sig^2} => sig^2 = 0.5. - // - // So eta should be of width sig = 1/sqrt(2). - // and must multiply by 0.707.... - // - // Chroma has this scale factor: two_flavor_monomial_w.h - // IroIro: does not use this scale. It is absorbed by a change of vars - // in the Phi integral, and thus is only an irrelevant prefactor for the partition function. - // - RealD scale = std::sqrt(0.5); - FermionField eta(FermOp.FermionGrid()); + ////////////////////////////////////////////////////////////////////////////////////// + // Push the gauge field in to the dops. Assume any BC's and smearing already + // applied + ////////////////////////////////////////////////////////////////////////////////////// + virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) { + // P(phi) = e^{- phi^dag (MdagM)^-1 phi} + // Phi = Mdag eta + // P(eta) = e^{- eta^dag eta} + // + // e^{x^2/2 sig^2} => sig^2 = 0.5. + // + // So eta should be of width sig = 1/sqrt(2). + // and must multiply by 0.707.... + // + // Chroma has this scale factor: two_flavor_monomial_w.h + // IroIro: does not use this scale. It is absorbed by a change of vars + // in the Phi integral, and thus is only an irrelevant prefactor for + // the partition function. + // + RealD scale = std::sqrt(0.5); + FermionField eta(FermOp.FermionGrid()); - gaussian(pRNG,eta); + gaussian(pRNG, eta); - FermOp.ImportGauge(U); - FermOp.Mdag(eta,Phi); + FermOp.ImportGauge(U); + FermOp.Mdag(eta, Phi); - Phi=Phi*scale; - - }; + Phi = Phi * scale; + }; - ////////////////////////////////////////////////////// - // S = phi^dag (Mdag M)^-1 phi - ////////////////////////////////////////////////////// - virtual RealD S(const GaugeField &U) { + ////////////////////////////////////////////////////// + // S = phi^dag (Mdag M)^-1 phi + ////////////////////////////////////////////////////// + virtual RealD S(const GaugeField &U) { + FermOp.ImportGauge(U); - FermOp.ImportGauge(U); + FermionField X(FermOp.FermionGrid()); + FermionField Y(FermOp.FermionGrid()); - FermionField X(FermOp.FermionGrid()); - FermionField Y(FermOp.FermionGrid()); - - MdagMLinearOperator ,FermionField> MdagMOp(FermOp); - X=zero; - ActionSolver(MdagMOp,Phi,X); - MdagMOp.Op(X,Y); + MdagMLinearOperator, FermionField> MdagMOp(FermOp); + X = zero; + ActionSolver(MdagMOp, Phi, X); + MdagMOp.Op(X, Y); - RealD action = norm2(Y); - std::cout << GridLogMessage << "Pseudofermion action "<, FermionField> MdagMOp(FermOp); - MdagMLinearOperator ,FermionField> MdagMOp(FermOp); + X = zero; + DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi + MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi - X=zero; - DerivativeSolver(MdagMOp,Phi,X); - MdagMOp.Op(X,Y); + // Our conventions really make this UdSdU; We do not differentiate wrt Udag + // here. + // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. - // Our conventions really make this UdSdU; We do not differentiate wrt Udag here. - // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. + FermOp.MDeriv(tmp, Y, X, DaggerNo); + dSdU = tmp; + FermOp.MDeriv(tmp, X, Y, DaggerYes); + dSdU = dSdU + tmp; - FermOp.MDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; - FermOp.MDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; - - //dSdU = Ta(dSdU); - - }; - - }; - - } + // not taking here the traceless antihermitian component + }; +}; +} } #endif diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h index 16b2d814..5af1761e 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h @@ -1,70 +1,66 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h +Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Peter Boyle Author: Peter Boyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { +//////////////////////////////////////////////////////////////////////// +// Two flavour pseudofermion action for any EO prec dop +//////////////////////////////////////////////////////////////////////// +template +class TwoFlavourEvenOddPseudoFermionAction + : public Action { + public: + INHERIT_IMPL_TYPES(Impl); + private: + FermionOperator &FermOp; // the basic operator - //////////////////////////////////////////////////////////////////////// - // Two flavour pseudofermion action for any EO prec dop - //////////////////////////////////////////////////////////////////////// - template - class TwoFlavourEvenOddPseudoFermionAction : public Action { + OperatorFunction &DerivativeSolver; + OperatorFunction &ActionSolver; - public: + FermionField PhiOdd; // the pseudo fermion field for this trajectory + FermionField PhiEven; // the pseudo fermion field for this trajectory - INHERIT_IMPL_TYPES(Impl); - - private: - - FermionOperator & FermOp;// the basic operator - - OperatorFunction &DerivativeSolver; - OperatorFunction &ActionSolver; - - FermionField PhiOdd; // the pseudo fermion field for this trajectory - FermionField PhiEven; // the pseudo fermion field for this trajectory - - public: - ///////////////////////////////////////////////// - // Pass in required objects. - ///////////////////////////////////////////////// - TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, - OperatorFunction & DS, - OperatorFunction & AS - ) : - FermOp(Op), - DerivativeSolver(DS), - ActionSolver(AS), + public: + ///////////////////////////////////////////////// + // Pass in required objects. + ///////////////////////////////////////////////// + TwoFlavourEvenOddPseudoFermionAction(FermionOperator &Op, + OperatorFunction &DS, + OperatorFunction &AS) + : FermOp(Op), + DerivativeSolver(DS), + ActionSolver(AS), PhiEven(Op.FermionRedBlackGrid()), PhiOdd(Op.FermionRedBlackGrid()) {}; diff --git a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h index edb6beaa..5e3b80d9 100644 --- a/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h +++ b/lib/qcd/action/pseudofermion/TwoFlavourEvenOddRatio.h @@ -131,9 +131,11 @@ namespace Grid{ Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi X=zero; ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi - Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi + //Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi + // Multiply by Ydag + RealD action = real(innerProduct(Y,X)); - RealD action = norm2(Y); + //RealD action = norm2(Y); // The EE factorised block; normally can replace with zero if det is constant (gauge field indept) // Only really clover term that creates this. Leave the EE portion as a future to do to make most diff --git a/lib/qcd/hmc/HmcRunner.h b/lib/qcd/hmc/HmcRunner.h index 5616582f..a31ba784 100644 --- a/lib/qcd/hmc/HmcRunner.h +++ b/lib/qcd/hmc/HmcRunner.h @@ -1,179 +1,191 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/HmcRunner.h +Source file: ./lib/qcd/hmc/HmcRunner.h - Copyright (C) 2015 +Copyright (C) 2015 Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ #ifndef HMC_RUNNER #define HMC_RUNNER -namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - -template +template class NerscHmcRunnerTemplate { -public: - + public: INHERIT_GIMPL_TYPES(Gimpl); enum StartType_t { ColdStart, HotStart, TepidStart, CheckpointStart }; - ActionSet TheAction; + ActionSet TheAction; - GridCartesian * UGrid ; - GridCartesian * FGrid ; - GridRedBlackCartesian * UrbGrid ; - GridRedBlackCartesian * FrbGrid ; + GridCartesian *UGrid; + GridCartesian *FGrid; + GridRedBlackCartesian *UrbGrid; + GridRedBlackCartesian *FrbGrid; - virtual void BuildTheAction (int argc, char **argv) = 0; // necessary? - - - void Run (int argc, char **argv){ + virtual void BuildTheAction(int argc, char **argv) = 0; // necessary? + void Run(int argc, char **argv) { StartType_t StartType = HotStart; std::string arg; - if( GridCmdOptionExists(argv,argv+argc,"--StartType") ){ - arg = GridCmdOptionPayload(argv,argv+argc,"--StartType"); - if ( arg == "HotStart" ) { StartType = HotStart; } - else if ( arg == "ColdStart" ) { StartType = ColdStart; } - else if ( arg == "TepidStart" ) { StartType = TepidStart; } - else if ( arg == "CheckpointStart" ) { StartType = CheckpointStart; } - else assert(0); + if (GridCmdOptionExists(argv, argv + argc, "--StartType")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--StartType"); + if (arg == "HotStart") { + StartType = HotStart; + } else if (arg == "ColdStart") { + StartType = ColdStart; + } else if (arg == "TepidStart") { + StartType = TepidStart; + } else if (arg == "CheckpointStart") { + StartType = CheckpointStart; + } else { + std::cout << GridLogError << "Unrecognized option in --StartType\n"; + std::cout << GridLogError << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n"; + assert(0); + } } int StartTraj = 0; - if( GridCmdOptionExists(argv,argv+argc,"--StartTrajectory") ){ - arg= GridCmdOptionPayload(argv,argv+argc,"--StartTrajectory"); + if (GridCmdOptionExists(argv, argv + argc, "--StartTrajectory")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--StartTrajectory"); std::vector ivec(0); - GridCmdOptionIntVector(arg,ivec); + GridCmdOptionIntVector(arg, ivec); StartTraj = ivec[0]; - } + } int NumTraj = 1; - if( GridCmdOptionExists(argv,argv+argc,"--Trajectories") ){ - arg= GridCmdOptionPayload(argv,argv+argc,"--Trajectories"); + if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--Trajectories"); std::vector ivec(0); - GridCmdOptionIntVector(arg,ivec); + GridCmdOptionIntVector(arg, ivec); NumTraj = ivec[0]; } int NumThermalizations = 10; - if( GridCmdOptionExists(argv,argv+argc,"--Thermalizations") ){ - arg= GridCmdOptionPayload(argv,argv+argc,"--Thermalizations"); + if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) { + arg = GridCmdOptionPayload(argv, argv + argc, "--Thermalizations"); std::vector ivec(0); - GridCmdOptionIntVector(arg,ivec); + GridCmdOptionIntVector(arg, ivec); NumThermalizations = ivec[0]; } + GridSerialRNG sRNG; + GridParallelRNG pRNG(UGrid); + LatticeGaugeField U(UGrid); // change this to an extended field (smearing class)? - GridSerialRNG sRNG; - GridParallelRNG pRNG(UGrid); - LatticeGaugeField U(UGrid); // change this to an extended field (smearing class) + std::vector SerSeed({1, 2, 3, 4, 5}); + std::vector ParSeed({6, 7, 8, 9, 10}); - std::vector SerSeed({1,2,3,4,5}); - std::vector ParSeed({6,7,8,9,10}); - - // Create integrator, including the smearing policy - // Smearing policy + // Smearing policy, only defined for Nc=3 + /* std::cout << GridLogDebug << " Creating the Stout class\n"; - double rho = 0.1; // smearing parameter, now hardcoded - int Nsmear = 1; // number of smearing levels + double rho = 0.1; // smearing parameter, now hardcoded + int Nsmear = 1; // number of smearing levels Smear_Stout Stout(rho); std::cout << GridLogDebug << " Creating the SmearedConfiguration class\n"; - SmearedConfiguration SmearingPolicy(UGrid, Nsmear, Stout); + //SmearedConfiguration SmearingPolicy(UGrid, Nsmear, Stout); std::cout << GridLogDebug << " done\n"; + */ ////////////// - typedef MinimumNorm2 > IntegratorType;// change here to change the algorithm - IntegratorParameters MDpar(20); + NoSmearing SmearingPolicy; + typedef MinimumNorm2, RepresentationsPolicy > + IntegratorType; // change here to change the algorithm + IntegratorParameters MDpar(20, 1.0); IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy); - // Checkpoint strategy - NerscHmcCheckpointer Checkpoint(std::string("ckpoint_lat"),std::string("ckpoint_rng"),1); - PlaquetteLogger PlaqLog(std::string("plaq")); + NerscHmcCheckpointer Checkpoint(std::string("ckpoint_lat"), + std::string("ckpoint_rng"), 1); + PlaquetteLogger PlaqLog(std::string("plaq")); HMCparameters HMCpar; - HMCpar.StartTrajectory = StartTraj; - HMCpar.Trajectories = NumTraj; + HMCpar.StartTrajectory = StartTraj; + HMCpar.Trajectories = NumTraj; HMCpar.NoMetropolisUntil = NumThermalizations; - - if ( StartType == HotStart ) { + if (StartType == HotStart) { // Hot start HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::HotConfiguration(pRNG, U); - } else if ( StartType == ColdStart ) { + SU::HotConfiguration(pRNG, U); + } else if (StartType == ColdStart) { // Cold start HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::ColdConfiguration(pRNG, U); - } else if ( StartType == TepidStart ) { + SU::ColdConfiguration(pRNG, U); + } else if (StartType == TepidStart) { // Tepid start HMCpar.MetropolisTest = true; sRNG.SeedFixedIntegers(SerSeed); pRNG.SeedFixedIntegers(ParSeed); - SU3::TepidConfiguration(pRNG, U); - } else if ( StartType == CheckpointStart ) { + SU::TepidConfiguration(pRNG, U); + } else if (StartType == CheckpointStart) { HMCpar.MetropolisTest = true; // CheckpointRestart Checkpoint.CheckpointRestore(StartTraj, U, sRNG, pRNG); } - // Attach the gauge field to the smearing Policy and create the fill the smeared set + // Attach the gauge field to the smearing Policy and create the fill the + // smeared set // notice that the unit configuration is singular in this procedure - std::cout << GridLogMessage << "Filling the smeared set\n"; + std::cout << GridLogMessage << "Filling the smeared set\n"; SmearingPolicy.set_GaugeField(U); - - HybridMonteCarlo HMC(HMCpar, MDynamics,sRNG,pRNG,U); + + HybridMonteCarlo HMC(HMCpar, MDynamics, sRNG, + pRNG, U); HMC.AddObservable(&Checkpoint); HMC.AddObservable(&PlaqLog); - + // Run it HMC.evolve(); - } - }; - typedef NerscHmcRunnerTemplate NerscHmcRunner; - typedef NerscHmcRunnerTemplate NerscHmcRunnerF; - typedef NerscHmcRunnerTemplate NerscHmcRunnerD; +typedef NerscHmcRunnerTemplate NerscHmcRunner; +typedef NerscHmcRunnerTemplate NerscHmcRunnerF; +typedef NerscHmcRunnerTemplate NerscHmcRunnerD; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunner; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerF; - typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerD; +typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunner; +typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerF; +typedef NerscHmcRunnerTemplate PeriodicNerscHmcRunnerD; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunner; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerF; - typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerD; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunner; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerF; +typedef NerscHmcRunnerTemplate ConjugateNerscHmcRunnerD; -}} +template +using NerscHmcRunnerHirep = NerscHmcRunnerTemplate; + + + +} +} #endif diff --git a/lib/qcd/hmc/integrators/Integrator.h b/lib/qcd/hmc/integrators/Integrator.h index db61b114..f89b7959 100644 --- a/lib/qcd/hmc/integrators/Integrator.h +++ b/lib/qcd/hmc/integrators/Integrator.h @@ -1,33 +1,34 @@ - /************************************************************************************* +/************************************************************************************* - Grid physics library, www.github.com/paboyle/Grid +Grid physics library, www.github.com/paboyle/Grid - Source file: ./lib/qcd/hmc/integrators/Integrator.h +Source file: ./lib/qcd/hmc/integrators/Integrator.h - Copyright (C) 2015 +Copyright (C) 2015 Author: Azusa Yamaguchi Author: Peter Boyle Author: neo Author: paboyle - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +See the full license in the file "LICENSE" in the top level distribution +directory +*************************************************************************************/ +/* END LEGAL */ //-------------------------------------------------------------------- /*! @file Integrator.h * @brief Classes for the Molecular Dynamics integrator @@ -40,208 +41,278 @@ Author: paboyle #ifndef INTEGRATOR_INCLUDED #define INTEGRATOR_INCLUDED -//class Observer; +// class Observer; #include - namespace Grid{ - namespace QCD{ +namespace Grid { +namespace QCD { - struct IntegratorParameters{ +struct IntegratorParameters { + int Nexp; + int MDsteps; // number of outer steps + RealD trajL; // trajectory length + RealD stepsize; - int Nexp; - int MDsteps; // number of outer steps - RealD trajL; // trajectory length - RealD stepsize; - - IntegratorParameters(int MDsteps_, - RealD trajL_=1.0, - int Nexp_=12): - Nexp(Nexp_), - MDsteps(MDsteps_), - trajL(trajL_), - stepsize(trajL/MDsteps) - { - // empty body constructor - }; - - }; - - /*! @brief Class for Molecular Dynamics management */ - template - class Integrator { - - protected: - - typedef IntegratorParameters ParameterType; - - IntegratorParameters Params; - - const ActionSet as; - - int levels; // - double t_U; // Track time passing on each level and for U and for P - std::vector t_P; // - - GaugeField P; - - SmearingPolicy &Smearer; - - // Should match any legal (SU(n)) gauge field - // Need to use this template to match Ncol to pass to SU class - template void generate_momenta(Lattice< iVector< iScalar< iMatrix >, Nd> > & P,GridParallelRNG& pRNG){ - typedef Lattice< iScalar< iScalar< iMatrix > > > GaugeLinkField; - GaugeLinkField Pmu(P._grid); - Pmu = zero; - for(int mu=0;mu::GaussianLieAlgebraMatrix(pRNG, Pmu); - PokeIndex(P, Pmu, mu); - } - } - - - //ObserverList observers; // not yet - // typedef std::vector ObserverList; - // void register_observers(); - // void notify_observers(); - - void update_P(GaugeField&U, int level, double ep){ - t_P[level]+=ep; - update_P(P,U,level,ep); - - std::cout<is_smeared); - as[level].actions.at(a)->deriv(Us,force); // deriv should NOT include Ta - - std::cout<< GridLogIntegrator << "Smearing (on/off): "<is_smeared <is_smeared) Smearer.smeared_force(force); - force = Ta(force); - std::cout<< GridLogIntegrator << "Force average: "<< norm2(force)/(U._grid->gSites()) <(U, mu); - auto Pmu=PeekIndex(Mom, mu); - Umu = expMat(Pmu, ep, Params.Nexp)*Umu; - ProjectOnGroup(Umu); - PokeIndex(U, Umu, mu); - } - // Update the smeared fields, can be implemented as observer - Smearer.set_GaugeField(U); - } - - virtual void step (GaugeField& U,int level, int first,int last)=0; - -public: - - Integrator(GridBase* grid, - IntegratorParameters Par, - ActionSet & Aset, - SmearingPolicy &Sm): - Params(Par), - as(Aset), - P(grid), - levels(Aset.size()), - Smearer(Sm) - { - t_P.resize(levels,0.0); - t_U=0.0; - // initialization of smearer delegated outside of Integrator - }; - - virtual ~Integrator(){} - - //Initialization of momenta and actions - void refresh(GaugeField& U,GridParallelRNG &pRNG){ - std::cout<is_smeared); - as[level].actions.at(actionID)->refresh(Us, pRNG); - } - } - } - - // Calculate action - RealD S(GaugeField& U){// here also U not used - - LatticeComplex Hloc(U._grid); Hloc = zero; - // Momenta - for (int mu=0; mu (P, mu); - Hloc -= trace(Pmu*Pmu); - } - Complex Hsum = sum(Hloc); - - RealD H = Hsum.real(); - RealD Hterm; - std::cout<is_smeared); - Hterm = as[level].actions.at(actionID)->S(Us); - std::cout< +class Integrator { + protected: + typedef IntegratorParameters ParameterType; + + IntegratorParameters Params; + + const ActionSet as; + + int levels; // + double t_U; // Track time passing on each level and for U and for P + std::vector t_P; // + + GaugeField P; + + SmearingPolicy& Smearer; + + RepresentationPolicy Representations; + + // Should match any legal (SU(n)) gauge field + // Need to use this template to match Ncol to pass to SU class + template + void generate_momenta(Lattice >, Nd> >& P, + GridParallelRNG& pRNG) { + typedef Lattice > > > GaugeLinkField; + GaugeLinkField Pmu(P._grid); + Pmu = zero; + for (int mu = 0; mu < Nd; mu++) { + SU::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu); + PokeIndex(P, Pmu, mu); + } + } + + // ObserverList observers; // not yet + // typedef std::vector ObserverList; + // void register_observers(); + // void notify_observers(); + + void update_P(GaugeField& U, int level, double ep) { + t_P[level] += ep; + update_P(P, U, level, ep); + + std::cout << GridLogIntegrator << "[" << level << "] P " + << " dt " << ep << " : t_P " << t_P[level] << std::endl; + } + + // to be used by the actionlevel class to iterate + // over the representations + struct _updateP { + template + void operator()(std::vector*> repr_set, Repr& Rep, + GF& Mom, GF& U, double ep) { + for (int a = 0; a < repr_set.size(); ++a) { + FieldType forceR(U._grid); + // Implement smearing only for the fundamental representation now + repr_set.at(a)->deriv(Rep.U, forceR); + GF force = + Rep.RtoFundamentalProject(forceR); // Ta for the fundamental rep + std::cout << GridLogIntegrator << "Hirep Force average: " + << norm2(force) / (U._grid->gSites()) << std::endl; + Mom -= force * ep ; + } + } + } update_P_hireps{}; + + void update_P(GaugeField& Mom, GaugeField& U, int level, double ep) { + // input U actually not used in the fundamental case + // Fundamental updates, include smearing + for (int a = 0; a < as[level].actions.size(); ++a) { + GaugeField force(U._grid); + GaugeField& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared); + as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta + + std::cout << GridLogIntegrator + << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared + << std::endl; + if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force); + force = Ta(force); + std::cout << GridLogIntegrator + << "Force average: " << norm2(force) / (U._grid->gSites()) + << std::endl; + Mom -= force * ep; + } + + // Force from the other representations + as[level].apply(update_P_hireps, Representations, Mom, U, ep); + } + + void update_U(GaugeField& U, double ep) { + update_U(P, U, ep); + + t_U += ep; + int fl = levels - 1; + std::cout << GridLogIntegrator << " " + << "[" << fl << "] U " + << " dt " << ep << " : t_U " << t_U << std::endl; + } + void update_U(GaugeField& Mom, GaugeField& U, double ep) { + // rewrite exponential to deal internally with the lorentz index? + for (int mu = 0; mu < Nd; mu++) { + auto Umu = PeekIndex(U, mu); + auto Pmu = PeekIndex(Mom, mu); + Umu = expMat(Pmu, ep, Params.Nexp) * Umu; + PokeIndex(U, ProjectOnGroup(Umu), mu); + } + + // Update the smeared fields, can be implemented as observer + Smearer.set_GaugeField(U); + // Update the higher representations fields + Representations.update(U); // void functions if fundamental representation + } + + virtual void step(GaugeField& U, int level, int first, int last) = 0; + + public: + Integrator(GridBase* grid, IntegratorParameters Par, + ActionSet& Aset, + SmearingPolicy& Sm) + : Params(Par), + as(Aset), + P(grid), + levels(Aset.size()), + Smearer(Sm), + Representations(grid) { + t_P.resize(levels, 0.0); + t_U = 0.0; + // initialization of smearer delegated outside of Integrator + }; + + virtual ~Integrator() {} + + // to be used by the actionlevel class to iterate + // over the representations + struct _refresh { + template + void operator()(std::vector*> repr_set, Repr& Rep, + GridParallelRNG& pRNG) { + for (int a = 0; a < repr_set.size(); ++a){ + repr_set.at(a)->refresh(Rep.U, pRNG); + + std::cout << GridLogDebug << "Hirep refreshing pseudofermions" << std::endl; + } + } + } refresh_hireps{}; + + // Initialization of momenta and actions + void refresh(GaugeField& U, GridParallelRNG& pRNG) { + std::cout << GridLogIntegrator << "Integrator refresh\n"; + generate_momenta(P, pRNG); + + // Update the smeared fields, can be implemented as observer + // necessary to keep the fields updated even after a reject + // of the Metropolis + Smearer.set_GaugeField(U); + // Set the (eventual) representations gauge fields + Representations.update(U); + + // The Smearer is attached to a pointer of the gauge field + // automatically gets the correct field + // whether or not has been accepted in the previous sweep + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and + // based on the boolean is_smeared in actionID + GaugeField& Us = + Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + as[level].actions.at(actionID)->refresh(Us, pRNG); + } + + // Refresh the higher representation actions + as[level].apply(refresh_hireps, Representations, pRNG); + } + } + + // to be used by the actionlevel class to iterate + // over the representations + struct _S { + template + void operator()(std::vector*> repr_set, Repr& Rep, + int level, RealD& H) { + + for (int a = 0; a < repr_set.size(); ++a) { + RealD Hterm = repr_set.at(a)->S(Rep.U); + std::cout << GridLogMessage << "S Level " << level << " term " << a + << " H Hirep = " << Hterm << std::endl; + H += Hterm; + + } + } + } S_hireps{}; + + // Calculate action + RealD S(GaugeField& U) { // here also U not used + + LatticeComplex Hloc(U._grid); + Hloc = zero; + // Momenta + for (int mu = 0; mu < Nd; mu++) { + auto Pmu = PeekIndex(P, mu); + Hloc -= trace(Pmu * Pmu); + } + Complex Hsum = sum(Hloc); + + RealD H = Hsum.real(); + RealD Hterm; + std::cout << GridLogMessage << "Momentum action H_p = " << H << "\n"; + + // Actions + for (int level = 0; level < as.size(); ++level) { + for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) { + // get gauge field from the SmearingPolicy and + // based on the boolean is_smeared in actionID + GaugeField& Us = + Smearer.get_U(as[level].actions.at(actionID)->is_smeared); + Hterm = as[level].actions.at(actionID)->S(Us); + std::cout << GridLogMessage << "S Level " << level << " term " + << actionID << " H = " << Hterm << std::endl; + H += Hterm; + } + as[level].apply(S_hireps, Representations, level, H); + } + + return H; + } + + void integrate(GaugeField& U) { + // reset the clocks + t_U = 0; + for (int level = 0; level < as.size(); ++level) { + t_P[level] = 0; + } + + for (int step = 0; step < Params.MDsteps; ++step) { // MD step + int first_step = (step == 0); + int last_step = (step == Params.MDsteps - 1); + this->step(U, 0, first_step, last_step); + } + + // Check the clocks all match on all levels + for (int level = 0; level < as.size(); ++level) { + assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same + std::cout << GridLogIntegrator << " times[" << level + << "]= " << t_P[level] << " " << t_U << std::endl; + } + + // and that we indeed got to the end of the trajectory + assert(fabs(t_U - Params.trajL) < 1.0e-6); + } +}; } } -#endif//INTEGRATOR_INCLUDED +#endif // INTEGRATOR_INCLUDED diff --git a/lib/qcd/hmc/integrators/Integrator_algorithm.h b/lib/qcd/hmc/integrators/Integrator_algorithm.h index 66390742..cd289b08 100644 --- a/lib/qcd/hmc/integrators/Integrator_algorithm.h +++ b/lib/qcd/hmc/integrators/Integrator_algorithm.h @@ -91,17 +91,19 @@ namespace Grid{ * P 1/2 P 1/2 */ - template class LeapFrog : - public Integrator { + template > class LeapFrog : + public Integrator { public: - typedef LeapFrog Algorithm; + typedef LeapFrog Algorithm; LeapFrog(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset, + ActionSet & Aset, SmearingPolicy & Sm): - Integrator(grid,Par,Aset,Sm) {}; + Integrator(grid,Par,Aset,Sm) {}; void step (GaugeField& U, int level,int _first, int _last){ @@ -138,8 +140,10 @@ namespace Grid{ } }; - template class MinimumNorm2 : - public Integrator { + template > class MinimumNorm2 : + public Integrator { private: const RealD lambda = 0.1931833275037836; @@ -147,9 +151,9 @@ namespace Grid{ MinimumNorm2(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset, + ActionSet & Aset, SmearingPolicy& Sm): - Integrator(grid,Par,Aset,Sm) {}; + Integrator(grid,Par,Aset,Sm) {}; void step (GaugeField& U, int level, int _first,int _last){ @@ -197,8 +201,10 @@ namespace Grid{ }; - template class ForceGradient : - public Integrator { + template > class ForceGradient : + public Integrator { private: const RealD lambda = 1.0/6.0;; const RealD chi = 1.0/72.0; @@ -209,9 +215,9 @@ namespace Grid{ // Looks like dH scales as dt^4. tested wilson/wilson 2 level. ForceGradient(GridBase* grid, IntegratorParameters Par, - ActionSet & Aset, + ActionSet & Aset, SmearingPolicy &Sm): - Integrator(grid,Par,Aset, Sm) {}; + Integrator(grid,Par,Aset, Sm) {}; void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){ diff --git a/lib/qcd/representations/adjoint.h b/lib/qcd/representations/adjoint.h new file mode 100644 index 00000000..facc72f1 --- /dev/null +++ b/lib/qcd/representations/adjoint.h @@ -0,0 +1,115 @@ +/* + * Policy classes for the HMC + * Author: Guido Cossu +*/ + +#ifndef ADJOINT_H +#define ADJOINT_H + +namespace Grid { +namespace QCD { + +/* +* This is an helper class for the HMC +* Should contain only the data for the adjoint representation +* and the facility to convert from the fundamental -> adjoint +*/ + +template +class AdjointRep { + public: + // typdef to be used by the Representations class in HMC to get the + // types for the higher representation fields + typedef typename SU_Adjoint::LatticeAdjMatrix LatticeMatrix; + typedef typename SU_Adjoint::LatticeAdjField LatticeField; + static const int Dimension = ncolour * ncolour - 1; + + LatticeField U; + + explicit AdjointRep(GridBase *grid) : U(grid) {} + + void update_representation(const LatticeGaugeField &Uin) { + std::cout << GridLogDebug << "Updating adjoint representation\n"; + // Uin is in the fundamental representation + // get the U in AdjointRep + // (U_adj)_B = tr[e^a U e^b U^dag] + // e^a = t^a/sqrt(T_F) + // where t^a is the generator in the fundamental + // T_F is 1/2 for the fundamental representation + conformable(U, Uin); + U = zero; + LatticeColourMatrix tmp(Uin._grid); + + Vector::Matrix> ta(Dimension); + + // Debug lines + // LatticeMatrix uno(Uin._grid); + // uno = 1.0; + //////////////// + + // FIXME probably not very efficient to get all the generators + // everytime + for (int a = 0; a < Dimension; a++) SU::generator(a, ta[a]); + + for (int mu = 0; mu < Nd; mu++) { + auto Uin_mu = peekLorentz(Uin, mu); + auto U_mu = peekLorentz(U, mu); + for (int a = 0; a < Dimension; a++) { + tmp = 2.0 * adj(Uin_mu) * ta[a] * Uin_mu; + for (int b = 0; b < Dimension; b++) + pokeColour(U_mu, trace(tmp * ta[b]), a, b); + } + pokeLorentz(U, U_mu, mu); + // Check matrix U_mu, must be real orthogonal + // reality + /* + LatticeMatrix Ucheck = U_mu - conjugate(U_mu); + std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) << + std::endl; + + Ucheck = U_mu * adj(U_mu) - uno; + std::cout << GridLogMessage << "orthogonality check: " << norm2(Ucheck) << + std::endl; + */ + } + } + + LatticeGaugeField RtoFundamentalProject(const LatticeField &in, + Real scale = 1.0) const { + LatticeGaugeField out(in._grid); + out = zero; + + for (int mu = 0; mu < Nd; mu++) { + LatticeColourMatrix out_mu(in._grid); // fundamental representation + LatticeMatrix in_mu = peekLorentz(in, mu); + + out_mu = zero; + + typename SU::LatticeAlgebraVector h(in._grid); + projectOnAlgebra(h, in_mu, double(Nc) * 2.0); // factor C(r)/C(fund) + FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once + pokeLorentz(out, out_mu, mu); + // Returns traceless antihermitian matrix Nc * Nc. + // Confirmed + } + return out; + } + + private: + void projectOnAlgebra(typename SU::LatticeAlgebraVector &h_out, + const LatticeMatrix &in, Real scale = 1.0) const { + SU_Adjoint::projectOnAlgebra(h_out, in, scale); + } + + void FundamentalLieAlgebraMatrix( + typename SU::LatticeAlgebraVector &h, + typename SU::LatticeMatrix &out, Real scale = 1.0) const { + SU::FundamentalLieAlgebraMatrix(h, out, scale); + } +}; + +typedef AdjointRep AdjointRepresentation; +} +} + +#endif \ No newline at end of file diff --git a/lib/qcd/representations/fundamental.h b/lib/qcd/representations/fundamental.h new file mode 100644 index 00000000..7d85d357 --- /dev/null +++ b/lib/qcd/representations/fundamental.h @@ -0,0 +1,45 @@ +/* + * Policy classes for the HMC + * Author: Guido Cossu +*/ + +#ifndef FUNDAMENTAL_H +#define FUNDAMENTAL_H + + +namespace Grid { +namespace QCD { + +/* +* This is an helper class for the HMC +* Empty since HMC updates already the fundamental representation +*/ + +template +class FundamentalRep { + public: + static const int Dimension = ncolour; + + // typdef to be used by the Representations class in HMC to get the + // types for the higher representation fields + typedef typename SU::LatticeMatrix LatticeMatrix; + typedef LatticeGaugeField LatticeField; + + explicit FundamentalRep(GridBase* grid) {} //do nothing + void update_representation(const LatticeGaugeField& Uin) {} // do nothing + + LatticeField RtoFundamentalProject(const LatticeField& in, Real scale = 1.0) const{ + return (scale * in); + } + +}; + +typedef FundamentalRep FundamentalRepresentation; + +} +} + + + + +#endif diff --git a/lib/qcd/representations/hmc_types.h b/lib/qcd/representations/hmc_types.h new file mode 100644 index 00000000..7ab15e9b --- /dev/null +++ b/lib/qcd/representations/hmc_types.h @@ -0,0 +1,91 @@ +#ifndef HMC_TYPES_H +#define HMC_TYPES_H + +#include +#include +#include +#include +#include + +namespace Grid { +namespace QCD { + +// Supported types +// enum {Fundamental, Adjoint} repr_type; + +// Utility to add support to the HMC for representations other than the +// fundamental +template +class Representations { + public: + typedef std::tuple Representation_type; + + // Size of the tuple, known at compile time + static const int tuple_size = sizeof...(Reptypes); + // The collection of types for the gauge fields + typedef std::tuple Representation_Fields; + + // To access the Reptypes (FundamentalRepresentation, AdjointRepresentation) + template + using repr_type = typename std::tuple_element::type; + // in order to get the typename of the field use + // type repr_type::LatticeField + + Representation_type rep; + + // Multiple types constructor + explicit Representations(GridBase* grid) : rep(Reptypes(grid)...){}; + + int size() { return tuple_size; } + + // update the fields + template + inline typename std::enable_if<(I == tuple_size), void>::type update( + LatticeGaugeField& U) {} + + template + inline typename std::enable_if<(I < tuple_size), void>::type update( + LatticeGaugeField& U) { + std::get(rep).update_representation(U); + update(U); + } + + + +}; + +typedef Representations NoHirep; + +// Helper classes to access the elements +// Strips the first N parameters from the tuple +// sequence of classes to obtain the S sequence +// Creates a type that is a tuple of vectors of the template type A +template