1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'develop' into feature/hadrons

This commit is contained in:
Antonin Portelli 2016-09-20 13:49:33 +01:00
commit a034e9901b
42 changed files with 1594 additions and 558 deletions

4
.gitignore vendored
View File

@ -94,6 +94,10 @@ build.sh
################ ################
lib/Eigen/* lib/Eigen/*
# FFTW source #
################
lib/fftw/*
# libtool macros # # libtool macros #
################## ##################
m4/lt* m4/lt*

View File

@ -23,6 +23,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-4.9 env: VERSION=-4.9
- compiler: gcc - compiler: gcc
@ -35,6 +37,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-5 env: VERSION=-5
- compiler: clang - compiler: clang
@ -47,6 +51,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang - compiler: clang
@ -59,6 +65,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
@ -69,6 +77,7 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install: install:
@ -92,3 +101,9 @@ script:
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -68,10 +68,18 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
``` bash ``` bash
mkdir build; cd build mkdir build; cd build
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi --prefix=<path> ../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
``` ```
where `--enable-precision=` set the default precision (`single` or `double`), `--enable-simd=` set the SIMD type (see possible values below), `--enable-comms=` set the protocol used for communications (`none`, `mpi` or `shmem`), and `<path>` should be replaced by the prefix path where you want to install Grid. Other options are available, use `configure --help` to display them. Like with any other program using GNU autotool, the `CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to customise the build. where `--enable-precision=` set the default precision (`single` or `double`),
`--enable-simd=` set the SIMD type (see possible values below), `--enable-
comms=` set the protocol used for communications (`none`, `mpi`, `mpi-auto` or
`shmem`), and `<path>` should be replaced by the prefix path where you want to
install Grid. The `mpi-auto` communication option set `configure` to determine
automatically how to link to MPI. Other options are available, use `configure
--help` to display them. Like with any other program using GNU autotool, the
`CXX`, `CXXFLAGS`, `LDFLAGS`, ... environment variables can be modified to
customise the build.
Finally, you can build and install Grid: Finally, you can build and install Grid:

View File

@ -194,7 +194,7 @@ int main (int argc, char ** argv)
} }
} }
#if 0
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl; std::cout<<GridLogMessage << "= Benchmarking sequential persistent halo exchange in "<<nmu<<" dimensions"<<std::endl;
@ -315,7 +315,7 @@ int main (int argc, char ** argv)
} }
} }
#endif
Grid_finalize(); Grid_finalize();
} }

View File

@ -61,6 +61,8 @@ int main (int argc, char ** argv)
QCD::WilsonKernelsStatic::AsmOpt=0; QCD::WilsonKernelsStatic::AsmOpt=0;
} }
std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking DWF"<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl; std::cout<<GridLogMessage << "=========================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl; std::cout<<GridLogMessage << "Volume \t\t\tProcs \t Dw \t eoDw \t sDw \t eosDw (Mflop/s) "<<std::endl;
std::cout<<GridLogMessage << "=========================================================================="<<std::endl; std::cout<<GridLogMessage << "=========================================================================="<<std::endl;

View File

@ -0,0 +1,117 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_wilson.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Richard Rollins <rprollins@users.noreply.github.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
template<class d>
struct scal {
d internal;
};
Gamma::GammaMatrix Gmu [] = {
Gamma::GammaX,
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
bool overlapComms = false;
void bench_wilson (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag );
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
if( GridCmdOptionExists(argv,argv+argc,"--asynch") ){ overlapComms = true; }
typename WilsonFermionR::ImplParams params;
params.overlapCommsCompute = overlapComms;
std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
std::vector<int> seeds({1,2,3,4});
RealD mass = 0.1;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
std::cout<<GridLogMessage << "= Benchmarking Wilson" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
std::cout<<GridLogMessage << "Volume\t\t\tWilson/MFLOPs\tWilsonDag/MFLOPs" << std::endl;
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
int Lmax = 32;
int dmin = 0;
if ( getenv("LMAX") ) Lmax=atoi(getenv("LMAX"));
if ( getenv("DMIN") ) dmin=atoi(getenv("DMIN"));
for (int L=8; L<=Lmax; L*=2)
{
std::vector<int> latt_size = std::vector<int>(4,L);
for(int d=4; d>dmin; d--)
{
if ( d<=3 ) { latt_size[d] *= 2; }
std::cout << GridLogMessage;
std::copy( latt_size.begin(), --latt_size.end(), std::ostream_iterator<int>( std::cout, std::string("x").c_str() ) );
std::cout << latt_size.back() << "\t\t";
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridRedBlackCartesian RBGrid(latt_size,simd_layout,mpi_layout);
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(seeds);
LatticeGaugeField Umu(&Grid); random(pRNG,Umu);
LatticeFermion src(&Grid); random(pRNG,src);
LatticeFermion result(&Grid); result=zero;
double volume = std::accumulate(latt_size.begin(),latt_size.end(),1,std::multiplies<int>());
WilsonFermionR Dw(Umu,Grid,RBGrid,mass,params);
bench_wilson(src,result,Dw,volume,DaggerNo);
bench_wilson(src,result,Dw,volume,DaggerYes);
std::cout << std::endl;
}
}
std::cout<<GridLogMessage << "============================================================================="<< std::endl;
Grid_finalize();
}
void bench_wilson (
LatticeFermion & src,
LatticeFermion & result,
WilsonFermionR & Dw,
double const volume,
int const dag )
{
int ncall = 1000;
double t0 = usecond();
for(int i=0; i<ncall; i++) { Dw.Dhop(src,result,dag); }
double t1 = usecond();
double flops = 1344 * volume * ncall;
std::cout << flops/(t1-t0) << "\t\t";
}

View File

@ -40,14 +40,20 @@ int main(int argc,char **argv)
std::ofstream os("zmm.dat"); std::ofstream os("zmm.dat");
os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl; os << "#V Ls Lxy Lzt C++ Asm OMP L1 " <<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking ZMM"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
std::cout<<GridLogMessage << "Volume \t\t\t\tC++DW/MFLOPs\tASM-DW/MFLOPs\tdiff"<<std::endl;
std::cout<<GridLogMessage << "====================================================================="<<std::endl;
for(int L=4;L<=32;L+=4){ for(int L=4;L<=32;L+=4){
for(int m=1;m<=2;m++){ for(int m=1;m<=2;m++){
for(int Ls=8;Ls<=16;Ls+=8){ for(int Ls=8;Ls<=16;Ls+=8){
std::vector<int> grid({L,L,m*L,m*L}); std::vector<int> grid({L,L,m*L,m*L});
std::cout << GridLogMessage <<"\t";
for(int i=0;i<4;i++) { for(int i=0;i<4;i++) {
std::cout << grid[i]<<"x"; std::cout << grid[i]<<"x";
} }
std::cout << Ls<<std::endl; std::cout << Ls<<"\t\t";
bench(os,grid,Ls); bench(os,grid,Ls);
} }
} }
@ -104,7 +110,6 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
RealD M5 =1.8; RealD M5 =1.8;
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl;
int ncall=50; int ncall=50;
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
@ -116,7 +121,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
double flops=1344*volume/2; double flops=1344*volume/2;
mfc = flops*ncall/(t1-t0); mfc = flops*ncall/(t1-t0);
std::cout<<GridLogMessage << "Called C++ Dw"<< " mflop/s = "<< mfc<<std::endl; std::cout<<mfc<<"\t\t";
QCD::WilsonKernelsStatic::AsmOpt=1; QCD::WilsonKernelsStatic::AsmOpt=1;
t0=usecond(); t0=usecond();
@ -125,7 +130,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
} }
t1=usecond(); t1=usecond();
mfa = flops*ncall/(t1-t0); mfa = flops*ncall/(t1-t0);
std::cout<<GridLogMessage << "Called ASM Dw"<< " mflop/s = "<< mfa<<std::endl; std::cout<<mfa<<"\t\t";
/* /*
int dag=DaggerNo; int dag=DaggerNo;
t0=usecond(); t0=usecond();
@ -163,8 +168,7 @@ int bench(std::ofstream &os, std::vector<int> &latt4,int Ls)
//resulta = (-0.5) * resulta; //resulta = (-0.5) * resulta;
diff = resulto-resulta; diff = resulto-resulta;
std::cout<<GridLogMessage << "diff "<< norm2(diff)<<std::endl; std::cout<<norm2(diff)<<std::endl;
std::cout<<std::endl;
return 0; return 0;
} }

View File

@ -1,11 +1,18 @@
#!/usr/bin/env bash #!/usr/bin/env bash
EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2' EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
echo "-- deploying Eigen source..." echo "-- deploying Eigen source..."
wget ${EIGEN_URL} wget ${EIGEN_URL} --no-check-certificate
./scripts/update_eigen.sh `basename ${EIGEN_URL}` ./scripts/update_eigen.sh `basename ${EIGEN_URL}`
rm `basename ${EIGEN_URL}` rm `basename ${EIGEN_URL}`
echo "-- copying fftw prototypes..."
wget ${FFTW_URL}
./scripts/update_fftw.sh `basename ${FFTW_URL}`
rm `basename ${FFTW_URL}`
echo '-- generating Make.inc files...' echo '-- generating Make.inc files...'
./scripts/filelist ./scripts/filelist
echo '-- generating configure script...' echo '-- generating configure script...'

View File

@ -8,11 +8,20 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs ############### Checks for programs
AC_LANG(C++) AC_LANG(C++)
: ${CXXFLAGS="-O3"} CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB
############ openmp ###############
AC_OPENMP AC_OPENMP
ac_openmp=no
if test "${OPENMP_CXXFLAGS}X" != "X"; then
ac_openmp=yes
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
LT_INIT([disable-shared]) AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
fi
############### Checks for header files ############### Checks for header files
AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(stdint.h)
@ -29,7 +38,7 @@ AC_TYPE_SIZE_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
AC_TYPE_UINT64_T AC_TYPE_UINT64_T
############### Options ############### GMP and MPFR #################
AC_ARG_WITH([gmp], AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix], [AS_HELP_STRING([--with-gmp=prefix],
[try this for a non-standard install prefix of the GMP library])], [try this for a non-standard install prefix of the GMP library])],
@ -40,9 +49,12 @@ AC_ARG_WITH([mpfr],
[try this for a non-standard install prefix of the MPFR library])], [try this for a non-standard install prefix of the MPFR library])],
[AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_mpfr/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_mpfr/lib $AM_LDFLAGS"])
################## lapack ####################
AC_ARG_ENABLE([lapack], AC_ARG_ENABLE([lapack],
[AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])], [AC_HELP_STRING([--enable-lapack=yes|no|prefix], [enable LAPACK])],
[ac_LAPACK=${enable_lapack}],[ac_LAPACK=no]) [ac_LAPACK=${enable_lapack}],[ac_LAPACK=no])
case ${ac_LAPACK} in case ${ac_LAPACK} in
no) no)
;; ;;
@ -54,6 +66,13 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
esac esac
################## FFTW3 ####################
AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix],
[try this for a non-standard install prefix of the FFTW3 library])],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
################ Get compiler informations ################ Get compiler informations
AC_LANG([C++]) AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
@ -67,7 +86,6 @@ AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
############### Checks for library functions ############### Checks for library functions
CXXFLAGS_CPY=$CXXFLAGS CXXFLAGS_CPY=$CXXFLAGS
LDFLAGS_CPY=$LDFLAGS LDFLAGS_CPY=$LDFLAGS
LIBS_CPY=$LIBS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS" LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_CHECK_FUNCS([gettimeofday]) AC_CHECK_FUNCS([gettimeofday])
@ -77,7 +95,7 @@ AC_CHECK_LIB([gmp],[__gmpf_init],
[have_mpfr=true] [have_mpfr=true]
[LIBS="$LIBS -lmpfr"], [LIBS="$LIBS -lmpfr"],
[AC_MSG_ERROR([MPFR library not found])])] [AC_MSG_ERROR([MPFR library not found])])]
[AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])] [AC_DEFINE([HAVE_LIBGMP], [1], [Define to 1 if you have the `GMP' library (-lgmp).])]
[have_gmp=true] [have_gmp=true]
[LIBS="$LIBS -lgmp"], [LIBS="$LIBS -lgmp"],
[AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])]) [AC_MSG_WARN([**** GMP library not found, Grid can still compile but RHMC will not work ****])])
@ -86,6 +104,11 @@ if test "${ac_LAPACK}x" != "nox"; then
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
[AC_MSG_ERROR("LAPACK enabled but library not found")]) [AC_MSG_ERROR("LAPACK enabled but library not found")])
fi fi
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
[have_fftw=true]
[LIBS="$LIBS -lfftw3 -lfftw3f"],
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
CXXFLAGS=$CXXFLAGS_CPY CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY LDFLAGS=$LDFLAGS_CPY
@ -108,16 +131,19 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='-mavx -mfma4';; SIMD_FLAGS='-mavx -mfma4';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2';; SIMD_FLAGS='-mavx2 -mfma';;
AVX512|AVX512MIC|KNL) AVX512|AVX512MIC|KNL)
AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
IMCI|KNC) IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
SIMD_FLAGS='';; SIMD_FLAGS='';;
GEN) GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';; SIMD_FLAGS='';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
*) *)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);; AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
esac;; esac;;
@ -294,15 +320,17 @@ Summary of configuration for $PACKAGE v$VERSION
- compiler version : ${ax_cv_gxx_version} - compiler version : ${ax_cv_gxx_version}
----- BUILD OPTIONS ----------------------------------- ----- BUILD OPTIONS -----------------------------------
- SIMD : ${ac_SIMD} - SIMD : ${ac_SIMD}
- communications type : ${ac_COMMS} - Threading : ${ac_openmp}
- default precision : ${ac_PRECISION} - Communications type : ${ac_COMMS}
- Default precision : ${ac_PRECISION}
- RNG choice : ${ac_RNG} - RNG choice : ${ac_RNG}
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` - GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
- LAPACK : ${ac_LAPACK} - LAPACK : ${ac_LAPACK}
- FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
----- BUILD FLAGS ------------------------------------- ----- BUILD FLAGS -------------------------------------
- CXXFLAGS: - CXXFLAGS:
`echo ${AM_CXXFLAGS} ${CXXFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'` `echo ${AM_CXXFLAGS} ${CXXFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`
- LDFLAGS: - LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'` `echo ${AM_LDFLAGS} ${LDFLAGS} | sed 's/ -/\n\t-/g' | sed 's/^-/\t-/g'`

276
lib/FFT.h Normal file
View File

@ -0,0 +1,276 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/Cshift.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef _GRID_FFT_H_
#define _GRID_FFT_H_
#ifdef HAVE_FFTW
#include <fftw3.h>
#endif
namespace Grid {
template<class scalar> struct FFTW { };
#ifdef HAVE_FFTW
template<> struct FFTW<ComplexD> {
public:
typedef fftw_complex FFTW_scalar;
typedef fftw_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftw_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftw_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftw_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftw_destroy_plan(p);
}
};
template<> struct FFTW<ComplexF> {
public:
typedef fftwf_complex FFTW_scalar;
typedef fftwf_plan FFTW_plan;
static FFTW_plan fftw_plan_many_dft(int rank, const int *n,int howmany,
FFTW_scalar *in, const int *inembed,
int istride, int idist,
FFTW_scalar *out, const int *onembed,
int ostride, int odist,
int sign, unsigned flags) {
return ::fftwf_plan_many_dft(rank,n,howmany,in,inembed,istride,idist,out,onembed,ostride,odist,sign,flags);
}
static void fftw_flops(const FFTW_plan p,double *add, double *mul, double *fmas){
::fftwf_flops(p,add,mul,fmas);
}
inline static void fftw_execute_dft(const FFTW_plan p,FFTW_scalar *in,FFTW_scalar *out) {
::fftwf_execute_dft(p,in,out);
}
inline static void fftw_destroy_plan(const FFTW_plan p) {
::fftwf_destroy_plan(p);
}
};
#endif
#ifndef FFTW_FORWARD
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#endif
class FFT {
private:
GridCartesian *vgrid;
GridCartesian *sgrid;
int Nd;
double flops;
double flops_call;
uint64_t usec;
std::vector<int> dimensions;
std::vector<int> processors;
std::vector<int> processor_coor;
public:
static const int forward=FFTW_FORWARD;
static const int backward=FFTW_BACKWARD;
double Flops(void) {return flops;}
double MFlops(void) {return flops/usec;}
FFT ( GridCartesian * grid ) :
vgrid(grid),
Nd(grid->_ndimension),
dimensions(grid->_fdimensions),
processors(grid->_processors),
processor_coor(grid->_processor_coor)
{
flops=0;
usec =0;
std::vector<int> layout(Nd,1);
sgrid = new GridCartesian(dimensions,layout,processors);
};
~FFT ( void) {
delete sgrid;
}
template<class vobj>
void FFT_dim(Lattice<vobj> &result,const Lattice<vobj> &source,int dim, int inverse){
conformable(result._grid,vgrid);
conformable(source._grid,vgrid);
int L = vgrid->_ldimensions[dim];
int G = vgrid->_fdimensions[dim];
std::vector<int> layout(Nd,1);
std::vector<int> pencil_gd(vgrid->_fdimensions);
pencil_gd[dim] = G*processors[dim];
// Pencil global vol LxLxGxLxL per node
GridCartesian pencil_g(pencil_gd,layout,processors);
// Construct pencils
typedef typename vobj::scalar_object sobj;
typedef typename sobj::scalar_type scalar;
Lattice<vobj> ssource(vgrid); ssource =source;
Lattice<sobj> pgsource(&pencil_g);
Lattice<sobj> pgresult(&pencil_g); pgresult=zero;
#ifndef HAVE_FFTW
assert(0);
#else
typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
{
int Ncomp = sizeof(sobj)/sizeof(scalar);
int Nlow = 1;
for(int d=0;d<dim;d++){
Nlow*=vgrid->_ldimensions[d];
}
int rank = 1; /* 1d transforms */
int n[] = {G}; /* 1d transforms of length G */
int howmany = Ncomp;
int odist,idist,istride,ostride;
idist = odist = 1; /* Distance between consecutive FT's */
istride = ostride = Ncomp*Nlow; /* distance between two elements in the same FT */
int *inembed = n, *onembed = n;
int sign = FFTW_FORWARD;
if (inverse) sign = FFTW_BACKWARD;
FFTW_plan p;
{
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[0];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[0];
p = FFTW<scalar>::fftw_plan_many_dft(rank,n,howmany,
in,inembed,
istride,idist,
out,onembed,
ostride, odist,
sign,FFTW_ESTIMATE);
}
double add,mul,fma;
FFTW<scalar>::fftw_flops(p,&add,&mul,&fma);
flops_call = add+mul+2.0*fma;
GridStopWatch timer;
// Barrel shift and collect global pencil
for(int p=0;p<processors[dim];p++) {
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
sobj s;
peekLocalSite(s,ssource,lcoor);
lcoor[dim]+=p*L;
pokeLocalSite(s,pgsource,lcoor);
}
ssource = Cshift(ssource,dim,L);
}
// Loop over orthog coords
int NN=pencil_g.lSites();
GridStopWatch Timer;
Timer.Start();
PARALLEL_FOR_LOOP
for(int idx=0;idx<NN;idx++) {
std::vector<int> lcoor(Nd);
pencil_g.LocalIndexToLocalCoor(idx,lcoor);
if ( lcoor[dim] == 0 ) { // restricts loop to plane at lcoor[dim]==0
FFTW_scalar *in = (FFTW_scalar *)&pgsource._odata[idx];
FFTW_scalar *out= (FFTW_scalar *)&pgresult._odata[idx];
FFTW<scalar>::fftw_execute_dft(p,in,out);
}
}
Timer.Stop();
usec += Timer.useconds();
flops+= flops_call*NN;
int pc = processor_coor[dim];
for(int idx=0;idx<sgrid->lSites();idx++) {
std::vector<int> lcoor(Nd);
sgrid->LocalIndexToLocalCoor(idx,lcoor);
std::vector<int> gcoor = lcoor;
// extract the result
sobj s;
gcoor[dim] = lcoor[dim]+L*pc;
peekLocalSite(s,pgresult,gcoor);
pokeLocalSite(s,result,lcoor);
}
FFTW<scalar>::fftw_destroy_plan(p);
}
#endif
}
};
}
#endif

View File

@ -68,6 +68,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/Simd.h> #include <Grid/Simd.h>
#include <Grid/Threads.h> #include <Grid/Threads.h>
#include <Grid/Lexicographic.h> #include <Grid/Lexicographic.h>
#include <Grid/Init.h>
#include <Grid/Communicator.h> #include <Grid/Communicator.h>
#include <Grid/Cartesian.h> #include <Grid/Cartesian.h>
#include <Grid/Tensors.h> #include <Grid/Tensors.h>
@ -78,7 +79,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/parallelIO/BinaryIO.h> #include <Grid/parallelIO/BinaryIO.h>
#include <Grid/qcd/QCD.h> #include <Grid/qcd/QCD.h>
#include <Grid/parallelIO/NerscIO.h> #include <Grid/parallelIO/NerscIO.h>
#include <Grid/Init.h>
#include <Grid/FFT.h>
#include <Grid/qcd/hmc/NerscCheckpointer.h> #include <Grid/qcd/hmc/NerscCheckpointer.h>
#include <Grid/qcd/hmc/HmcRunner.h> #include <Grid/qcd/hmc/HmcRunner.h>

View File

@ -153,6 +153,7 @@ void GridParseLayout(char **argv,int argc,
assert(ompthreads.size()==1); assert(ompthreads.size()==1);
GridThread::SetThreads(ompthreads[0]); GridThread::SetThreads(ompthreads[0]);
} }
if( GridCmdOptionExists(argv,argv+argc,"--cores") ){ if( GridCmdOptionExists(argv,argv+argc,"--cores") ){
std::vector<int> cores(0); std::vector<int> cores(0);
arg= GridCmdOptionPayload(argv,argv+argc,"--cores"); arg= GridCmdOptionPayload(argv,argv+argc,"--cores");
@ -203,7 +204,6 @@ void Grid_init(int *argc,char ***argv)
GridLogConfigure(logstreams); GridLogConfigure(logstreams);
} }
if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){ if( GridCmdOptionExists(*argv,*argv+*argc,"--debug-signals") ){
Grid_debug_handler_init(); Grid_debug_handler_init();
} }

View File

@ -17,8 +17,8 @@ endif
include Make.inc include Make.inc
include Eigen.inc include Eigen.inc
lib_LTLIBRARIES = libGrid.la lib_LIBRARIES = libGrid.a
libGrid_la_SOURCES = $(CCFILES) $(extra_sources) libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
libGrid_ladir = $(pkgincludedir) libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

View File

@ -265,7 +265,7 @@
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
} }
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); //_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0);
local = _entries[ent]._is_local; local = _entries[ent]._is_local;
perm = _entries[ent]._permute; perm = _entries[ent]._permute;
if (perm) ptype = _permute_type[point]; if (perm) ptype = _permute_type[point];

View File

@ -127,21 +127,12 @@ class CartesianCommunicator {
int recv_from_rank, int recv_from_rank,
int bytes); int bytes);
void SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list, void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit, void *xmit,
int xmit_to_rank, int xmit_to_rank,
void *recv, void *recv,
int recv_from_rank, int recv_from_rank,
int bytes); int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall); void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -144,28 +144,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
} }
// Basic Halo comms primitive // Basic Halo comms primitive
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Send_init(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Recv_init(recv, bytes, MPI_CHAR,dest,_processor,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
MPI_Startall(list.size(),&list[0]);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit, void *xmit,
int dest, int dest,
@ -173,12 +151,17 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
int from, int from,
int bytes) int bytes)
{ {
std::vector<CommsRequest_t> reqs(0); MPI_Request xrq;
SendToRecvFromInit(reqs,xmit,dest,recv,from,bytes); MPI_Request rrq;
SendToRecvFromBegin(reqs); int rank = _processor;
for(int i=0;i<reqs.size();i++){ int ierr;
list.push_back(reqs[i]); ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
} ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
} }
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{ {

View File

@ -84,19 +84,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
{ {
assert(0); assert(0);
} }
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{ {
assert(0); assert(0);

View File

@ -268,10 +268,6 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
} }
// Basic Halo comms primitive // Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list)
{
assert(0); //unimplemented
}
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit, void *xmit,
int dest, int dest,
@ -284,15 +280,6 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
// shmem_putmem_nb(recv,xmit,bytes,dest,NULL); // shmem_putmem_nb(recv,xmit,bytes,dest,NULL);
shmem_putmem(recv,xmit,bytes,dest); shmem_putmem(recv,xmit,bytes,dest);
} }
void CartesianCommunicator::SendToRecvFromInit(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0); // Unimplemented
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list) void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{ {
// shmem_quiet(); // I'm done // shmem_quiet(); // I'm done

View File

@ -349,7 +349,7 @@ void localConvert(const Lattice<vobj> &in,Lattice<vvobj> &out)
assert(ig->_ldimensions[d] == og->_ldimensions[d]); assert(ig->_ldimensions[d] == og->_ldimensions[d]);
} }
PARALLEL_FOR_LOOP //PARALLEL_FOR_LOOP
for(int idx=0;idx<ig->lSites();idx++){ for(int idx=0;idx<ig->lSites();idx++){
std::vector<int> lcoor(ni); std::vector<int> lcoor(ni);
ig->LocalIndexToLocalCoor(idx,lcoor); ig->LocalIndexToLocalCoor(idx,lcoor);
@ -446,6 +446,79 @@ void ExtractSlice(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice, in
} }
template<class vobj>
void InsertSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,lowDim,lcoor);
pokeLocalSite(s,higherDim,hcoor);
}
}
}
template<class vobj>
void ExtractSliceLocal(Lattice<vobj> &lowDim, Lattice<vobj> & higherDim,int slice_lo,int slice_hi, int orthog)
{
typedef typename vobj::scalar_object sobj;
sobj s;
GridBase *lg = lowDim._grid;
GridBase *hg = higherDim._grid;
int nl = lg->_ndimension;
int nh = hg->_ndimension;
assert(nl == nh);
assert(orthog<nh);
assert(orthog>=0);
for(int d=0;d<nh;d++){
assert(lg->_processors[d] == hg->_processors[d]);
assert(lg->_ldimensions[d] == hg->_ldimensions[d]);
}
// the above should guarantee that the operations are local
//PARALLEL_FOR_LOOP
for(int idx=0;idx<lg->lSites();idx++){
std::vector<int> lcoor(nl);
std::vector<int> hcoor(nh);
lg->LocalIndexToLocalCoor(idx,lcoor);
if( lcoor[orthog] == slice_lo ) {
hcoor=lcoor;
hcoor[orthog] = slice_hi;
peekLocalSite(s,higherDim,hcoor);
pokeLocalSite(s,lowDim,lcoor);
}
}
}
template<class vobj> template<class vobj>
void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine)
{ {

View File

@ -194,22 +194,22 @@ class BinaryIO {
std::vector<int> site({x,y,z,t}); std::vector<int> site({x,y,z,t});
if ( grid->IsBoss() ) { if (grid->IsBoss()) {
fin.read((char *)&file_object,sizeof(file_object)); fin.read((char *)&file_object, sizeof(file_object));
bytes += sizeof(file_object); bytes += sizeof(file_object);
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
munge(file_object,munged,csum); munge(file_object, munged, csum);
} }
// The boss who read the file has their value poked // The boss who read the file has their value poked
pokeSite(munged,Umu,site); pokeSite(munged,Umu,site);
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -254,20 +254,20 @@ class BinaryIO {
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
// NB could gather an xstrip as an optimisation. // NB could gather an xstrip as an optimisation.
fout.write((char *)&file_object,sizeof(file_object)); fout.write((char *)&file_object,sizeof(file_object));
bytes+=sizeof(file_object); bytes+=sizeof(file_object);
} }
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -305,15 +305,15 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; // std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
parallel.GetState(saved,l_idx); parallel.GetState(saved,l_idx);
} }
grid->Broadcast(rank,(void *)&saved[0],bytes); grid->Broadcast(rank,(void *)&saved[0],bytes);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
fout.write((char *)&saved[0],bytes); fout.write((char *)&saved[0],bytes);
} }
} }
@ -355,14 +355,14 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
fin.read((char *)&saved[0],bytes); fin.read((char *)&saved[0],bytes);
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
} }
grid->Broadcast(0,(void *)&saved[0],bytes); grid->Broadcast(0,(void *)&saved[0],bytes);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
parallel.SetState(saved,l_idx); parallel.SetState(saved,l_idx);
} }
} }
@ -415,15 +415,15 @@ class BinaryIO {
if ( d == 0 ) parallel[d] = 0; if ( d == 0 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
} }
@ -434,9 +434,9 @@ class BinaryIO {
std::cout<< std::dec ; std::cout<< std::dec ;
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -463,7 +463,7 @@ class BinaryIO {
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){ for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
@ -472,8 +472,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
///////////////////////// /////////////////////////
@ -487,29 +487,29 @@ class BinaryIO {
// iorank reads from the seek // iorank reads from the seek
//////////////////////////////// ////////////////////////////////
if (myrank == iorank) { if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj)); fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj)); fin.read((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum); munge(fileObj,siteObj,csum);
} }
// Possibly do transport through pt2pt // Possibly do transport through pt2pt
if ( rank != iorank ) { if ( rank != iorank ) {
if ( (myrank == rank) || (myrank==iorank) ) { if ( (myrank == rank) || (myrank==iorank) ) {
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
} }
} }
// Poke at destination // Poke at destination
if ( myrank == rank ) { if ( myrank == rank ) {
pokeLocalSite(siteObj,Umu,lsite); pokeLocalSite(siteObj,Umu,lsite);
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
} }
@ -520,7 +520,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -558,15 +558,15 @@ class BinaryIO {
if ( d!= grid->_ndimension-1 ) parallel[d] = 0; if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
@ -577,9 +577,9 @@ class BinaryIO {
grid->GlobalSum(tmp); grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -610,7 +610,7 @@ class BinaryIO {
// should aggregate a whole chunk and then write. // should aggregate a whole chunk and then write.
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){ for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
@ -619,8 +619,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
@ -640,26 +640,26 @@ class BinaryIO {
// Pair of nodes may need to do pt2pt send // Pair of nodes may need to do pt2pt send
if ( rank != iorank ) { // comms is necessary if ( rank != iorank ) { // comms is necessary
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
// Send to IOrank // Send to IOrank
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
} }
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
if (myrank == iorank) { if (myrank == iorank) {
munge(siteObj,fileObj,csum); munge(siteObj,fileObj,csum);
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj)); fout.seekp(offset+g_idx*sizeof(fileObj));
fout.write((char *)&fileObj,sizeof(fileObj)); fout.write((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
} }
} }
@ -668,7 +668,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }

View File

@ -55,11 +55,14 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types // QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour // Index conventions: Lorentz x Spin x Colour
// note: static const int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
static const int ColourIndex = 2; #define ColourIndex 2
static const int SpinIndex = 1; #define SpinIndex 1
static const int LorentzIndex= 0; #define LorentzIndex 0
// Also should make these a named enum type // Also should make these a named enum type
static const int DaggerNo=0; static const int DaggerNo=0;
static const int DaggerYes=1; static const int DaggerYes=1;

View File

@ -111,12 +111,16 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp4dVecTemplateInstantiate(A) \ #define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \ template class A<WilsonImplF>; \
template class A<WilsonImplD>; \ template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \ template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; template class A<GparityWilsonImplD>;
#define FermOp5dVecTemplateInstantiate(A) \ #define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \ template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
#define FermOpTemplateInstantiate(A) \ #define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \
@ -138,6 +142,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h> #include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h> #include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -176,6 +181,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF; typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD; typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;

View File

@ -54,18 +54,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0); std::vector<Coeff_t> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass; std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass; std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper); M5D(psi,chi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din) void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bs; std::vector<Coeff_t> diag = bs;
std::vector<RealD> upper= cs; std::vector<Coeff_t> upper= cs;
std::vector<RealD> lower= cs; std::vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper); M5D(psi,psi,Din,lower,diag,upper);
@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi) template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = beo; std::vector<Coeff_t> diag = beo;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) { for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i]; upper[i]=-ceo[i];
lower[i]=-ceo[i]; lower[i]=-ceo[i];
@ -88,9 +88,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bee; std::vector<Coeff_t> diag = bee;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) { for(int i=0;i<Ls;i++) {
upper[i]=-cee[i]; upper[i]=-cee[i];
lower[i]=-cee[i]; lower[i]=-cee[i];
@ -104,9 +104,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bee; std::vector<Coeff_t> diag = bee;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){ for (int s=0;s<Ls;s++){
// Assemble the 5d matrix // Assemble the 5d matrix
@ -129,9 +129,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0); std::vector<Coeff_t> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0); std::vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper); M5Ddag(psi,chi,chi,lower,diag,upper);
@ -141,9 +141,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din) void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag =bs; std::vector<Coeff_t> diag =bs;
std::vector<RealD> upper=cs; std::vector<Coeff_t> upper=cs;
std::vector<RealD> lower=cs; std::vector<Coeff_t> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper); M5Ddag(psi,psi,Din,lower,diag,upper);
@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{ {
SetCoefficientsZolotarev(1.0,zdata,b,c); std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
} }
//Zolo //Zolo
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
{ {
int Ls=this->Ls; int Ls=this->Ls;
@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
double bmc = b-c; double bmc = b-c;
for(int i=0; i < Ls; i++){ for(int i=0; i < Ls; i++){
as[i] = 1.0; as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc); bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc); cs[i] = 0.5*(bpc/omega[i] - bmc);
} }
@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
} }
{ {
double delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d; dee[Ls-1] += delta_d;
} }

View File

@ -62,16 +62,16 @@ namespace Grid {
void M5D(const FermionField &psi, void M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper); std::vector<Coeff_t> &upper);
void M5Ddag(const FermionField &psi, void M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper); std::vector<Coeff_t> &upper);
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
virtual void Instantiatable(void)=0; virtual void Instantiatable(void)=0;
@ -91,23 +91,23 @@ namespace Grid {
RealD mass; RealD mass;
// Cayley form Moebius (tanh and zolotarev) // Cayley form Moebius (tanh and zolotarev)
std::vector<RealD> omega; std::vector<Coeff_t> omega;
std::vector<RealD> bs; // S dependent coeffs std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<RealD> cs; std::vector<Coeff_t> cs;
std::vector<RealD> as; std::vector<Coeff_t> as;
// For preconditioning Cayley form // For preconditioning Cayley form
std::vector<RealD> bee; std::vector<Coeff_t> bee;
std::vector<RealD> cee; std::vector<Coeff_t> cee;
std::vector<RealD> aee; std::vector<Coeff_t> aee;
std::vector<RealD> beo; std::vector<Coeff_t> beo;
std::vector<RealD> ceo; std::vector<Coeff_t> ceo;
std::vector<RealD> aeo; std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix // LDU factorisation of the eeoo matrix
std::vector<RealD> lee; std::vector<Coeff_t> lee;
std::vector<RealD> leem; std::vector<Coeff_t> leem;
std::vector<RealD> uee; std::vector<Coeff_t> uee;
std::vector<RealD> ueem; std::vector<Coeff_t> ueem;
std::vector<RealD> dee; std::vector<Coeff_t> dee;
// Constructors // Constructors
CayleyFermion5D(GaugeField &_Umu, CayleyFermion5D(GaugeField &_Umu,
@ -117,20 +117,19 @@ namespace Grid {
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
protected: protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
}; };
} }
} }
#define INSTANTIATE_DPERP(A)\ #define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \ std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \ std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
@ -82,9 +82,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(WilsonImplD); INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF); INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
#endif #endif
}} }}

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls=this->Ls; int Ls=this->Ls;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
@ -65,9 +65,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls=this->Ls; int Ls=this->Ls;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){

View File

@ -53,9 +53,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
int Ls = this->Ls; int Ls = this->Ls;
@ -121,9 +121,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
int Ls = this->Ls; int Ls = this->Ls;
@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
chi.checkerboard=psi.checkerboard; chi.checkerboard=psi.checkerboard;
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s]; Pplus(s,s) = bee[s];
@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
Pplus (0,Ls-1) = mass*cee[0]; Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1]; Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ; Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXd PminusMat; Eigen::MatrixXcd PminusMat;
if ( inv ) { if ( inv ) {
PplusMat =Pplus.inverse(); PplusMat =Pplus.inverse();
@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(DomainWallVec5dImplD); INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF); INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}} }}

View File

@ -100,7 +100,8 @@ namespace Grid {
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base)\ INHERIT_GIMPL_TYPES(Base)\
@ -109,12 +110,14 @@ namespace Grid {
/////// ///////
// Single flavour four spinors with colour index // Single flavour four spinors with colour index
/////// ///////
template<class S,int Nrepresentation=Nc> template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > { class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > {
public: public:
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -192,12 +195,13 @@ PARALLEL_FOR_LOOP
/////// ///////
// Single flavour four spinors with colour index, 5d redblack // Single flavour four spinors with colour index, 5d redblack
/////// ///////
template<class S,int Nrepresentation=Nc> template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public: public:
const bool LsVectorised=true; const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -287,12 +291,13 @@ PARALLEL_FOR_LOOP
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation> template<class S,int Nrepresentation,class _Coeff_t = RealD>
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{ class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
public: public:
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -483,6 +488,18 @@ PARALLEL_FOR_LOOP
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
typedef WilsonImpl<vComplex ,Nc,ComplexD> ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc,ComplexD> ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc,ComplexD> ZWilsonImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double

View File

@ -68,16 +68,21 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
{ {
// No asm implementation yet. #ifdef AVX512
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if ( AsmOpt ) {
// else WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
for(int site=0;site<Ns;site++) { } else {
for(int s=0;s<Ls;s++) { #else
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); {
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out); #endif
sF++; for(int site=0;site<Ns;site++) {
for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
} }
sU++;
} }
} }

View File

@ -79,6 +79,10 @@ namespace Grid {
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
@ -92,7 +96,25 @@ namespace Grid {
WilsonKernels(const ImplParams &p= ImplParams()); WilsonKernels(const ImplParams &p= ImplParams());
}; };
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
} }
} }
#endif #endif

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -26,59 +26,56 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#if defined(AVX512) #if defined(AVX512)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine // If we are AVX512 specialise the single precision routine
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h> #include <simd/Intel512wilson.h>
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
static Vector<vComplexF> signs; static Vector<vComplexF> signs;
int setupSigns(void ){ int setupSigns(void ){
Vector<vComplexF> bother(2); Vector<vComplexF> bother(2);
signs = bother; signs = bother;
vrsign(signs[0]); vrsign(signs[0]);
visign(signs[1]); visign(signs[1]);
return 1; return 1;
} }
static int signInit = setupSigns(); static int signInit = setupSigns();
#define label(A) ilabel(A) #define label(A) ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n" #define ilabel(A) ".globl\n" #A ":\n"
#define MAYBEPERM(A,perm) if (perm) { A ; } #define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A #define FX(A) WILSONASM_ ##A
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, #undef KERNEL_DAG
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template<>
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP #undef VMOVIDUP
#undef VMOVRDUP #undef VMOVRDUP
#undef MAYBEPERM #undef MAYBEPERM
@ -89,32 +86,22 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, #undef KERNEL_DAG
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template<>
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif #endif
}
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, }
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
}}

View File

@ -30,7 +30,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_PROJMEM(base);
#else
XM_PROJMEM(base); XM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -41,15 +45,22 @@
MULT_2SPIN_DIR_PFXP(Xp,basep); MULT_2SPIN_DIR_PFXP(Xp,basep);
} }
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_RECON;
#else
XM_RECON; XM_RECON;
#endif
//////////////////////////////// ////////////////////////////////
// Yp // Yp
//////////////////////////////// ////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_PROJMEM(base);
#else
YM_PROJMEM(base); YM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -60,7 +71,11 @@
MULT_2SPIN_DIR_PFYP(Yp,basep); MULT_2SPIN_DIR_PFYP(Yp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_RECON_ACCUM;
#else
YM_RECON_ACCUM; YM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zp // Zp
@ -68,7 +83,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_PROJMEM(base);
#else
ZM_PROJMEM(base); ZM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -79,7 +98,11 @@
MULT_2SPIN_DIR_PFZP(Zp,basep); MULT_2SPIN_DIR_PFZP(Zp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_RECON_ACCUM;
#else
ZM_RECON_ACCUM; ZM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tp // Tp
@ -87,7 +110,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_PROJMEM(base);
#else
TM_PROJMEM(base); TM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -98,7 +125,11 @@
MULT_2SPIN_DIR_PFTP(Tp,basep); MULT_2SPIN_DIR_PFTP(Tp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_RECON_ACCUM;
#else
TM_RECON_ACCUM; TM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Xm // Xm
@ -107,7 +138,11 @@
// basep= st.GetPFInfo(nent,plocal); nent++; // basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_PROJMEM(base);
#else
XP_PROJMEM(base); XP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -118,7 +153,11 @@
MULT_2SPIN_DIR_PFXM(Xm,basep); MULT_2SPIN_DIR_PFXM(Xm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_RECON_ACCUM;
#else
XP_RECON_ACCUM; XP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Ym // Ym
@ -126,7 +165,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_PROJMEM(base);
#else
YP_PROJMEM(base); YP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -137,7 +180,11 @@
MULT_2SPIN_DIR_PFYM(Ym,basep); MULT_2SPIN_DIR_PFYM(Ym,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_RECON_ACCUM;
#else
YP_RECON_ACCUM; YP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zm // Zm
@ -145,7 +192,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_PROJMEM(base);
#else
ZP_PROJMEM(base); ZP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -156,7 +207,11 @@
MULT_2SPIN_DIR_PFZM(Zm,basep); MULT_2SPIN_DIR_PFZM(Zm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_RECON_ACCUM;
#else
ZP_RECON_ACCUM; ZP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tm // Tm
@ -164,7 +219,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_PROJMEM(base);
#else
TP_PROJMEM(base); TP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -175,7 +234,11 @@
MULT_2SPIN_DIR_PFTM(Tm,basep); MULT_2SPIN_DIR_PFTM(Tm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_RECON_ACCUM;
#else
TP_RECON_ACCUM; TP_RECON_ACCUM;
#endif
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
SAVE_RESULT(base,basep); SAVE_RESULT(base,basep);

View File

@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though // // Need Nc=3 though //
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, #define INSTANTIATE_THEM(A) \
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
int ss,int sU,const FermionField &in, FermionField &out); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, int ss,int sU,const FermionField &in, FermionField &out);\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
int ss,int sU,const FermionField &in, FermionField &out); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, INSTANTIATE_THEM(WilsonImplD);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_THEM(ZWilsonImplF);
int ss,int sU,const FermionField &in, FermionField &out); INSTANTIATE_THEM(ZWilsonImplD);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, INSTANTIATE_THEM(GparityWilsonImplF);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_THEM(GparityWilsonImplD);
int ss,int sU,const FermionField &in, FermionField &out); INSTANTIATE_THEM(DomainWallVec5dImplF);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, INSTANTIATE_THEM(DomainWallVec5dImplD);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_THEM(ZDomainWallVec5dImplF);
int ss,int sU,const FermionField &in, FermionField &out); INSTANTIATE_THEM(ZDomainWallVec5dImplD);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
}} }}

View File

@ -0,0 +1,79 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusFermion.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
}
}
#endif

View File

@ -1,300 +1,421 @@
/************************************************************************************* /*******************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_qpx.h Source file: ./lib/simd/Grid_qpx.h
Copyright (C) 2015 Copyright (C) 2016
Author: neo <cossu@post.kek.jp> Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ ******************************************************************************/
/* END LEGAL */
//----------------------------------------------------------------------
/*! @file Grid_qpx.h
@brief Optimization libraries for QPX instructions set for BG/Q
Using intrinsics
*/
// Time-stamp: <2015-05-27 11:30:21 neo>
//----------------------------------------------------------------------
// lot of undefined functions
namespace Grid {
namespace Optimization { namespace Optimization {
typedef struct
{
float v0,v1,v2,v3;
} vector4float;
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
{
stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
return stream;
};
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
{
stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
return stream;
};
struct Vsplat{ struct Vsplat{
//Complex float //Complex float
inline float operator()(float a, float b){ inline vector4float operator()(float a, float b){
return {a,b,a,b}; return (vector4float){a, b, a, b};
} }
// Real float // Real float
inline float operator()(float a){ inline vector4float operator()(float a){
return {a,a,a,a}; return (vector4float){a, a, a, a};
} }
//Complex double //Complex double
inline vector4double operator()(double a, double b){ inline vector4double operator()(double a, double b){
return {a,b,a,b}; return (vector4double){a, b, a, b};
} }
//Real double //Real double
inline vector4double operator()(double a){ inline vector4double operator()(double a){
return {a,a,a,a}; return (vector4double){a, a, a, a};
} }
//Integer //Integer
inline int operator()(Integer a){ inline int operator()(Integer a){
#error return a;
} }
}; };
struct Vstore{ struct Vstore{
//Float //Float
inline void operator()(float a, float* F){ inline void operator()(vector4double a, float *f){
assert(0); vec_st(a, 0, f);
} }
inline void operator()(vector4double a, vector4float &f){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(vector4float a, float *f){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double //Double
inline void operator()(vector4double a, double* D){ inline void operator()(vector4double a, double *d){
assert(0); vec_st(a, 0, d);
} }
//Integer //Integer
inline void operator()(int a, Integer* I){ inline void operator()(int a, Integer *i){
assert(0); i[0] = a;
} }
}; };
struct Vstream{ struct Vstream{
//Float //Float
inline void operator()(float * a, float b){ inline void operator()(float *f, vector4double a){
assert(0); vec_st(a, 0, f);
}
//Double
inline void operator()(double * a, vector4double b){
assert(0);
} }
inline void operator()(vector4float f, vector4double a){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(float *f, vector4float a){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double
inline void operator()(double *d, vector4double a){
vec_st(a, 0, d);
}
}; };
struct Vset{ struct Vset{
// Complex float // Complex float
inline float operator()(Grid::ComplexF *a){ inline vector4float operator()(Grid::ComplexF *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
} }
// Complex double // Complex double
inline vector4double operator()(Grid::ComplexD *a){ inline vector4double operator()(Grid::ComplexD *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return vec_ld(0, (double *)a);
} }
// Real float
inline float operator()(float *a){ // Real float
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; inline vector4float operator()(float *a){
return (vector4float){a[0], a[1], a[2], a[3]};
} }
inline vector4double operator()(vector4float a){
return vec_ld(0, (float *)(&a));
}
// Real double // Real double
inline vector4double operator()(double *a){ inline vector4double operator()(double *a){
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; return vec_ld(0, a);
} }
// Integer // Integer
inline int operator()(Integer *a){ inline int operator()(Integer *a){
#error return a[0];
} }
}; };
template <typename Out_type, typename In_type> template <typename Out_type, typename In_type>
struct Reduce{ struct Reduce{
//Need templated class to overload output type //Need templated class to overload output type
//General form must generate error if compiled //General form must generate error if compiled
inline Out_type operator()(In_type in){ inline Out_type operator()(In_type in){
printf("Error, using wrong Reduce function\n"); printf("Error, using wrong Reduce function\n");
exit(1); exit(1);
return 0; return 0;
} }
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
vector4double ad, bd, rd;\
vector4float r;\
\
ad = Vset()(a);\
bd = Vset()(b);\
rd = fn(ad, bd);\
Vstore()(rd, r);\
\
return r;\
}
#define FLOAT_WRAP_1(fn, pref)\
pref vector4float fn(vector4float a)\
{\
vector4double ad, rd;\
vector4float r;\
\
ad = Vset()(a);\
rd = fn(ad);\
Vstore()(rd, r);\
\
return r;\
}
struct Sum{ struct Sum{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
return vec_add(a,b); return vec_add(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a + b;
} }
}; };
struct Sub{ struct Sub{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_sub(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline floati operator()(int a, int b){ inline int operator()(int a, int b){
#error return a - b;
} }
}; };
struct MultComplex{ struct MultComplex{
// Complex float
inline float operator()(float a, float b){
#error
}
// Complex double // Complex double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_xxnpmadd(a, b, vec_xmul(b, a));
} }
};
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct Mult{ struct Mult{
// Real float
inline float operator()(float a, float b){
#error
}
// Real double // Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_mul(a, b);
} }
// Real float
FLOAT_WRAP_2(operator(), inline)
// Integer // Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a*b;
} }
}; };
struct Conj{ struct Conj{
// Complex single
inline float operator()(float in){
assert(0);
}
// Complex double // Complex double
inline vector4double operator()(vector4double in){ inline vector4double operator()(vector4double v){
assert(0); return vec_mul(v, (vector4double){1., -1., 1., -1.});
} }
// do not define for integer input
};
// Complex float
FLOAT_WRAP_1(operator(), inline)
};
struct TimesMinusI{ struct TimesMinusI{
//Complex single
inline float operator()(float in, float ret){
assert(0);
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
assert(0); return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct TimesI{ struct TimesI{
//Complex single
inline float operator()(float in, float ret){
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct Permute{
//Complex double
static inline vector4double Permute0(vector4double v){ //0123 -> 2301
return vec_perm(v, v, vec_gpci(02301));
};
static inline vector4double Permute1(vector4double v){ //0123 -> 1032
return vec_perm(v, v, vec_gpci(01032));
};
static inline vector4double Permute2(vector4double v){
return v;
};
static inline vector4double Permute3(vector4double v){
return v;
};
// Complex float
FLOAT_WRAP_1(Permute0, static inline)
FLOAT_WRAP_1(Permute1, static inline)
FLOAT_WRAP_1(Permute2, static inline)
FLOAT_WRAP_1(Permute3, static inline)
};
struct Rotate{
static inline vector4double rotate(vector4double v, int n){
switch(n){
case 0:
return v;
break;
case 1:
return vec_perm(v, v, vec_gpci(01230));
break;
case 2:
return vec_perm(v, v, vec_gpci(02301));
break;
case 3:
return vec_perm(v, v, vec_gpci(03012));
break;
default: assert(0);
}
}
static inline vector4float rotate(vector4float v, int n){
vector4double vd, rd;
vector4float r;
////////////////////////////////////////////// vd = Vset()(v);
// Some Template specialization rd = rotate(vd, n);
Vstore()(rd, r);
return r;
}
};
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){ inline Grid::ComplexF
assert(0); Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
return Grid::ComplexF(v1.v0, v1.v1);
} }
//Real float Reduce //Real float Reduce
template<> template<>
inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){ inline Grid::RealF
assert(0); Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = Optimization::Sum()(v1, v2);
return v1.v0;
} }
//Complex double Reduce //Complex double Reduce
template<> template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, vector4double>::operator()(vector4double in){ inline Grid::ComplexD
assert(0); Reduce<Grid::ComplexD, vector4double>::operator()(vector4double v){ //2 complex
vector4double v1;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
} }
//Real double Reduce //Real double Reduce
template<> template<>
inline Grid::RealD Reduce<Grid::RealD, vector4double>::operator()(vector4double in){ inline Grid::RealD
assert(0); Reduce<Grid::RealD, vector4double>::operator()(vector4double v){ //4 doubles
} vector4double v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = vec_add(v1, v2);
return vec_extract(v1, 0);
}
//Integer Reduce //Integer Reduce
template<> template<>
inline Integer Reduce<Integer, floati>::operator()(float in){ inline Integer Reduce<Integer, int>::operator()(int in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0); assert(0);
} }
} }
////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
namespace Grid { typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef float SIMD_Ftype __attribute__ ((vector_size (16))); // Single precision type typedef vector4double SIMD_Dtype; // Double precision type
typedef vector4double SIMD_Dtype; // Double precision type typedef int SIMD_Itype; // Integer type
typedef int SIMD_Itype; // Integer type
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){};
// Function name aliases inline void prefetch_HINT_T0(const char *ptr){};
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Function name aliases
typedef Optimization::Sum SumSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Vset VsetSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::Vstream VstreamSIMD;
typedef Optimization::Conj ConjSIMD; template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
} }

View File

@ -388,6 +388,12 @@ class Grid_simd {
}; // end of Grid_simd class definition }; // end of Grid_simd class definition
inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }
inline void permute(RealD &y,RealD b, int perm) { y=b; }
inline void permute(RealF &y,RealF b, int perm) { y=b; }
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// General rotate // General rotate
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////

View File

@ -67,15 +67,13 @@ template <class scalar>
struct AsinRealFunctor { struct AsinRealFunctor {
scalar operator()(const scalar &a) const { return asin(real(a)); } scalar operator()(const scalar &a) const { return asin(real(a)); }
}; };
template <class scalar> template <class scalar>
struct LogRealFunctor { struct LogRealFunctor {
scalar operator()(const scalar &a) const { return log(real(a)); } scalar operator()(const scalar &a) const { return log(real(a)); }
}; };
template <class scalar> template <class scalar>
struct ExpRealFunctor { struct ExpFunctor {
scalar operator()(const scalar &a) const { return exp(real(a)); } scalar operator()(const scalar &a) const { return exp(a); }
}; };
template <class scalar> template <class scalar>
struct NotFunctor { struct NotFunctor {
@ -85,7 +83,6 @@ template <class scalar>
struct AbsRealFunctor { struct AbsRealFunctor {
scalar operator()(const scalar &a) const { return std::abs(real(a)); } scalar operator()(const scalar &a) const { return std::abs(real(a)); }
}; };
template <class scalar> template <class scalar>
struct PowRealFunctor { struct PowRealFunctor {
double y; double y;
@ -135,7 +132,6 @@ template <class Scalar>
inline Scalar rsqrt(const Scalar &r) { inline Scalar rsqrt(const Scalar &r) {
return (RSqrtRealFunctor<Scalar>(), r); return (RSqrtRealFunctor<Scalar>(), r);
} }
template <class S, class V> template <class S, class V>
inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) { inline Grid_simd<S, V> cos(const Grid_simd<S, V> &r) {
return SimdApply(CosRealFunctor<S>(), r); return SimdApply(CosRealFunctor<S>(), r);
@ -162,7 +158,7 @@ inline Grid_simd<S, V> abs(const Grid_simd<S, V> &r) {
} }
template <class S, class V> template <class S, class V>
inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) { inline Grid_simd<S, V> exp(const Grid_simd<S, V> &r) {
return SimdApply(ExpRealFunctor<S>(), r); return SimdApply(ExpFunctor<S>(), r);
} }
template <class S, class V> template <class S, class V>
inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) { inline Grid_simd<S, V> Not(const Grid_simd<S, V> &r) {

18
scripts/update_fftw.sh Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
if (( $# != 1 )); then
echo "usage: `basename $0` <archive>" 1>&2
exit 1
fi
ARC=$1
INITDIR=`pwd`
rm -rf lib/fftw
mkdir lib/fftw
ARCDIR=`tar -tf ${ARC} | head -n1 | sed -e 's@/.*@@'`
tar -xf ${ARC}
cp ${ARCDIR}/api/fftw3.h lib/fftw/
cd ${INITDIR}
rm -rf ${ARCDIR}

View File

@ -157,10 +157,9 @@ void Tester(const functor &func)
std::cout << GridLogMessage << " " << func.name() << std::endl; std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl; std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_input2 << std::endl;
std::cout << GridLogDebug << v_result << std::endl; std::cout << GridLogDebug << v_result << std::endl;
int ok=0; int ok=0;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>1.0e-7){ if ( abs(reference[i]-result[i])>1.0e-7){

111
tests/core/Test_fft.cc Normal file
View File

@ -0,0 +1,111 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexD::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
LatticeComplexD one(&Fine);
LatticeComplexD zz(&Fine);
LatticeComplexD C(&Fine);
LatticeComplexD Ctilde(&Fine);
LatticeComplexD coor(&Fine);
LatticeSpinMatrixD S(&Fine);
LatticeSpinMatrixD Stilde(&Fine);
std::vector<int> p({1,2,3,2});
one = ComplexD(1.0,0.0);
zz = ComplexD(0.0,0.0);
ComplexD ci(0.0,1.0);
C=zero;
for(int mu=0;mu<4;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
C = C - (TwoPiL * p[mu]) * coor;
}
C = exp(C*ci);
S=zero;
S = S+C;
FFT theFFT(&Fine);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
// C=zero;
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
TComplexD cVol;
cVol()()() = vol;
C=zero;
pokeSite(cVol,C,p);
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
SpinMatrixD Sp;
Sp = zero; Sp = Sp+cVol;
S=zero;
pokeSite(Sp,S,p);
S= S-Stilde;
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
Grid_finalize();
}

111
tests/core/Test_fftf.cc Normal file
View File

@ -0,0 +1,111 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_cshift.cc
Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
using namespace Grid::QCD;
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout( { vComplexF::Nsimd(),1,1,1});
std::vector<int> mpi_layout = GridDefaultMpi();
int vol = 1;
for(int d=0;d<latt_size.size();d++){
vol = vol * latt_size[d];
}
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
LatticeComplexF one(&Fine);
LatticeComplexF zz(&Fine);
LatticeComplexF C(&Fine);
LatticeComplexF Ctilde(&Fine);
LatticeComplexF coor(&Fine);
LatticeSpinMatrixF S(&Fine);
LatticeSpinMatrixF Stilde(&Fine);
std::vector<int> p({1,2,3,2});
one = ComplexF(1.0,0.0);
zz = ComplexF(0.0,0.0);
ComplexF ci(0.0,1.0);
C=zero;
for(int mu=0;mu<4;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
C = C - (TwoPiL * p[mu]) * coor;
}
C = exp(C*ci);
S=zero;
S = S+C;
FFT theFFT(&Fine);
theFFT.FFT_dim(Ctilde,C,0,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,1,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,2,FFT::forward); C=Ctilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Ctilde,C,3,FFT::forward); std::cout << theFFT.MFlops()<<std::endl;
// C=zero;
// Ctilde = where(abs(Ctilde)<1.0e-10,C,Ctilde);
TComplexF cVol;
cVol()()() = vol;
C=zero;
pokeSite(cVol,C,p);
C=C-Ctilde;
std::cout << "diff scalar "<<norm2(C) << std::endl;
theFFT.FFT_dim(Stilde,S,0,FFT::forward); S=Stilde; std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,1,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,2,FFT::forward); S=Stilde;std::cout << theFFT.MFlops()<<std::endl;
theFFT.FFT_dim(Stilde,S,3,FFT::forward);std::cout << theFFT.MFlops()<<std::endl;
SpinMatrixF Sp;
Sp = zero; Sp = Sp+cVol;
S=zero;
pokeSite(Sp,S,p);
S= S-Stilde;
std::cout << "diff FT[SpinMat] "<<norm2(S) << std::endl;
Grid_finalize();
}

View File

@ -44,6 +44,7 @@ struct scal {
}; };
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR; typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusVecFermionR;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR; typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR;
typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR; typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR;
typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR; typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR;
@ -117,6 +118,17 @@ int main (int argc, char ** argv)
TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5); TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"Z-MobiusFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
TestMoo(zDmob,szDmob);
TestWhat<ZMobiusFermionR>(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<ZMobiusVecFermionR>(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl; std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl; std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl; std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;