1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Merge branch 'develop' into feature/feynman-rules

# Conflicts:
#	lib/Threads.h
#	lib/qcd/action/fermion/WilsonFermion.cc
#	lib/qcd/action/fermion/WilsonFermion.h
#	lib/qcd/utils/SUn.h
#	lib/simd/Grid_avx.h
#	lib/simd/Intel512common.h
This commit is contained in:
Antonin Portelli 2016-10-19 18:35:18 +01:00
commit 997fd882ff
84 changed files with 6162 additions and 2851 deletions

4
.gitignore vendored
View File

@ -94,6 +94,10 @@ build.sh
################ ################
lib/Eigen/* lib/Eigen/*
# FFTW source #
################
lib/fftw/*
# libtool macros # # libtool macros #
################## ##################
m4/lt* m4/lt*

View File

@ -9,10 +9,6 @@ matrix:
- os: osx - os: osx
osx_image: xcode7.2 osx_image: xcode7.2
compiler: clang compiler: clang
- os: osx
osx_image: xcode7.2
compiler: gcc
env: VERSION=-5
- compiler: gcc - compiler: gcc
addons: addons:
apt: apt:
@ -107,3 +103,4 @@ script:
- make -j4 - make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -86,18 +86,6 @@ int main (int argc, char ** argv)
LatticeFermion tmp(FGrid); LatticeFermion tmp(FGrid);
LatticeFermion err(FGrid); LatticeFermion err(FGrid);
/* src=zero;
std::vector<int> origin(5,0);
SpinColourVector f=zero;
for(int sp=0;sp<4;sp++){
for(int co=0;co<3;co++){
f()(sp)(co)=Complex(1.0,0.0);
}}
pokeSite(f,src,origin);
*/
ColourMatrix cm = Complex(1.0,0.0);
LatticeGaugeField Umu(UGrid); LatticeGaugeField Umu(UGrid);
random(RNG4,Umu); random(RNG4,Umu);
@ -144,10 +132,12 @@ int main (int argc, char ** argv)
DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); DomainWallFermionR Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
std::cout<<GridLogMessage << "Calling Dw"<<std::endl; std::cout<<GridLogMessage << "Naive wilson implementation "<<std::endl;
std::cout << GridLogMessage<< "Calling Dw"<<std::endl;
int ncall =100; int ncall =100;
if (1) { if (1) {
Dw.ZeroCounters();
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START; __SSC_START;
@ -166,7 +156,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
err = ref-result; err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl; std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
// Dw.Report(); Dw.Report();
} }
if (1) if (1)
@ -188,8 +178,9 @@ int main (int argc, char ** argv)
peekSite(tmp,src,site); peekSite(tmp,src,site);
pokeSite(tmp,ssrc,site); pokeSite(tmp,ssrc,site);
}}}}} }}}}}
std::cout<<"src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl; std::cout<<GridLogMessage<< "src norms "<< norm2(src)<<" " <<norm2(ssrc)<<std::endl;
double t0=usecond(); double t0=usecond();
sDw.ZeroCounters();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
__SSC_START; __SSC_START;
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
@ -199,23 +190,23 @@ int main (int argc, char ** argv)
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=1344*volume*ncall; double flops=1344*volume*ncall;
std::cout<<GridLogMessage << "Called Dw sinner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl; std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
// sDw.Report(); sDw.Report();
if(0){ if(0){
for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){ for(int i=0;i< PerformanceCounter::NumTypes(); i++ ){
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
PerformanceCounter Counter(i); PerformanceCounter Counter(i);
Counter.Start(); Counter.Start();
sDw.Dhop(ssrc,sresult,0); sDw.Dhop(ssrc,sresult,0);
Counter.Stop(); Counter.Stop();
Counter.Report(); Counter.Report();
} }
} }
std::cout<<"res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl; std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
RealF sum=0; RealF sum=0;
@ -230,12 +221,12 @@ int main (int argc, char ** argv)
peekSite(simd,sresult,site); peekSite(simd,sresult,site);
sum=sum+norm2(normal-simd); sum=sum+norm2(normal-simd);
if (norm2(normal-simd) > 1.0e-6 ) { if (norm2(normal-simd) > 1.0e-6 ) {
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl; std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl; std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl; std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
} }
}}}}} }}}}}
std::cout<<" difference between normal and simd is "<<sum<<std::endl; std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
if (1) { if (1) {
@ -259,17 +250,21 @@ int main (int argc, char ** argv)
sr_e = zero; sr_e = zero;
sr_o = zero; sr_o = zero;
sDw.ZeroCounters();
sDw.stat.init("DhopEO");
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for (int i = 0; i < ncall; i++) {
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o, sr_e, DaggerNo);
} }
double t1=usecond(); double t1=usecond();
sDw.stat.print();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume*ncall)/2; double flops=(1344.0*volume*ncall)/2;
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
sDw.Report();
sDw.DhopEO(ssrc_o,sr_e,DaggerNo); sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
sDw.DhopOE(ssrc_e,sr_o,DaggerNo); sDw.DhopOE(ssrc_e,sr_o,DaggerNo);
@ -294,18 +289,19 @@ int main (int argc, char ** argv)
// ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x // ref = src - Gamma(Gamma::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1); tmp = U[mu]*Cshift(src,mu+1,1);
for(int i=0;i<ref._odata.size();i++){ for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ; ref._odata[i]+= tmp._odata[i] + Gamma(Gmu[mu])*tmp._odata[i]; ;
} }
tmp =adj(U[mu])*src; tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1); tmp =Cshift(tmp,mu+1,-1);
for(int i=0;i<ref._odata.size();i++){ for(int i=0;i<ref._odata.size();i++){
ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ; ref._odata[i]+= tmp._odata[i] - Gamma(Gmu[mu])*tmp._odata[i]; ;
} }
} }
ref = -0.5*ref; ref = -0.5*ref;
} }
Dw.Dhop(src,result,1); Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Naive wilson implementation Dag" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl; std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl; std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl; std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
@ -327,6 +323,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl; std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
{ {
Dw.ZeroCounters();
double t0=usecond(); double t0=usecond();
for(int i=0;i<ncall;i++){ for(int i=0;i<ncall;i++){
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
@ -338,6 +335,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl; std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
Dw.Report();
} }
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo); Dw.DhopOE(src_e,r_o,DaggerNo);

Binary file not shown.

View File

@ -51,7 +51,7 @@ int main (int argc, char ** argv)
{ {
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
const int Ls=16; const int Ls=8;
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

Binary file not shown.

View File

@ -4,7 +4,7 @@ EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.2.9.tar.bz2'
FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz FFTW_URL=http://www.fftw.org/fftw-3.3.4.tar.gz
echo "-- deploying Eigen source..." echo "-- deploying Eigen source..."
wget ${EIGEN_URL} wget ${EIGEN_URL} --no-check-certificate
./scripts/update_eigen.sh `basename ${EIGEN_URL}` ./scripts/update_eigen.sh `basename ${EIGEN_URL}`
rm `basename ${EIGEN_URL}` rm `basename ${EIGEN_URL}`

View File

@ -1,15 +1,20 @@
AC_PREREQ([2.63]) AC_PREREQ([2.63])
AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid]) AC_INIT([Grid], [0.5.1-dev], [https://github.com/paboyle/Grid], [Grid])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE(subdir-objects) AM_INIT_AUTOMAKE(subdir-objects)
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([lib/Grid.h]) AC_CONFIG_SRCDIR([lib/Grid.h])
AC_CONFIG_HEADERS([lib/Config.h]) AC_CONFIG_HEADERS([lib/Config.h])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs ############### Checks for programs
AC_LANG(C++) AC_LANG(C++)
CXXFLAGS="-O3 $CXXFLAGS" CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_PROG_RANLIB
############ openmp ############### ############ openmp ###############
AC_OPENMP AC_OPENMP
@ -22,9 +27,6 @@ AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS" AM_LDFLAGS="$OPENMP_CXXFLAGS $AM_LDFLAGS"
fi fi
############ libtool ###############
LT_INIT
############### Checks for header files ############### Checks for header files
AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(stdint.h)
AC_CHECK_HEADERS(mm_malloc.h) AC_CHECK_HEADERS(mm_malloc.h)
@ -45,7 +47,7 @@ AC_ARG_WITH([gmp],
[AS_HELP_STRING([--with-gmp=prefix], [AS_HELP_STRING([--with-gmp=prefix],
[try this for a non-standard install prefix of the GMP library])], [try this for a non-standard install prefix of the GMP library])],
[AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_gmp/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_gmp/lib" $AM_LDFLAGS]) [AM_LDFLAGS="-L$with_gmp/lib $AM_LDFLAGS"])
AC_ARG_WITH([mpfr], AC_ARG_WITH([mpfr],
[AS_HELP_STRING([--with-mpfr=prefix], [AS_HELP_STRING([--with-mpfr=prefix],
[try this for a non-standard install prefix of the MPFR library])], [try this for a non-standard install prefix of the MPFR library])],
@ -68,6 +70,20 @@ case ${ac_LAPACK} in
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]) AC_DEFINE([USE_LAPACK],[1],[use LAPACK])
esac esac
################## first-touch ####################
AC_ARG_ENABLE([numa],
[AC_HELP_STRING([--enable-numa=yes|no|prefix], [enable first touch numa opt])],
[ac_NUMA=${enable_NUMA}],[ac_NUMA=no])
case ${ac_NUMA} in
no)
;;
yes)
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
*)
AC_DEFINE([GRID_NUMA],[1],[First touch numa locality]);;
esac
################## FFTW3 #################### ################## FFTW3 ####################
AC_ARG_WITH([fftw], AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=prefix], [AS_HELP_STRING([--with-fftw=prefix],
@ -75,23 +91,6 @@ AC_ARG_WITH([fftw],
[AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"] [AM_CXXFLAGS="-I$with_fftw/include $AM_CXXFLAGS"]
[AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"]) [AM_LDFLAGS="-L$with_fftw/lib $AM_LDFLAGS"])
#
# What about the MKL library replacement for fftw3 ? How do we know if fftw_execute
# can be found in MKL?
#
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])] [ac_fftw=yes],
[ac_fftw=no])
case ${ac_fftw} in
no)
echo WARNING libfftw3 not found FFT routines will not work
;;
yes)
AM_LDFLAGS="$AM_LDFLAGS -lfftw3 -lfftw3f"
esac
################ Get compiler informations ################ Get compiler informations
AC_LANG([C++]) AC_LANG([C++])
AX_CXX_COMPILE_STDCXX_11([noext],[mandatory]) AX_CXX_COMPILE_STDCXX_11([noext],[mandatory])
@ -105,7 +104,6 @@ AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"],
############### Checks for library functions ############### Checks for library functions
CXXFLAGS_CPY=$CXXFLAGS CXXFLAGS_CPY=$CXXFLAGS
LDFLAGS_CPY=$LDFLAGS LDFLAGS_CPY=$LDFLAGS
LIBS_CPY=$LIBS
CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS" CXXFLAGS="$AM_CXXFLAGS $CXXFLAGS"
LDFLAGS="$AM_LDFLAGS $LDFLAGS" LDFLAGS="$AM_LDFLAGS $LDFLAGS"
AC_CHECK_FUNCS([gettimeofday]) AC_CHECK_FUNCS([gettimeofday])
@ -124,11 +122,16 @@ if test "${ac_LAPACK}x" != "nox"; then
AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[], AC_CHECK_LIB([lapack],[LAPACKE_sbdsdc],[],
[AC_MSG_ERROR("LAPACK enabled but library not found")]) [AC_MSG_ERROR("LAPACK enabled but library not found")])
fi fi
AC_CHECK_LIB([fftw3],[fftw_execute],
[AC_DEFINE([HAVE_FFTW],[1],[Define to 1 if you have the `FFTW' library (-lfftw3).])]
[have_fftw=true]
[LIBS="$LIBS -lfftw3 -lfftw3f"],
[AC_MSG_WARN([**** FFTW library not found, Grid can still compile but FFT-based routines will not work ****])])
CXXFLAGS=$CXXFLAGS_CPY CXXFLAGS=$CXXFLAGS_CPY
LDFLAGS=$LDFLAGS_CPY LDFLAGS=$LDFLAGS_CPY
############### SIMD instruction selection ############### SIMD instruction selection
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\ AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVXFMA4|AVXFMA|AVX2|AVX512|AVX512MIC|IMCI|KNL|KNC],\
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\ [Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, IMCI])],\
[ac_SIMD=${enable_simd}],[ac_SIMD=GEN]) [ac_SIMD=${enable_simd}],[ac_SIMD=GEN])
@ -144,6 +147,9 @@ case ${ax_cv_cxx_compiler_vendor} in
AVXFMA4) AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma4';; SIMD_FLAGS='-mavx -mfma4';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA3])
SIMD_FLAGS='-mavx -mfma';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-mavx2 -mfma';; SIMD_FLAGS='-mavx2 -mfma';;
@ -151,11 +157,14 @@ case ${ax_cv_cxx_compiler_vendor} in
AC_DEFINE([AVX512],[1],[AVX512 intrinsics]) AC_DEFINE([AVX512],[1],[AVX512 intrinsics])
SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';; SIMD_FLAGS='-mavx512f -mavx512pf -mavx512er -mavx512cd';;
IMCI|KNC) IMCI|KNC)
AC_DEFINE([IMCI],[1],[IMCI Intrinsics for Knights Corner]) AC_DEFINE([IMCI],[1],[IMCI intrinsics for Knights Corner])
SIMD_FLAGS='';; SIMD_FLAGS='';;
GEN) GEN)
AC_DEFINE([GENERIC_VEC],[1],[generic vector code]) AC_DEFINE([GENERIC_VEC],[1],[generic vector code])
SIMD_FLAGS='';; SIMD_FLAGS='';;
QPX|BGQ)
AC_DEFINE([QPX],[1],[QPX intrinsics for BG/Q])
SIMD_FLAGS='';;
*) *)
AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);; AC_MSG_ERROR(["SIMD option ${ac_SIMD} not supported by the GCC/Clang compiler"]);;
esac;; esac;;
@ -169,7 +178,10 @@ case ${ax_cv_cxx_compiler_vendor} in
SIMD_FLAGS='-mavx -xavx';; SIMD_FLAGS='-mavx -xavx';;
AVXFMA4) AVXFMA4)
AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4]) AC_DEFINE([AVXFMA4],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -xavx -mfma';; SIMD_FLAGS='-mavx -mfma';;
AVXFMA)
AC_DEFINE([AVXFMA],[1],[AVX intrinsics with FMA4])
SIMD_FLAGS='-mavx -mfma';;
AVX2) AVX2)
AC_DEFINE([AVX2],[1],[AVX2 intrinsics]) AC_DEFINE([AVX2],[1],[AVX2 intrinsics])
SIMD_FLAGS='-march=core-avx2 -xcore-avx2';; SIMD_FLAGS='-march=core-avx2 -xcore-avx2';;
@ -336,12 +348,15 @@ Summary of configuration for $PACKAGE v$VERSION
- RNG choice : ${ac_RNG} - RNG choice : ${ac_RNG}
- GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi` - GMP : `if test "x$have_gmp" = xtrue; then echo yes; else echo no; fi`
- LAPACK : ${ac_LAPACK} - LAPACK : ${ac_LAPACK}
- FFTW : ${ac_fftw} - FFTW : `if test "x$have_fftw" = xtrue; then echo yes; else echo no; fi`
- build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi` - build DOXYGEN documentation : `if test "x$enable_doc" = xyes; then echo yes; else echo no; fi`
- graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi` - graphs and diagrams : `if test "x$enable_dot" = xyes; then echo yes; else echo no; fi`
----- BUILD FLAGS ------------------------------------- ----- BUILD FLAGS -------------------------------------
- CXXFLAGS: "${AM_CXXFLAGS} ${CXXFLAGS}" - CXXFLAGS:
- LDFLAGS: "${AM_LDFLAGS} ${LDFLAGS}" `echo ${AM_CXXFLAGS} ${CXXFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
- LIBS: "${LIBS} " - LDFLAGS:
`echo ${AM_LDFLAGS} ${LDFLAGS} | tr ' ' '\n' | sed 's/^-/ -/g'`
- LIBS:
`echo ${LIBS} | tr ' ' '\n' | sed 's/^-/ -/g'`
------------------------------------------------------- -------------------------------------------------------
" "

View File

@ -113,9 +113,8 @@ public:
#endif #endif
_Tp tmp; _Tp tmp;
#undef FIRST_TOUCH_OPTIMISE #ifdef GRID_NUMA
#ifdef FIRST_TOUCH_OPTIMISE #pragma omp parallel for schedule(static)
#pragma omp parallel for
for(int i=0;i<__n;i++){ for(int i=0;i<__n;i++){
ptr[i]=tmp; ptr[i]=tmp;
} }

View File

@ -270,15 +270,15 @@ void Grid_init(int *argc,char ***argv)
std::cout <<std::endl; std::cout <<std::endl;
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<COL_RED << "__|__|__|__|__"<< "|__|__|_"<<COL_PURPLE<<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_RED << "__|__| | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl; std::cout <<COL_RED << "__|_ | | | "<< "| | | "<<COL_PURPLE<<" | | |"<< " | | | _|__"<<std::endl;
std::cout <<COL_RED << "__|__ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl; std::cout <<COL_RED << "__|_ "<< " "<<COL_PURPLE<<" "<< " _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<COL_RED << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" RRRR "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_PURPLE<<" _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_PURPLE<<" _|__"<<std::endl;
std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl; std::cout <<COL_RED << "__|_ "<<COL_GREEN<<"G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_PURPLE<<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl; std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G GG "<<COL_RED<<" RRRR "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D"<<COL_GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl; std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<"G G "<<COL_RED<<" R R "<<COL_BLUE <<" I "<<COL_PURPLE<<"D D "<<COL_GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl; std::cout <<COL_BLUE << "__|_ "<<COL_GREEN<<" GGGG "<<COL_RED<<" R R "<<COL_BLUE <<" III "<<COL_PURPLE<<"DDDD "<<COL_GREEN <<" _|__"<<std::endl;
std::cout <<COL_BLUE << "__|__ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl; std::cout <<COL_BLUE << "__|_ "<< " "<<COL_GREEN <<" "<< " _|__"<<std::endl;
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl; std::cout <<COL_BLUE << "__|__|__|__|__"<< "|__|__|_"<<COL_GREEN <<"_|__|__|"<< "__|__|__|__|__"<<std::endl;
std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl; std::cout <<COL_BLUE << " | | | | "<< "| | | "<<COL_GREEN <<" | | |"<< " | | | | "<<std::endl;

View File

@ -17,8 +17,8 @@ endif
include Make.inc include Make.inc
include Eigen.inc include Eigen.inc
lib_LTLIBRARIES = libGrid.la lib_LIBRARIES = libGrid.a
libGrid_la_SOURCES = $(CCFILES) $(extra_sources) libGrid_a_SOURCES = $(CCFILES) $(extra_sources)
libGrid_ladir = $(pkgincludedir) libGrid_adir = $(pkgincludedir)
nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h nobase_dist_pkginclude_HEADERS = $(HFILES) $(eigen_files) Config.h

247
lib/Stat.cc Normal file
View File

@ -0,0 +1,247 @@
#include <Grid.h>
#include <PerfCount.h>
#include <Stat.h>
namespace Grid {
bool PmuStat::pmu_initialized=false;
void PmuStat::init(const char *regname)
{
#ifdef __x86_64__
name = regname;
if (!pmu_initialized)
{
std::cout<<"initialising pmu"<<std::endl;
pmu_initialized = true;
pmu_init();
}
clear();
#endif
}
void PmuStat::clear(void)
{
#ifdef __x86_64__
count = 0;
tregion = 0;
pmc0 = 0;
pmc1 = 0;
inst = 0;
cyc = 0;
ref = 0;
tcycles = 0;
reads = 0;
writes = 0;
#endif
}
void PmuStat::print(void)
{
#ifdef __x86_64__
std::cout <<"Reg "<<std::string(name)<<":\n";
std::cout <<" region "<<tregion<<std::endl;
std::cout <<" cycles "<<tcycles<<std::endl;
std::cout <<" inst "<<inst <<std::endl;
std::cout <<" cyc "<<cyc <<std::endl;
std::cout <<" ref "<<ref <<std::endl;
std::cout <<" pmc0 "<<pmc0 <<std::endl;
std::cout <<" pmc1 "<<pmc1 <<std::endl;
std::cout <<" count "<<count <<std::endl;
std::cout <<" reads "<<reads <<std::endl;
std::cout <<" writes "<<writes <<std::endl;
#endif
}
void PmuStat::start(void)
{
#ifdef __x86_64__
pmu_start();
++count;
xmemctrs(&mrstart, &mwstart);
tstart = __rdtsc();
#endif
}
void PmuStat::enter(int t)
{
#ifdef __x86_64__
counters[0][t] = __rdpmc(0);
counters[1][t] = __rdpmc(1);
counters[2][t] = __rdpmc((1<<30)|0);
counters[3][t] = __rdpmc((1<<30)|1);
counters[4][t] = __rdpmc((1<<30)|2);
counters[5][t] = __rdtsc();
#endif
}
void PmuStat::exit(int t)
{
#ifdef __x86_64__
counters[0][t] = __rdpmc(0) - counters[0][t];
counters[1][t] = __rdpmc(1) - counters[1][t];
counters[2][t] = __rdpmc((1<<30)|0) - counters[2][t];
counters[3][t] = __rdpmc((1<<30)|1) - counters[3][t];
counters[4][t] = __rdpmc((1<<30)|2) - counters[4][t];
counters[5][t] = __rdtsc() - counters[5][t];
#endif
}
void PmuStat::accum(int nthreads)
{
#ifdef __x86_64__
tend = __rdtsc();
xmemctrs(&mrend, &mwend);
pmu_stop();
for (int t = 0; t < nthreads; ++t) {
pmc0 += counters[0][t];
pmc1 += counters[1][t];
inst += counters[2][t];
cyc += counters[3][t];
ref += counters[4][t];
tcycles += counters[5][t];
}
uint64_t region = tend - tstart;
tregion += region;
uint64_t mreads = mrend - mrstart;
reads += mreads;
uint64_t mwrites = mwend - mwstart;
writes += mwrites;
#endif
}
void PmuStat::pmu_fini(void) {}
void PmuStat::pmu_start(void) {};
void PmuStat::pmu_stop(void) {};
void PmuStat::pmu_init(void)
{
#ifdef _KNIGHTS_LANDING_
KNLsetup();
#endif
}
void PmuStat::xmemctrs(uint64_t *mr, uint64_t *mw)
{
#ifdef _KNIGHTS_LANDING_
ctrs c;
KNLreadctrs(c);
uint64_t emr = 0, emw = 0;
for (int i = 0; i < NEDC; ++i)
{
emr += c.edcrd[i];
emw += c.edcwr[i];
}
*mr = emr;
*mw = emw;
#else
*mr = *mw = 0;
#endif
}
#ifdef _KNIGHTS_LANDING_
struct knl_gbl_ PmuStat::gbl;
#define PMU_MEM
void PmuStat::KNLevsetup(const char *ename, int &fd, int event, int umask)
{
char fname[1024];
snprintf(fname, sizeof(fname), "%s/type", ename);
FILE *fp = fopen(fname, "r");
if (fp == 0) {
::printf("open %s", fname);
::exit(0);
}
int type;
int ret = fscanf(fp, "%d", &type);
assert(ret == 1);
fclose(fp);
// std::cout << "Using PMU type "<<type<<" from " << std::string(ename) <<std::endl;
struct perf_event_attr hw = {};
hw.size = sizeof(hw);
hw.type = type;
// see /sys/devices/uncore_*/format/*
// All of the events we are interested in are configured the same way, but
// that isn't always true. Proper code would parse the format files
hw.config = event | (umask << 8);
//hw.read_format = PERF_FORMAT_GROUP;
// unfortunately the above only works within a single PMU; might
// as well just read them one at a time
int cpu = 0;
fd = perf_event_open(&hw, -1, cpu, -1, 0);
if (fd == -1) {
::printf("CPU %d, box %s, event 0x%lx", cpu, ename, hw.config);
::exit(0);
} else {
// std::cout << "event "<<std::string(ename)<<" set up for fd "<<fd<<" hw.config "<<hw.config <<std::endl;
}
}
void PmuStat::KNLsetup(void){
int ret;
char fname[1024];
// MC RPQ inserts and WPQ inserts (reads & writes)
for (int mc = 0; mc < NMC; ++mc)
{
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_imc_%d",mc);
// RPQ Inserts
KNLevsetup(fname, gbl.mc_rd[mc], 0x1, 0x1);
// WPQ Inserts
KNLevsetup(fname, gbl.mc_wr[mc], 0x2, 0x1);
}
// EDC RPQ inserts and WPQ inserts
for (int edc=0; edc < NEDC; ++edc)
{
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_eclk_%d",edc);
// RPQ inserts
KNLevsetup(fname, gbl.edc_rd[edc], 0x1, 0x1);
// WPQ inserts
KNLevsetup(fname, gbl.edc_wr[edc], 0x2, 0x1);
}
// EDC HitE, HitM, MissE, MissM
for (int edc=0; edc < NEDC; ++edc)
{
::snprintf(fname, sizeof(fname), "/sys/devices/uncore_edc_uclk_%d", edc);
KNLevsetup(fname, gbl.edc_hite[edc], 0x2, 0x1);
KNLevsetup(fname, gbl.edc_hitm[edc], 0x2, 0x2);
KNLevsetup(fname, gbl.edc_misse[edc], 0x2, 0x4);
KNLevsetup(fname, gbl.edc_missm[edc], 0x2, 0x8);
}
}
uint64_t PmuStat::KNLreadctr(int fd)
{
uint64_t data;
size_t s = ::read(fd, &data, sizeof(data));
if (s != sizeof(uint64_t)){
::printf("read counter %lu", s);
::exit(0);
}
return data;
}
void PmuStat::KNLreadctrs(ctrs &c)
{
for (int i = 0; i < NMC; ++i)
{
c.mcrd[i] = KNLreadctr(gbl.mc_rd[i]);
c.mcwr[i] = KNLreadctr(gbl.mc_wr[i]);
}
for (int i = 0; i < NEDC; ++i)
{
c.edcrd[i] = KNLreadctr(gbl.edc_rd[i]);
c.edcwr[i] = KNLreadctr(gbl.edc_wr[i]);
}
for (int i = 0; i < NEDC; ++i)
{
c.edchite[i] = KNLreadctr(gbl.edc_hite[i]);
c.edchitm[i] = KNLreadctr(gbl.edc_hitm[i]);
c.edcmisse[i] = KNLreadctr(gbl.edc_misse[i]);
c.edcmissm[i] = KNLreadctr(gbl.edc_missm[i]);
}
}
#endif
}

104
lib/Stat.h Normal file
View File

@ -0,0 +1,104 @@
#ifndef _GRID_STAT_H
#define _GRID_STAT_H
#ifdef AVX512
#define _KNIGHTS_LANDING_ROOTONLY
#endif
namespace Grid {
///////////////////////////////////////////////////////////////////////////////
// Extra KNL counters from MCDRAM
///////////////////////////////////////////////////////////////////////////////
#ifdef _KNIGHTS_LANDING_
#define NMC 6
#define NEDC 8
struct ctrs
{
uint64_t mcrd[NMC];
uint64_t mcwr[NMC];
uint64_t edcrd[NEDC];
uint64_t edcwr[NEDC];
uint64_t edchite[NEDC];
uint64_t edchitm[NEDC];
uint64_t edcmisse[NEDC];
uint64_t edcmissm[NEDC];
};
// Peter/Azusa:
// Our modification of a code provided by Larry Meadows from Intel
// Verified by email exchange non-NDA, ok for github. Should be as uses /sys/devices/ FS
// so is already public and in the linux kernel for KNL.
struct knl_gbl_
{
int mc_rd[NMC];
int mc_wr[NMC];
int edc_rd[NEDC];
int edc_wr[NEDC];
int edc_hite[NEDC];
int edc_hitm[NEDC];
int edc_misse[NEDC];
int edc_missm[NEDC];
};
#endif
///////////////////////////////////////////////////////////////////////////////
class PmuStat
{
uint64_t counters[8][256];
#ifdef _KNIGHTS_LANDING_
static struct knl_gbl_ gbl;
#endif
const char *name;
uint64_t reads; // memory reads
uint64_t writes; // memory writes
uint64_t mrstart; // memory read counter at start of parallel region
uint64_t mrend; // memory read counter at end of parallel region
uint64_t mwstart; // memory write counter at start of parallel region
uint64_t mwend; // memory write counter at end of parallel region
// cumulative counters
uint64_t count; // number of invocations
uint64_t tregion; // total time in parallel region (from thread 0)
uint64_t tcycles; // total cycles inside parallel region
uint64_t inst, ref, cyc; // fixed counters
uint64_t pmc0, pmc1;// pmu
// add memory counters here
// temp variables
uint64_t tstart; // tsc at start of parallel region
uint64_t tend; // tsc at end of parallel region
// map for ctrs values
// 0 pmc0 start
// 1 pmc0 end
// 2 pmc1 start
// 3 pmc1 end
// 4 tsc start
// 5 tsc end
static bool pmu_initialized;
public:
static bool is_init(void){ return pmu_initialized;}
static void pmu_init(void);
static void pmu_fini(void);
static void pmu_start(void);
static void pmu_stop(void);
void accum(int nthreads);
static void xmemctrs(uint64_t *mr, uint64_t *mw);
void start(void);
void enter(int t);
void exit(int t);
void print(void);
void init(const char *regname);
void clear(void);
#ifdef _KNIGHTS_LANDING_
static void KNLsetup(void);
static uint64_t KNLreadctr(int fd);
static void KNLreadctrs(ctrs &c);
static void KNLevsetup(const char *ename, int &fd, int event, int umask);
#endif
};
}
#endif

View File

@ -70,9 +70,70 @@
namespace Grid { namespace Grid {
template<class vobj,class cobj,class compressor> void
Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
{
table.resize(0);
int rd = rhs._grid->_rdimensions[dimension];
if ( !rhs._grid->CheckerBoarded(dimension) ) {
cbmask = 0x3;
}
int so= plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) {
table.resize(e1*e2);
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int bo = n*e2;
table[bo+b]=std::pair<int,int>(bo+b,o+b);
}
}
} else {
int bo=0;
table.resize(e1*e2/2);
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
if ( ocb &cbmask ) {
table[bo]=std::pair<int,int>(bo,o+b); bo++;
}
}
}
}
}
template<class vobj,class cobj,class compressor> void
Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,
compressor &compress, int off,int so)
{
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
}
template<class vobj,class cobj,class compressor> void
Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
double &t_table ,double & t_data )
{
std::vector<std::pair<int,int> > table;
Gather_plane_simple_table_compute (rhs, buffer,dimension,plane,cbmask,compress,off,table);
int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
Gather_plane_simple_table (table,rhs,buffer,compress,off,so);
}
struct StencilEntry { struct StencilEntry {
uint32_t _offset; uint64_t _offset;
uint32_t _byte_offset; uint64_t _byte_offset;
uint16_t _is_local; uint16_t _is_local;
uint16_t _permute; uint16_t _permute;
uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline uint32_t _around_the_world; //256 bits, 32 bytes, 1/2 cacheline
@ -101,12 +162,14 @@
}; };
std::vector<Packet> Packets; std::vector<Packet> Packets;
int face_table_computed;
std::vector<std::vector<std::pair<int,int> > > face_table ;
#define SEND_IMMEDIATE #define SEND_IMMEDIATE
#define SERIAL_SENDS #define SERIAL_SENDS
void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){ void AddPacket(void *xmit,void * rcv, Integer to,Integer from,Integer bytes){
comms_bytes+=2.0*bytes;
#ifdef SEND_IMMEDIATE #ifdef SEND_IMMEDIATE
commtime-=usecond(); commtime-=usecond();
_grid->SendToRecvFrom(xmit,to,rcv,from,bytes); _grid->SendToRecvFrom(xmit,to,rcv,from,bytes);
@ -256,7 +319,8 @@
if( _entries[i]._is_local ) { if( _entries[i]._is_local ) {
_entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj); _entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj);
} else { } else {
_entries[i]._byte_offset =(uint64_t)&comm_buf[0]+ _entries[i]._offset*sizeof(cobj); // PrecomputeByteOffsets [5] 16384/32768 140735768678528 140735781261056 2581581952
_entries[i]._byte_offset = _entries[i]._offset*sizeof(cobj);
} }
} }
}; };
@ -265,17 +329,21 @@
// _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0); // _mm_prefetch((char *)&_entries[ent],_MM_HINT_T0);
} }
inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) { inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) {
_mm_prefetch((char *)&_entries[ent+1],_MM_HINT_T0); uint64_t cbase = (uint64_t)&comm_buf[0];
local = _entries[ent]._is_local; local = _entries[ent]._is_local;
perm = _entries[ent]._permute; perm = _entries[ent]._permute;
if (perm) ptype = _permute_type[point]; if (perm) ptype = _permute_type[point];
if (local) return base + _entries[ent]._byte_offset; if (local) {
else return _entries[ent]._byte_offset; return base + _entries[ent]._byte_offset;
} else {
return cbase + _entries[ent]._byte_offset;
}
} }
inline uint64_t GetPFInfo(int ent,uint64_t base) { inline uint64_t GetPFInfo(int ent,uint64_t base) {
uint64_t cbase = (uint64_t)&comm_buf[0];
int local = _entries[ent]._is_local; int local = _entries[ent]._is_local;
if (local) return base + _entries[ent]._byte_offset; if (local) return base + _entries[ent]._byte_offset;
else return _entries[ent]._byte_offset; else return cbase + _entries[ent]._byte_offset;
} }
// Comms buffers // Comms buffers
@ -301,6 +369,48 @@
double gathermtime; double gathermtime;
double splicetime; double splicetime;
double nosplicetime; double nosplicetime;
double t_data;
double t_table;
double calls;
void ZeroCounters(void) {
gathertime = 0.;
jointime = 0.;
commtime = 0.;
halogtime = 0.;
mergetime = 0.;
spintime = 0.;
gathermtime = 0.;
splicetime = 0.;
nosplicetime = 0.;
t_data = 0.0;
t_table= 0.0;
comms_bytes = 0.;
calls = 0.;
};
void Report(void) {
#define PRINTIT(A) \
std::cout << GridLogMessage << " Stencil " << #A << " "<< A/calls<<std::endl;
if ( calls > 0. ) {
std::cout << GridLogMessage << " Stencil calls "<<calls<<std::endl;
PRINTIT(halogtime);
PRINTIT(gathertime);
PRINTIT(gathermtime);
PRINTIT(mergetime);
if(comms_bytes>1.0){
PRINTIT(comms_bytes);
PRINTIT(commtime);
std::cout << GridLogMessage << " Stencil " << comms_bytes/commtime/1000. << " GB/s "<<std::endl;
}
PRINTIT(jointime);
PRINTIT(spintime);
PRINTIT(splicetime);
PRINTIT(nosplicetime);
PRINTIT(t_table);
PRINTIT(t_data);
}
};
#endif #endif
CartesianStencil(GridBase *grid, CartesianStencil(GridBase *grid,
@ -310,18 +420,7 @@
const std::vector<int> &distances) const std::vector<int> &distances)
: _permute_type(npoints), _comm_buf_size(npoints) : _permute_type(npoints), _comm_buf_size(npoints)
{ {
#ifdef TIMING_HACK face_table_computed=0;
gathertime=0;
jointime=0;
commtime=0;
halogtime=0;
mergetime=0;
spintime=0;
gathermtime=0;
splicetime=0;
nosplicetime=0;
comms_bytes=0;
#endif
_npoints = npoints; _npoints = npoints;
_grid = grid; _grid = grid;
_directions = directions; _directions = directions;
@ -623,6 +722,7 @@
template<class compressor> template<class compressor>
void HaloExchange(const Lattice<vobj> &source,compressor &compress) void HaloExchange(const Lattice<vobj> &source,compressor &compress)
{ {
calls++;
Mergers.resize(0); Mergers.resize(0);
Packets.resize(0); Packets.resize(0);
HaloGather(source,compress); HaloGather(source,compress);
@ -648,7 +748,7 @@
} }
#endif #endif
template<class compressor> template<class compressor>
void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point) void HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx)
{ {
int dimension = _directions[point]; int dimension = _directions[point];
int displacement = _distances[point]; int displacement = _distances[point];
@ -676,23 +776,23 @@
if ( sshift[0] == sshift[1] ) { if ( sshift[0] == sshift[1] ) {
if (splice_dim) { if (splice_dim) {
splicetime-=usecond(); splicetime-=usecond();
GatherSimd(source,dimension,shift,0x3,compress); GatherSimd(source,dimension,shift,0x3,compress,face_idx);
splicetime+=usecond(); splicetime+=usecond();
} else { } else {
nosplicetime-=usecond(); nosplicetime-=usecond();
Gather(source,dimension,shift,0x3,compress); Gather(source,dimension,shift,0x3,compress,face_idx);
nosplicetime+=usecond(); nosplicetime+=usecond();
} }
} else { } else {
if(splice_dim){ if(splice_dim){
splicetime-=usecond(); splicetime-=usecond();
GatherSimd(source,dimension,shift,0x1,compress);// if checkerboard is unfavourable take two passes GatherSimd(source,dimension,shift,0x1,compress,face_idx);// if checkerboard is unfavourable take two passes
GatherSimd(source,dimension,shift,0x2,compress);// both with block stride loop iteration GatherSimd(source,dimension,shift,0x2,compress,face_idx);// both with block stride loop iteration
splicetime+=usecond(); splicetime+=usecond();
} else { } else {
nosplicetime-=usecond(); nosplicetime-=usecond();
Gather(source,dimension,shift,0x1,compress); Gather(source,dimension,shift,0x1,compress,face_idx);
Gather(source,dimension,shift,0x2,compress); Gather(source,dimension,shift,0x2,compress,face_idx);
nosplicetime+=usecond(); nosplicetime+=usecond();
} }
} }
@ -710,17 +810,19 @@
u_comm_offset=0; u_comm_offset=0;
// Gather all comms buffers // Gather all comms buffers
int face_idx=0;
for(int point = 0 ; point < _npoints; point++) { for(int point = 0 ; point < _npoints; point++) {
compress.Point(point); compress.Point(point);
HaloGatherDir(source,compress,point); HaloGatherDir(source,compress,point,face_idx);
} }
face_table_computed=1;
assert(u_comm_offset==_unified_buffer_size); assert(u_comm_offset==_unified_buffer_size);
halogtime+=usecond(); halogtime+=usecond();
} }
template<class compressor> template<class compressor>
void Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress) void Gather(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor & compress,int &face_idx)
{ {
typedef typename cobj::vector_type vector_type; typedef typename cobj::vector_type vector_type;
typedef typename cobj::scalar_type scalar_type; typedef typename cobj::scalar_type scalar_type;
@ -757,8 +859,20 @@
int bytes = words * sizeof(cobj); int bytes = words * sizeof(cobj);
gathertime-=usecond(); gathertime-=usecond();
Gather_plane_simple (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset); int so = sx*rhs._grid->_ostride[dimension]; // base offset for start of plane
if ( !face_table_computed ) {
t_table-=usecond();
face_table.resize(face_idx+1);
Gather_plane_simple_table_compute (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset,face_table[face_idx]);
t_table+=usecond();
}
t_data-=usecond();
Gather_plane_simple_table (face_table[face_idx],rhs,u_send_buf,compress,u_comm_offset,so);
face_idx++;
t_data+=usecond();
gathertime+=usecond(); gathertime+=usecond();
// Gather_plane_simple_stencil (rhs,u_send_buf,dimension,sx,cbmask,compress,u_comm_offset,t_table,t_data);
int rank = _grid->_processor; int rank = _grid->_processor;
int recv_from_rank; int recv_from_rank;
@ -781,7 +895,7 @@
template<class compressor> template<class compressor>
void GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress) void GatherSimd(const Lattice<vobj> &rhs,int dimension,int shift,int cbmask,compressor &compress,int & face_idx)
{ {
const int Nsimd = _grid->Nsimd(); const int Nsimd = _grid->Nsimd();

View File

@ -37,7 +37,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_OMP #ifdef GRID_OMP
#include <omp.h> #include <omp.h>
#ifdef GRID_NUMA
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
#else
#define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)") #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
#endif
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#else #else
#define PARALLEL_FOR_LOOP #define PARALLEL_FOR_LOOP

View File

@ -1,153 +1,168 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/algorithms/iterative/ConjugateGradient.h Source file: ./lib/algorithms/iterative/ConjugateGradient.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef GRID_CONJUGATE_GRADIENT_H #ifndef GRID_CONJUGATE_GRADIENT_H
#define GRID_CONJUGATE_GRADIENT_H #define GRID_CONJUGATE_GRADIENT_H
namespace Grid { namespace Grid {
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
// Base classes for iterative processes based on operators // Base classes for iterative processes based on operators
// single input vec, single output vec. // single input vec, single output vec.
///////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////
template<class Field> template <class Field>
class ConjugateGradient : public OperatorFunction<Field> { class ConjugateGradient : public OperatorFunction<Field> {
public: public:
bool ErrorOnNoConverge; //throw an assert when the CG fails to converge. Defaults true. bool ErrorOnNoConverge; // throw an assert when the CG fails to converge.
RealD Tolerance; // Defaults true.
Integer MaxIterations; RealD Tolerance;
ConjugateGradient(RealD tol,Integer maxit, bool err_on_no_conv = true) : Tolerance(tol), MaxIterations(maxit), ErrorOnNoConverge(err_on_no_conv){ Integer MaxIterations;
}; ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
: Tolerance(tol),
MaxIterations(maxit),
ErrorOnNoConverge(err_on_no_conv){};
void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
Field &psi) {
psi.checkerboard = src.checkerboard;
conformable(psi, src);
void operator() (LinearOperatorBase<Field> &Linop,const Field &src, Field &psi){ RealD cp, c, a, d, b, ssq, qq, b_pred;
psi.checkerboard = src.checkerboard; Field p(src);
conformable(psi,src); Field mmp(src);
Field r(src);
RealD cp,c,a,d,b,ssq,qq,b_pred; // Initial residual computation & set up
RealD guess = norm2(psi);
Field p(src); assert(std::isnan(guess) == 0);
Field mmp(src);
Field r(src);
//Initial residual computation & set up
RealD guess = norm2(psi);
assert(std::isnan(guess)==0);
Linop.HermOpAndNorm(psi,mmp,d,b);
Linop.HermOpAndNorm(psi, mmp, d, b);
r= src-mmp;
p= r;
a =norm2(p);
cp =a;
ssq=norm2(src);
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: guess "<<guess<<std::endl; r = src - mmp;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: src "<<ssq <<std::endl; p = r;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mp "<<d <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: mmp "<<b <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: cp,r "<<cp <<std::endl;
std::cout<<GridLogIterative <<std::setprecision(4)<< "ConjugateGradient: p "<<a <<std::endl;
RealD rsq = Tolerance* Tolerance*ssq; a = norm2(p);
cp = a;
//Check if guess is really REALLY good :) ssq = norm2(src);
if ( cp <= rsq ) {
return;
}
std::cout<<GridLogIterative << std::setprecision(4)<< "ConjugateGradient: k=0 residual "<<cp<<" target "<<rsq<<std::endl;
GridStopWatch LinalgTimer; std::cout << GridLogIterative << std::setprecision(4)
GridStopWatch MatrixTimer; << "ConjugateGradient: guess " << guess << std::endl;
GridStopWatch SolverTimer; std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: src " << ssq << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mp " << d << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: mmp " << b << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: cp,r " << cp << std::endl;
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: p " << a << std::endl;
SolverTimer.Start(); RealD rsq = Tolerance * Tolerance * ssq;
int k;
for (k=1;k<=MaxIterations;k++){
c=cp;
MatrixTimer.Start(); // Check if guess is really REALLY good :)
Linop.HermOpAndNorm(p,mmp,d,qq); if (cp <= rsq) {
MatrixTimer.Stop(); return;
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c/d;
b_pred = a*(a*qq-d)/c;
cp = axpy_norm(r,-a,mmp,r);
b = cp/c;
// Fuse these loops ; should be really easy
psi= a*p+psi;
p = p*b+r;
LinalgTimer.Stop();
std::cout<<GridLogIterative<<"ConjugateGradient: Iteration " <<k<<" residual "<<cp<< " target "<< rsq<<std::endl;
// Stopping condition
if ( cp <= rsq ) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi,mmp,d,qq);
p=mmp-src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm/srcnorm;
std::cout<<GridLogMessage<<"ConjugateGradient: Converged on iteration " <<k
<<" computed residual "<<sqrt(cp/ssq)
<<" true residual " <<true_residual
<<" target "<<Tolerance<<std::endl;
std::cout<<GridLogMessage<<"Time elapsed: Total "<< SolverTimer.Elapsed() << " Matrix "<<MatrixTimer.Elapsed() << " Linalg "<<LinalgTimer.Elapsed();
std::cout<<std::endl;
if(ErrorOnNoConverge)
assert(true_residual/Tolerance < 1000.0);
return;
}
}
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
if(ErrorOnNoConverge)
assert(0);
} }
};
std::cout << GridLogIterative << std::setprecision(4)
<< "ConjugateGradient: k=0 residual " << cp << " target " << rsq
<< std::endl;
GridStopWatch LinalgTimer;
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
c = cp;
MatrixTimer.Start();
Linop.HermOpAndNorm(p, mmp, d, qq);
MatrixTimer.Stop();
LinalgTimer.Start();
// RealD qqck = norm2(mmp);
// ComplexD dck = innerProduct(p,mmp);
a = c / d;
b_pred = a * (a * qq - d) / c;
cp = axpy_norm(r, -a, mmp, r);
b = cp / c;
// Fuse these loops ; should be really easy
psi = a * p + psi;
p = p * b + r;
LinalgTimer.Stop();
std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
<< " residual " << cp << " target " << rsq << std::endl;
// Stopping condition
if (cp <= rsq) {
SolverTimer.Stop();
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
RealD mmpnorm = sqrt(norm2(mmp));
RealD psinorm = sqrt(norm2(psi));
RealD srcnorm = sqrt(norm2(src));
RealD resnorm = sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage
<< "ConjugateGradient: Converged on iteration " << k << std::endl;
std::cout << GridLogMessage << "Computed residual " << sqrt(cp / ssq)
<< " true residual " << true_residual << " target "
<< Tolerance << std::endl;
std::cout << GridLogMessage << "Time elapsed: Iterations "
<< SolverTimer.Elapsed() << " Matrix "
<< MatrixTimer.Elapsed() << " Linalg "
<< LinalgTimer.Elapsed();
std::cout << std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
return;
}
}
std::cout << GridLogMessage << "ConjugateGradient did NOT converge"
<< std::endl;
if (ErrorOnNoConverge) assert(0);
}
};
} }
#endif #endif

View File

@ -81,11 +81,8 @@ public:
virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0; virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0; virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;
virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0; virtual int CheckerBoardShiftForCB(int source_cb,int dim,int shift,int cb)=0;
int CheckerBoardFromOindex (int Oindex){ virtual int CheckerBoardFromOindex (int Oindex)=0;
std::vector<int> ocoor; virtual int CheckerBoardFromOindexTable (int Oindex)=0;
oCoorFromOindex(ocoor,Oindex);
return CheckerBoard(ocoor);
}
////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////
// Local layout calculations // Local layout calculations

View File

@ -39,6 +39,13 @@ class GridCartesian: public GridBase {
public: public:
virtual int CheckerBoardFromOindexTable (int Oindex) {
return 0;
}
virtual int CheckerBoardFromOindex (int Oindex)
{
return 0;
}
virtual int CheckerBoarded(int dim){ virtual int CheckerBoarded(int dim){
return 0; return 0;
} }

View File

@ -43,6 +43,7 @@ class GridRedBlackCartesian : public GridBase
public: public:
std::vector<int> _checker_dim_mask; std::vector<int> _checker_dim_mask;
int _checker_dim; int _checker_dim;
std::vector<int> _checker_board;
virtual int CheckerBoarded(int dim){ virtual int CheckerBoarded(int dim){
if( dim==_checker_dim) return 1; if( dim==_checker_dim) return 1;
@ -72,12 +73,20 @@ public:
// or by looping over x,y,z and multiply rather than computing checkerboard. // or by looping over x,y,z and multiply rather than computing checkerboard.
if ( (source_cb+ocb)&1 ) { if ( (source_cb+ocb)&1 ) {
return (shift)/2; return (shift)/2;
} else { } else {
return (shift+1)/2; return (shift+1)/2;
} }
} }
virtual int CheckerBoardFromOindexTable (int Oindex) {
return _checker_board[Oindex];
}
virtual int CheckerBoardFromOindex (int Oindex)
{
std::vector<int> ocoor;
oCoorFromOindex(ocoor,Oindex);
return CheckerBoard(ocoor);
}
virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){ virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite){
if(dim != _checker_dim) return shift; if(dim != _checker_dim) return shift;
@ -169,7 +178,7 @@ public:
// all elements of a simd vector must have same checkerboard. // all elements of a simd vector must have same checkerboard.
// If Ls vectorised, this must still be the case; e.g. dwf rb5d // If Ls vectorised, this must still be the case; e.g. dwf rb5d
if ( _simd_layout[d]>1 ) { if ( _simd_layout[d]>1 ) {
if ( d != _checker_dim ) { if ( checker_dim_mask[d] ) {
assert( (_rdimensions[d]&0x1) == 0 ); assert( (_rdimensions[d]&0x1) == 0 );
} }
} }
@ -185,6 +194,8 @@ public:
_ostride[d] = _ostride[d-1]*_rdimensions[d-1]; _ostride[d] = _ostride[d-1]*_rdimensions[d-1];
_istride[d] = _istride[d-1]*_simd_layout[d-1]; _istride[d] = _istride[d-1]*_simd_layout[d-1];
} }
} }
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
@ -205,6 +216,18 @@ public:
_slice_nblock[d]=nblock; _slice_nblock[d]=nblock;
block = block*_rdimensions[d]; block = block*_rdimensions[d];
} }
////////////////////////////////////////////////
// Create a checkerboard lookup table
////////////////////////////////////////////////
int rvol = 1;
for(int d=0;d<_ndimension;d++){
rvol=rvol * _rdimensions[d];
}
_checker_board.resize(rvol);
for(int osite=0;osite<_osites;osite++){
_checker_board[osite] = CheckerBoardFromOindex (osite);
}
}; };
protected: protected:

View File

@ -1,3 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -56,6 +57,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<
int e1=rhs._grid->_slice_nblock[dimension]; int e1=rhs._grid->_slice_nblock[dimension];
int e2=rhs._grid->_slice_block[dimension]; int e2=rhs._grid->_slice_block[dimension];
int stride=rhs._grid->_slice_stride[dimension]; int stride=rhs._grid->_slice_stride[dimension];
if ( cbmask == 0x3 ) { if ( cbmask == 0x3 ) {
PARALLEL_NESTED_LOOP2 PARALLEL_NESTED_LOOP2
@ -68,15 +70,20 @@ PARALLEL_NESTED_LOOP2
} }
} else { } else {
int bo=0; int bo=0;
std::vector<std::pair<int,int> > table;
for(int n=0;n<e1;n++){ for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){ for(int b=0;b<e2;b++){
int o = n*stride; int o = n*stride;
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup int ocb=1<<rhs._grid->CheckerBoardFromOindexTable(o+b);
if ( ocb &cbmask ) { if ( ocb &cbmask ) {
buffer[off+bo++]=compress(rhs._odata[so+o+b]); table.push_back(std::pair<int,int> (bo++,o+b));
} }
} }
} }
PARALLEL_FOR_LOOP
for(int i=0;i<table.size();i++){
buffer[off+table[i].first]=compress(rhs._odata[so+table[i].second]);
}
} }
} }

412
lib/fftw/fftw3.h Normal file
View File

@ -0,0 +1,412 @@
/*
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* The following statement of license applies *only* to this header file,
* and *not* to the other files distributed with FFTW or derived therefrom:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/***************************** NOTE TO USERS *********************************
*
* THIS IS A HEADER FILE, NOT A MANUAL
*
* If you want to know how to use FFTW, please read the manual,
* online at http://www.fftw.org/doc/ and also included with FFTW.
* For a quick start, see the manual's tutorial section.
*
* (Reading header files to learn how to use a library is a habit
* stemming from code lacking a proper manual. Arguably, it's a
* *bad* habit in most cases, because header files can contain
* interfaces that are not part of the public, stable API.)
*
****************************************************************************/
#ifndef FFTW3_H
#define FFTW3_H
#include <stdio.h>
#ifdef __cplusplus
extern "C"
{
#endif /* __cplusplus */
/* If <complex.h> is included, use the C99 complex type. Otherwise
define a type bit-compatible with C99 complex */
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
# define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
#else
# define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
#endif
#define FFTW_CONCAT(prefix, name) prefix ## name
#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
#define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name)
/* IMPORTANT: for Windows compilers, you should add a line
#define FFTW_DLL
here and in kernel/ifftw.h if you are compiling/using FFTW as a
DLL, in order to do the proper importing/exporting, or
alternatively compile with -DFFTW_DLL or the equivalent
command-line flag. This is not necessary under MinGW/Cygwin, where
libtool does the imports/exports automatically. */
#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__))
/* annoying Windows syntax for shared-library declarations */
# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */
# define FFTW_EXTERN extern __declspec(dllexport)
# else /* user is calling FFTW; import symbol */
# define FFTW_EXTERN extern __declspec(dllimport)
# endif
#else
# define FFTW_EXTERN extern
#endif
enum fftw_r2r_kind_do_not_use_me {
FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
};
struct fftw_iodim_do_not_use_me {
int n; /* dimension size */
int is; /* input stride */
int os; /* output stride */
};
#include <stddef.h> /* for ptrdiff_t */
struct fftw_iodim64_do_not_use_me {
ptrdiff_t n; /* dimension size */
ptrdiff_t is; /* input stride */
ptrdiff_t os; /* output stride */
};
typedef void (*fftw_write_char_func_do_not_use_me)(char c, void *);
typedef int (*fftw_read_char_func_do_not_use_me)(void *);
/*
huge second-order macro that defines prototypes for all API
functions. We expand this macro for each supported precision
X: name-mangling macro
R: real data type
C: complex data type
*/
#define FFTW_DEFINE_API(X, R, C) \
\
FFTW_DEFINE_COMPLEX(R, C); \
\
typedef struct X(plan_s) *X(plan); \
\
typedef struct fftw_iodim_do_not_use_me X(iodim); \
typedef struct fftw_iodim64_do_not_use_me X(iodim64); \
\
typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \
\
typedef fftw_write_char_func_do_not_use_me X(write_char_func); \
typedef fftw_read_char_func_do_not_use_me X(read_char_func); \
\
FFTW_EXTERN void X(execute)(const X(plan) p); \
\
FFTW_EXTERN X(plan) X(plan_dft)(int rank, const int *n, \
C *in, C *out, int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_2d)(int n0, int n1, \
C *in, C *out, int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_3d)(int n0, int n1, int n2, \
C *in, C *out, int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_many_dft)(int rank, const int *n, \
int howmany, \
C *in, const int *inembed, \
int istride, int idist, \
C *out, const int *onembed, \
int ostride, int odist, \
int sign, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
C *in, C *out, \
int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *ri, R *ii, R *ro, R *io, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_dft)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
C *in, C *out, \
int sign, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_split_dft)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *ri, R *ii, R *ro, R *io, \
unsigned flags); \
\
FFTW_EXTERN void X(execute_dft)(const X(plan) p, C *in, C *out); \
FFTW_EXTERN void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \
R *ro, R *io); \
\
FFTW_EXTERN X(plan) X(plan_many_dft_r2c)(int rank, const int *n, \
int howmany, \
R *in, const int *inembed, \
int istride, int idist, \
C *out, const int *onembed, \
int ostride, int odist, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_r2c)(int rank, const int *n, \
R *in, C *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_r2c_2d)(int n0, int n1, \
R *in, C *out, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_r2c_3d)(int n0, int n1, \
int n2, \
R *in, C *out, unsigned flags); \
\
\
FFTW_EXTERN X(plan) X(plan_many_dft_c2r)(int rank, const int *n, \
int howmany, \
C *in, const int *inembed, \
int istride, int idist, \
R *out, const int *onembed, \
int ostride, int odist, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_c2r)(int rank, const int *n, \
C *in, R *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_c2r_2d)(int n0, int n1, \
C *in, R *out, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_dft_c2r_3d)(int n0, int n1, \
int n2, \
C *in, R *out, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, C *out, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
C *in, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_split_dft_r2c)( \
int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, R *ro, R *io, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru_split_dft_c2r)( \
int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *ri, R *ii, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_dft_r2c)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, C *out, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_dft_c2r)(int rank, \
const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
C *in, R *out, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_r2c)( \
int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, R *ro, R *io, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_guru64_split_dft_c2r)( \
int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *ri, R *ii, R *out, \
unsigned flags); \
\
FFTW_EXTERN void X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \
FFTW_EXTERN void X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \
\
FFTW_EXTERN void X(execute_split_dft_r2c)(const X(plan) p, \
R *in, R *ro, R *io); \
FFTW_EXTERN void X(execute_split_dft_c2r)(const X(plan) p, \
R *ri, R *ii, R *out); \
\
FFTW_EXTERN X(plan) X(plan_many_r2r)(int rank, const int *n, \
int howmany, \
R *in, const int *inembed, \
int istride, int idist, \
R *out, const int *onembed, \
int ostride, int odist, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_r2r_1d)(int n, R *in, R *out, \
X(r2r_kind) kind, unsigned flags); \
FFTW_EXTERN X(plan) X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \
X(r2r_kind) kind0, X(r2r_kind) kind1, \
unsigned flags); \
FFTW_EXTERN X(plan) X(plan_r2r_3d)(int n0, int n1, int n2, \
R *in, R *out, X(r2r_kind) kind0, \
X(r2r_kind) kind1, X(r2r_kind) kind2, \
unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims, \
int howmany_rank, \
const X(iodim) *howmany_dims, \
R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN X(plan) X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \
int howmany_rank, \
const X(iodim64) *howmany_dims, \
R *in, R *out, \
const X(r2r_kind) *kind, unsigned flags); \
\
FFTW_EXTERN void X(execute_r2r)(const X(plan) p, R *in, R *out); \
\
FFTW_EXTERN void X(destroy_plan)(X(plan) p); \
FFTW_EXTERN void X(forget_wisdom)(void); \
FFTW_EXTERN void X(cleanup)(void); \
\
FFTW_EXTERN void X(set_timelimit)(double t); \
\
FFTW_EXTERN void X(plan_with_nthreads)(int nthreads); \
FFTW_EXTERN int X(init_threads)(void); \
FFTW_EXTERN void X(cleanup_threads)(void); \
\
FFTW_EXTERN int X(export_wisdom_to_filename)(const char *filename); \
FFTW_EXTERN void X(export_wisdom_to_file)(FILE *output_file); \
FFTW_EXTERN char *X(export_wisdom_to_string)(void); \
FFTW_EXTERN void X(export_wisdom)(X(write_char_func) write_char, \
void *data); \
FFTW_EXTERN int X(import_system_wisdom)(void); \
FFTW_EXTERN int X(import_wisdom_from_filename)(const char *filename); \
FFTW_EXTERN int X(import_wisdom_from_file)(FILE *input_file); \
FFTW_EXTERN int X(import_wisdom_from_string)(const char *input_string); \
FFTW_EXTERN int X(import_wisdom)(X(read_char_func) read_char, void *data); \
\
FFTW_EXTERN void X(fprint_plan)(const X(plan) p, FILE *output_file); \
FFTW_EXTERN void X(print_plan)(const X(plan) p); \
FFTW_EXTERN char *X(sprint_plan)(const X(plan) p); \
\
FFTW_EXTERN void *X(malloc)(size_t n); \
FFTW_EXTERN R *X(alloc_real)(size_t n); \
FFTW_EXTERN C *X(alloc_complex)(size_t n); \
FFTW_EXTERN void X(free)(void *p); \
\
FFTW_EXTERN void X(flops)(const X(plan) p, \
double *add, double *mul, double *fmas); \
FFTW_EXTERN double X(estimate_cost)(const X(plan) p); \
FFTW_EXTERN double X(cost)(const X(plan) p); \
\
FFTW_EXTERN int X(alignment_of)(R *p); \
FFTW_EXTERN const char X(version)[]; \
FFTW_EXTERN const char X(cc)[]; \
FFTW_EXTERN const char X(codelet_optim)[];
/* end of FFTW_DEFINE_API macro */
FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex)
FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex)
FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex)
/* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64
for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) \
&& !(defined(__ICC) || defined(__INTEL_COMPILER)) \
&& (defined(__i386__) || defined(__x86_64__) || defined(__ia64__))
# if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I)
/* note: __float128 is a typedef, which is not supported with the _Complex
keyword in gcc, so instead we use this ugly __attribute__ version.
However, we can't simply pass the __attribute__ version to
FFTW_DEFINE_API because the __attribute__ confuses gcc in pointer
types. Hence redefining FFTW_DEFINE_COMPLEX. Ugh. */
# undef FFTW_DEFINE_COMPLEX
# define FFTW_DEFINE_COMPLEX(R, C) typedef _Complex float __attribute__((mode(TC))) C
# endif
FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex)
#endif
#define FFTW_FORWARD (-1)
#define FFTW_BACKWARD (+1)
#define FFTW_NO_TIMELIMIT (-1.0)
/* documented flags */
#define FFTW_MEASURE (0U)
#define FFTW_DESTROY_INPUT (1U << 0)
#define FFTW_UNALIGNED (1U << 1)
#define FFTW_CONSERVE_MEMORY (1U << 2)
#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
#define FFTW_ESTIMATE (1U << 6)
#define FFTW_WISDOM_ONLY (1U << 21)
/* undocumented beyond-guru flags */
#define FFTW_ESTIMATE_PATIENT (1U << 7)
#define FFTW_BELIEVE_PCOST (1U << 8)
#define FFTW_NO_DFT_R2HC (1U << 9)
#define FFTW_NO_NONTHREADED (1U << 10)
#define FFTW_NO_BUFFERING (1U << 11)
#define FFTW_NO_INDIRECT_OP (1U << 12)
#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
#define FFTW_NO_RANK_SPLITS (1U << 14)
#define FFTW_NO_VRANK_SPLITS (1U << 15)
#define FFTW_NO_VRECURSE (1U << 16)
#define FFTW_NO_SIMD (1U << 17)
#define FFTW_NO_SLOW (1U << 18)
#define FFTW_NO_FIXED_RADIX_LARGE_N (1U << 19)
#define FFTW_ALLOW_PRUNING (1U << 20)
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* FFTW3_H */

View File

@ -194,22 +194,22 @@ class BinaryIO {
std::vector<int> site({x,y,z,t}); std::vector<int> site({x,y,z,t});
if ( grid->IsBoss() ) { if (grid->IsBoss()) {
fin.read((char *)&file_object,sizeof(file_object)); fin.read((char *)&file_object, sizeof(file_object));
bytes += sizeof(file_object); bytes += sizeof(file_object);
if(ieee32big) be32toh_v((void *)&file_object,sizeof(file_object)); if (ieee32big) be32toh_v((void *)&file_object, sizeof(file_object));
if(ieee32) le32toh_v((void *)&file_object,sizeof(file_object)); if (ieee32) le32toh_v((void *)&file_object, sizeof(file_object));
if(ieee64big) be64toh_v((void *)&file_object,sizeof(file_object)); if (ieee64big) be64toh_v((void *)&file_object, sizeof(file_object));
if(ieee64) le64toh_v((void *)&file_object,sizeof(file_object)); if (ieee64) le64toh_v((void *)&file_object, sizeof(file_object));
munge(file_object,munged,csum); munge(file_object, munged, csum);
} }
// The boss who read the file has their value poked // The boss who read the file has their value poked
pokeSite(munged,Umu,site); pokeSite(munged,Umu,site);
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectSerial: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/ (double)timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -254,20 +254,20 @@ class BinaryIO {
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object)); if(ieee32big) htobe32_v((void *)&file_object,sizeof(file_object));
if(ieee32) htole32_v((void *)&file_object,sizeof(file_object)); if(ieee32) htole32_v((void *)&file_object,sizeof(file_object));
if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object)); if(ieee64big) htobe64_v((void *)&file_object,sizeof(file_object));
if(ieee64) htole64_v((void *)&file_object,sizeof(file_object)); if(ieee64) htole64_v((void *)&file_object,sizeof(file_object));
// NB could gather an xstrip as an optimisation. // NB could gather an xstrip as an optimisation.
fout.write((char *)&file_object,sizeof(file_object)); fout.write((char *)&file_object,sizeof(file_object));
bytes+=sizeof(file_object); bytes+=sizeof(file_object);
} }
}}}} }}}}
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectSerial: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -305,15 +305,15 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
// std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl; // std::cout << "rank" << rank<<" Getting state for index "<<l_idx<<std::endl;
parallel.GetState(saved,l_idx); parallel.GetState(saved,l_idx);
} }
grid->Broadcast(rank,(void *)&saved[0],bytes); grid->Broadcast(rank,(void *)&saved[0],bytes);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
fout.write((char *)&saved[0],bytes); fout.write((char *)&saved[0],bytes);
} }
} }
@ -355,14 +355,14 @@ class BinaryIO {
int l_idx=parallel.generator_idx(o_idx,i_idx); int l_idx=parallel.generator_idx(o_idx,i_idx);
if ( grid->IsBoss() ) { if ( grid->IsBoss() ) {
fin.read((char *)&saved[0],bytes); fin.read((char *)&saved[0],bytes);
Uint32Checksum((uint32_t *)&saved[0],bytes,csum); Uint32Checksum((uint32_t *)&saved[0],bytes,csum);
} }
grid->Broadcast(0,(void *)&saved[0],bytes); grid->Broadcast(0,(void *)&saved[0],bytes);
if( rank == grid->ThisRank() ){ if( rank == grid->ThisRank() ){
parallel.SetState(saved,l_idx); parallel.SetState(saved,l_idx);
} }
} }
@ -415,15 +415,15 @@ class BinaryIO {
if ( d == 0 ) parallel[d] = 0; if ( d == 0 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
} }
@ -434,9 +434,9 @@ class BinaryIO {
std::cout<< std::dec ; std::cout<< std::dec ;
std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel read I/O to "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -463,7 +463,7 @@ class BinaryIO {
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){ for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
@ -472,8 +472,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
///////////////////////// /////////////////////////
@ -487,29 +487,29 @@ class BinaryIO {
// iorank reads from the seek // iorank reads from the seek
//////////////////////////////// ////////////////////////////////
if (myrank == iorank) { if (myrank == iorank) {
fin.seekg(offset+g_idx*sizeof(fileObj)); fin.seekg(offset+g_idx*sizeof(fileObj));
fin.read((char *)&fileObj,sizeof(fileObj)); fin.read((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) be32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) le32toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) be64toh_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) le64toh_v((void *)&fileObj,sizeof(fileObj));
munge(fileObj,siteObj,csum); munge(fileObj,siteObj,csum);
} }
// Possibly do transport through pt2pt // Possibly do transport through pt2pt
if ( rank != iorank ) { if ( rank != iorank ) {
if ( (myrank == rank) || (myrank==iorank) ) { if ( (myrank == rank) || (myrank==iorank) ) {
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,iorank,rank,sizeof(siteObj));
} }
} }
// Poke at destination // Poke at destination
if ( myrank == rank ) { if ( myrank == rank ) {
pokeLocalSite(siteObj,Umu,lsite); pokeLocalSite(siteObj,Umu,lsite);
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
} }
@ -520,7 +520,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"readObjectParallel: read "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }
@ -558,15 +558,15 @@ class BinaryIO {
if ( d!= grid->_ndimension-1 ) parallel[d] = 0; if ( d!= grid->_ndimension-1 ) parallel[d] = 0;
if (parallel[d]) { if (parallel[d]) {
range[d] = grid->_ldimensions[d]; range[d] = grid->_ldimensions[d];
start[d] = grid->_processor_coor[d]*range[d]; start[d] = grid->_processor_coor[d]*range[d];
ioproc[d]= grid->_processor_coor[d]; ioproc[d]= grid->_processor_coor[d];
} else { } else {
range[d] = grid->_gdimensions[d]; range[d] = grid->_gdimensions[d];
start[d] = 0; start[d] = 0;
ioproc[d]= 0; ioproc[d]= 0;
if ( grid->_processor_coor[d] != 0 ) IOnode = 0; if ( grid->_processor_coor[d] != 0 ) IOnode = 0;
} }
slice_vol = slice_vol * range[d]; slice_vol = slice_vol * range[d];
@ -577,9 +577,9 @@ class BinaryIO {
grid->GlobalSum(tmp); grid->GlobalSum(tmp);
std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice "; std::cout<< GridLogMessage<< "Parallel write I/O from "<< file << " with " <<tmp<< " IOnodes for subslice ";
for(int d=0;d<grid->_ndimension;d++){ for(int d=0;d<grid->_ndimension;d++){
std::cout<< range[d]; std::cout<< range[d];
if( d< grid->_ndimension-1 ) if( d< grid->_ndimension-1 )
std::cout<< " x "; std::cout<< " x ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
@ -610,7 +610,7 @@ class BinaryIO {
// should aggregate a whole chunk and then write. // should aggregate a whole chunk and then write.
// need to implement these loops in Nd independent way with a lexico conversion // need to implement these loops in Nd independent way with a lexico conversion
for(int tlex=0;tlex<slice_vol;tlex++){ for(int tlex=0;tlex<slice_vol;tlex++){
std::vector<int> tsite(nd); // temporary mixed up site std::vector<int> tsite(nd); // temporary mixed up site
std::vector<int> gsite(nd); std::vector<int> gsite(nd);
std::vector<int> lsite(nd); std::vector<int> lsite(nd);
@ -619,8 +619,8 @@ class BinaryIO {
Lexicographic::CoorFromIndex(tsite,tlex,range); Lexicographic::CoorFromIndex(tsite,tlex,range);
for(int d=0;d<nd;d++){ for(int d=0;d<nd;d++){
lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site lsite[d] = tsite[d]%grid->_ldimensions[d]; // local site
gsite[d] = tsite[d]+start[d]; // global site gsite[d] = tsite[d]+start[d]; // global site
} }
@ -640,26 +640,26 @@ class BinaryIO {
// Pair of nodes may need to do pt2pt send // Pair of nodes may need to do pt2pt send
if ( rank != iorank ) { // comms is necessary if ( rank != iorank ) { // comms is necessary
if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it if ( (myrank == rank) || (myrank==iorank) ) { // and we have to do it
// Send to IOrank // Send to IOrank
grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj)); grid->SendRecvPacket((void *)&siteObj,(void *)&siteObj,rank,iorank,sizeof(siteObj));
} }
} }
grid->Barrier(); // necessary? grid->Barrier(); // necessary?
if (myrank == iorank) { if (myrank == iorank) {
munge(siteObj,fileObj,csum); munge(siteObj,fileObj,csum);
if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32big) htobe32_v((void *)&fileObj,sizeof(fileObj));
if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj)); if(ieee32) htole32_v((void *)&fileObj,sizeof(fileObj));
if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64big) htobe64_v((void *)&fileObj,sizeof(fileObj));
if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj)); if(ieee64) htole64_v((void *)&fileObj,sizeof(fileObj));
fout.seekp(offset+g_idx*sizeof(fileObj)); fout.seekp(offset+g_idx*sizeof(fileObj));
fout.write((char *)&fileObj,sizeof(fileObj)); fout.write((char *)&fileObj,sizeof(fileObj));
bytes+=sizeof(fileObj); bytes+=sizeof(fileObj);
} }
} }
@ -668,7 +668,7 @@ class BinaryIO {
timer.Stop(); timer.Stop();
std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<<GridLogPerformance<<"writeObjectParallel: wrote "<< bytes <<" bytes in "<<timer.Elapsed() <<" "
<< (double)bytes/timer.useconds() <<" MB/s " <<std::endl; << (double)bytes/timer.useconds() <<" MB/s " <<std::endl;
return csum; return csum;
} }

View File

@ -55,11 +55,14 @@ namespace QCD {
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// QCD iMatrix types // QCD iMatrix types
// Index conventions: Lorentz x Spin x Colour // Index conventions: Lorentz x Spin x Colour
// note: static const int or constexpr will work for type deductions
// with the intel compiler (up to version 17)
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
static const int ColourIndex = 2; #define ColourIndex 2
static const int SpinIndex = 1; #define SpinIndex 1
static const int LorentzIndex= 0; #define LorentzIndex 0
// Also should make these a named enum type // Also should make these a named enum type
static const int DaggerNo=0; static const int DaggerNo=0;
static const int DaggerYes=1; static const int DaggerYes=1;
@ -490,16 +493,27 @@ namespace QCD {
} //namespace QCD } //namespace QCD
} // Grid } // Grid
#include <Grid/qcd/utils/SpaceTimeGrid.h> #include <Grid/qcd/utils/SpaceTimeGrid.h>
#include <Grid/qcd/spin/Dirac.h> #include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/spin/TwoSpinor.h> #include <Grid/qcd/spin/TwoSpinor.h>
#include <Grid/qcd/utils/LinalgUtils.h> #include <Grid/qcd/utils/LinalgUtils.h>
#include <Grid/qcd/utils/CovariantCshift.h> #include <Grid/qcd/utils/CovariantCshift.h>
// Include representations
#include <Grid/qcd/utils/SUn.h> #include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/hmc_types.h>
#include <Grid/qcd/action/Actions.h> #include <Grid/qcd/action/Actions.h>
#include <Grid/qcd/smearing/Smearing.h>
#include <Grid/qcd/hmc/integrators/Integrator.h> #include <Grid/qcd/hmc/integrators/Integrator.h>
#include <Grid/qcd/hmc/integrators/Integrator_algorithm.h> #include <Grid/qcd/hmc/integrators/Integrator_algorithm.h>
#include <Grid/qcd/hmc/HMC.h> #include <Grid/qcd/hmc/HMC.h>
#include <Grid/qcd/smearing/Smearing.h>
#endif #endif

View File

@ -1,87 +1,153 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/ActionBase.h Source file: ./lib/qcd/action/ActionBase.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef QCD_ACTION_BASE #ifndef QCD_ACTION_BASE
#define QCD_ACTION_BASE #define QCD_ACTION_BASE
namespace Grid { namespace Grid {
namespace QCD{ namespace QCD {
template<class GaugeField>
class Action {
template <class GaugeField>
class Action {
public: public:
bool is_smeared = false; bool is_smeared = false;
// Boundary conditions? // Heatbath? // Boundary conditions? // Heatbath?
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) = 0;// refresh pseudofermions virtual void refresh(const GaugeField& U,
virtual RealD S (const GaugeField &U) = 0; // evaluate the action GridParallelRNG& pRNG) = 0; // refresh pseudofermions
virtual void deriv(const GaugeField &U,GaugeField & dSdU ) = 0; // evaluate the action derivative virtual RealD S(const GaugeField& U) = 0; // evaluate the action
virtual ~Action() {}; virtual void deriv(const GaugeField& U,
GaugeField& dSdU) = 0; // evaluate the action derivative
virtual ~Action(){};
};
// Indexing of tuple types
template <class T, class Tuple>
struct Index;
template <class T, class... Types>
struct Index<T, std::tuple<T, Types...>> {
static const std::size_t value = 0;
};
template <class T, class U, class... Types>
struct Index<T, std::tuple<U, Types...>> {
static const std::size_t value = 1 + Index<T, std::tuple<Types...>>::value;
}; };
// Could derive PseudoFermion action with a PF field, FermionField, and a Grid; implement refresh
/* /*
template<class GaugeField, class FermionField> template <class GaugeField>
class PseudoFermionAction : public Action<GaugeField> { struct ActionLevel {
public: public:
FermionField Phi; typedef Action<GaugeField>*
GridParallelRNG &pRNG; ActPtr; // now force the same colours as the rest of the code
GridBase &Grid;
PseudoFermionAction(GridBase &_Grid,GridParallelRNG &_pRNG) : Grid(_Grid), Phi(&_Grid), pRNG(_pRNG) { //Add supported representations here
};
virtual void refresh(const GaugeField &gauge) {
gaussian(Phi,pRNG);
};
}; unsigned int multiplier;
*/
template<class GaugeField> struct ActionLevel{
public:
typedef Action<GaugeField>* ActPtr; // now force the same colours as the rest of the code
int multiplier;
std::vector<ActPtr> actions; std::vector<ActPtr> actions;
ActionLevel(int mul = 1) : multiplier(mul) { ActionLevel(unsigned int mul = 1) : actions(0), multiplier(mul) {
assert (mul > 0); assert(mul >= 1);
}; };
void push_back(ActPtr ptr){ void push_back(ActPtr ptr) { actions.push_back(ptr); }
actions.push_back(ptr); };
*/
template <class GaugeField, class Repr = NoHirep >
struct ActionLevel {
public:
unsigned int multiplier;
// Fundamental repr actions separated because of the smearing
typedef Action<GaugeField>* ActPtr;
// construct a tuple of vectors of the actions for the corresponding higher
// representation fields
typedef typename AccessTypes<Action, Repr>::VectorCollection action_collection;
action_collection actions_hirep;
typedef typename AccessTypes<Action, Repr>::FieldTypeCollection action_hirep_types;
std::vector<ActPtr>& actions;
// Temporary conversion between ActionLevel and ActionLevelHirep
//ActionLevelHirep(ActionLevel<GaugeField>& AL ):actions(AL.actions), multiplier(AL.multiplier){}
ActionLevel(unsigned int mul = 1) : actions(std::get<0>(actions_hirep)), multiplier(mul) {
// initialize the hirep vectors to zero.
//apply(this->resize, actions_hirep, 0); //need a working resize
assert(mul >= 1);
};
//void push_back(ActPtr ptr) { actions.push_back(ptr); }
template < class Field >
void push_back(Action<Field>* ptr) {
// insert only in the correct vector
std::get< Index < Field, action_hirep_types>::value >(actions_hirep).push_back(ptr);
};
template < class ActPtr>
static void resize(ActPtr ap, unsigned int n){
ap->resize(n);
} }
//template <std::size_t I>
//auto getRepresentation(Repr& R)->decltype(std::get<I>(R).U) {return std::get<I>(R).U;}
// Loop on tuple for a callable function
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I == std::tuple_size<action_collection>::value, void>::type apply(
Callable, Repr& R,Args&...) const {}
template <std::size_t I = 1, typename Callable, typename ...Args>
inline typename std::enable_if<I < std::tuple_size<action_collection>::value, void>::type apply(
Callable fn, Repr& R, Args&... arguments) const {
fn(std::get<I>(actions_hirep), std::get<I>(R.rep), arguments...);
apply<I + 1>(fn, R, arguments...);
}
}; };
template<class GaugeField> using ActionSet = std::vector<ActionLevel< GaugeField > >;
//template <class GaugeField>
//using ActionSet = std::vector<ActionLevel<GaugeField> >;
}} template <class GaugeField, class R>
using ActionSet = std::vector<ActionLevel<GaugeField, R> >;
}
}
#endif #endif

View File

@ -116,6 +116,14 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
template class A<GparityWilsonImplF>; \ template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; template class A<GparityWilsonImplD>;
#define AdjointFermOpTemplateInstantiate(A) \
template class A<WilsonAdjImplF>; \
template class A<WilsonAdjImplD>;
#define TwoIndexFermOpTemplateInstantiate(A) \
template class A<WilsonTwoIndexSymmetricImplF>; \
template class A<WilsonTwoIndexSymmetricImplD>;
#define FermOp5dVecTemplateInstantiate(A) \ #define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \ template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; \ template class A<DomainWallVec5dImplD>; \
@ -126,6 +134,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
FermOp4dVecTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \
FermOp5dVecTemplateInstantiate(A) FermOp5dVecTemplateInstantiate(A)
#define GparityFermOpTemplateInstantiate(A) #define GparityFermOpTemplateInstantiate(A)
//////////////////////////////////////////// ////////////////////////////////////////////
@ -171,6 +180,14 @@ typedef WilsonFermion<WilsonImplR> WilsonFermionR;
typedef WilsonFermion<WilsonImplF> WilsonFermionF; typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD; typedef WilsonFermion<WilsonImplD> WilsonFermionD;
typedef WilsonFermion<WilsonAdjImplR> WilsonAdjFermionR;
typedef WilsonFermion<WilsonAdjImplF> WilsonAdjFermionF;
typedef WilsonFermion<WilsonAdjImplD> WilsonAdjFermionD;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplR> WilsonTwoIndexSymmetricFermionR;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplF> WilsonTwoIndexSymmetricFermionF;
typedef WilsonFermion<WilsonTwoIndexSymmetricImplD> WilsonTwoIndexSymmetricFermionD;
typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR; typedef WilsonTMFermion<WilsonImplR> WilsonTMFermionR;
typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF; typedef WilsonTMFermion<WilsonImplF> WilsonTMFermionF;
typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD; typedef WilsonTMFermion<WilsonImplD> WilsonTMFermionD;

View File

@ -1,35 +1,36 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
#ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H /* END LEGAL */
#define GRID_QCD_FERMION_OPERATOR_IMPL_H #ifndef GRID_QCD_FERMION_OPERATOR_IMPL_H
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
namespace Grid { namespace Grid {
@ -99,258 +100,281 @@ namespace Grid {
typedef typename Impl::SiteSpinor SiteSpinor; \ typedef typename Impl::SiteSpinor SiteSpinor; \
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; \ typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t; typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base)\ INHERIT_GIMPL_TYPES(Base) \
INHERIT_FIMPL_TYPES(Base) INHERIT_FIMPL_TYPES(Base)
/////// ///////
// Single flavour four spinors with colour index // Single flavour four spinors with colour index
/////// ///////
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD> template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > { class WilsonImpl
: public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
public: public:
static const int Dimension = Representation::Dimension;
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
//Necessary?
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t; typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
typedef iImplSpinor <Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params; ImplParams Params;
WilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {}; WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ inline void multLink(SiteHalfSpinor &phi,
mult(&phi(),&U(mu),&chi()); const SiteDoubledGaugeField &U,
} const SiteHalfSpinor &chi,
int mu,
template<class ref> StencilEntry *SE,
inline void loadLinkElement(Simd & reg,ref &memory){ StencilImpl &St) {
reg = memory; mult(&phi(), &U(mu), &chi());
}
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{
conformable(Uds._grid,GaugeGrid);
conformable(Umu._grid,GaugeGrid);
GaugeLinkField U(GaugeGrid);
for(int mu=0;mu<Nd;mu++){
U = PeekIndex<LorentzIndex>(Umu,mu);
PokeIndex<LorentzIndex>(Uds,U,mu);
U = adj(Cshift(U,mu,-1));
PokeIndex<LorentzIndex>(Uds,U,mu+4);
}
} }
template <class ref>
inline void loadLinkElement(Simd &reg,
ref &memory) {
reg = memory;
}
inline void DoubleStore(GridBase *GaugeGrid,
DoubledGaugeField &Uds,
const GaugeField &Umu) {
conformable(Uds._grid, GaugeGrid);
conformable(Umu._grid, GaugeGrid);
GaugeLinkField U(GaugeGrid);
for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Uds, U, mu);
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
GaugeLinkField link(mat._grid); GaugeLinkField link(mat._grid);
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
PokeIndex<LorentzIndex>(mat,link,mu); PokeIndex<LorentzIndex>(mat,link,mu);
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
int Ls=Btilde._grid->_fdimensions[0]; int Ls=Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP
for(int sss=0;sss<tmp._grid->oSites();sss++){ PARALLEL_FOR_LOOP
int sU=sss; for(int sss=0;sss<tmp._grid->oSites();sss++){
for(int s=0;s<Ls;s++){ int sU=sss;
int sF = s+Ls*sU; for(int s=0;s<Ls;s++){
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here int sF = s+Ls*sU;
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
}
} }
}
PokeIndex<LorentzIndex>(mat,tmp,mu); PokeIndex<LorentzIndex>(mat,tmp,mu);
} }
}; };
/////// ///////
// Single flavour four spinors with colour index, 5d redblack // Single flavour four spinors with colour index, 5d redblack
/////// ///////
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD> template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=true; const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef _Coeff_t Coeff_t; typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template<typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >; template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
template<typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >; template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
template<typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >; template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
template<typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd >; template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
template<typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
typedef iImplSpinor <Simd> SiteSpinor; typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef Lattice<SiteSpinor> FermionField; typedef Lattice<SiteSpinor> FermionField;
// Make the doubled gauge field a *scalar* // Make the doubled gauge field a *scalar*
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar typedef iImplDoubledGaugeField<typename Simd::scalar_type>
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar SiteDoubledGaugeField; // This is a scalar
typedef iImplGaugeLink <typename Simd::scalar_type> SiteScalarGaugeLink; // scalar typedef iImplGaugeField<typename Simd::scalar_type>
SiteScalarGaugeField; // scalar
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef iImplGaugeLink<typename Simd::scalar_type>
SiteScalarGaugeLink; // scalar
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonImplParams ImplParams; typedef WilsonImplParams ImplParams;
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
ImplParams Params; ImplParams Params;
DomainWallVec5dImpl(const ImplParams &p= ImplParams()) : Params(p) {}; DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return false; }; bool overlapCommsCompute(void) { return false; };
template<class ref> template <class ref>
inline void loadLinkElement(Simd & reg,ref &memory){ inline void loadLinkElement(Simd &reg, ref &memory) {
vsplat(reg,memory); vsplat(reg, memory);
} }
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St) inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
{ const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
SiteGaugeLink UU; SiteGaugeLink UU;
for(int i=0;i<Nrepresentation;i++){ for (int i = 0; i < Nrepresentation; i++) {
for(int j=0;j<Nrepresentation;j++){ for (int j = 0; j < Nrepresentation; j++) {
vsplat(UU()()(i,j),U(mu)()(i,j)); vsplat(UU()()(i, j), U(mu)()(i, j));
} }
} }
mult(&phi(),&UU(),&chi()); mult(&phi(), &UU(), &chi());
} }
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
{ const GaugeField &Umu) {
SiteScalarGaugeField ScalarUmu; SiteScalarGaugeField ScalarUmu;
SiteDoubledGaugeField ScalarUds; SiteDoubledGaugeField ScalarUds;
GaugeLinkField U (Umu._grid);
GaugeField Uadj(Umu._grid);
for(int mu=0;mu<Nd;mu++){
U = PeekIndex<LorentzIndex>(Umu,mu);
U = adj(Cshift(U,mu,-1));
PokeIndex<LorentzIndex>(Uadj,U,mu);
}
for(int lidx=0;lidx<GaugeGrid->lSites();lidx++){
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx,lcoor);
peekLocalSite(ScalarUmu,Umu,lcoor);
for(int mu=0;mu<4;mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu,Uadj,lcoor);
for(int mu=0;mu<4;mu++) ScalarUds(mu+4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds,Uds,lcoor);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){ GaugeLinkField U(Umu._grid);
assert(0); GaugeField Uadj(Umu._grid);
} for (int mu = 0; mu < Nd; mu++) {
U = PeekIndex<LorentzIndex>(Umu, mu);
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){ U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uadj, U, mu);
}
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
std::vector<int> lcoor;
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
peekLocalSite(ScalarUmu, Umu, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
peekLocalSite(ScalarUmu, Uadj, lcoor);
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
pokeLocalSite(ScalarUds, Uds, lcoor);
}
}
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
FermionField &A, int mu) {
assert(0);
}
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
FermionField &Atilde, int mu) {
assert(0); assert(0);
} }
}; };
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation,class _Coeff_t = RealD> template <class S, int Nrepresentation,class _Coeff_t = RealD>
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{ class GparityWilsonImpl
: public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
public: public:
static const int Dimension = Nrepresentation;
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t; typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
template<typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp >; template <typename vtype>
template<typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp >; using iImplSpinor =
template<typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds >, Ngp >; iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
template <typename vtype>
typedef iImplSpinor <Simd> SiteSpinor; using iImplHalfSpinor =
typedef iImplHalfSpinor<Simd> SiteHalfSpinor; iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; template <typename vtype>
using iImplDoubledGaugeField =
typedef Lattice<SiteSpinor> FermionField; iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
typedef iImplSpinor<Simd> SiteSpinor;
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteSpinor> FermionField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef WilsonCompressor<SiteHalfSpinor,SiteSpinor> Compressor; typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
typedef WilsonStencil<SiteSpinor,SiteHalfSpinor> StencilImpl; typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
typedef GparityWilsonImplParams ImplParams; typedef GparityWilsonImplParams ImplParams;
ImplParams Params; ImplParams Params;
GparityWilsonImpl(const ImplParams &p= ImplParams()) : Params(p) {};
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; }; bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
// provide the multiply by link that is differentiated between Gparity (with flavour index) and non-Gparity // provide the multiply by link that is differentiated between Gparity (with
inline void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St){ // flavour index) and non-Gparity
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
StencilImpl &St) {
typedef SiteHalfSpinor vobj; typedef SiteHalfSpinor vobj;
typedef typename SiteHalfSpinor::scalar_object sobj; typedef typename SiteHalfSpinor::scalar_object sobj;
vobj vtmp; vobj vtmp;
sobj stmp; sobj stmp;
GridBase *grid = St._grid; GridBase *grid = St._grid;
const int Nsimd = grid->Nsimd(); const int Nsimd = grid->Nsimd();
int direction = St._directions[mu]; int direction = St._directions[mu];
int distance = St._distances[mu]; int distance = St._distances[mu];
int ptype = St._permute_type[mu]; int ptype = St._permute_type[mu];
int sl = St._grid->_simd_layout[direction]; int sl = St._grid->_simd_layout[direction];
// Fixme X.Y.Z.T hardcode in stencil // Fixme X.Y.Z.T hardcode in stencil
int mmu = mu % Nd; int mmu = mu % Nd;
// assert our assumptions // assert our assumptions
assert((distance==1)||(distance==-1)); // nearest neighbour stencil hard code assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
assert((sl==1)||(sl==2)); assert((sl == 1) || (sl == 2));
std::vector<int> icoor; std::vector<int> icoor;
if ( SE->_around_the_world && Params.twists[mmu] ) { if ( SE->_around_the_world && Params.twists[mmu] ) {
if ( sl == 2 ) { if ( sl == 2 ) {
@ -391,7 +415,7 @@ PARALLEL_FOR_LOOP
mult(&phi(1),&U(1)(mu),&chi(1)); mult(&phi(1),&U(1)(mu),&chi(1));
} }
} }
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu) inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
{ {
@ -404,7 +428,7 @@ PARALLEL_FOR_LOOP
GaugeLinkField Uconj(GaugeGrid); GaugeLinkField Uconj(GaugeGrid);
Lattice<iScalar<vInteger> > coor(GaugeGrid); Lattice<iScalar<vInteger> > coor(GaugeGrid);
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
@ -412,19 +436,19 @@ PARALLEL_FOR_LOOP
U = PeekIndex<LorentzIndex>(Umu,mu); U = PeekIndex<LorentzIndex>(Umu,mu);
Uconj = conjugate(U); Uconj = conjugate(U);
// This phase could come from a simple bc 1,1,-1,1 .. // This phase could come from a simple bc 1,1,-1,1 ..
int neglink = GaugeGrid->GlobalDimensions()[mu]-1; int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Uconj = where(coor==neglink,-Uconj,Uconj); Uconj = where(coor==neglink,-Uconj,Uconj);
} }
PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ PARALLEL_FOR_LOOP
Uds[ss](0)(mu) = U[ss](); for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu) = Uconj[ss](); Uds[ss](0)(mu) = U[ss]();
} Uds[ss](1)(mu) = Uconj[ss]();
}
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
Uconj = adj(Cshift(Uconj,mu,-1)); Uconj = adj(Cshift(Uconj,mu,-1));
@ -434,80 +458,86 @@ PARALLEL_FOR_LOOP
Utmp = where(coor==0,Uconj,Utmp); Utmp = where(coor==0,Uconj,Utmp);
} }
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](0)(mu+4) = Utmp[ss](); Uds[ss](0)(mu+4) = Utmp[ss]();
} }
Utmp = Uconj; Utmp = Uconj;
if ( Params.twists[mu] ) { if ( Params.twists[mu] ) {
Utmp = where(coor==0,U,Utmp); Utmp = where(coor==0,U,Utmp);
} }
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(auto ss=U.begin();ss<U.end();ss++){ for(auto ss=U.begin();ss<U.end();ss++){
Uds[ss](1)(mu+4) = Utmp[ss](); Uds[ss](1)(mu+4) = Utmp[ss]();
} }
} }
} }
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
FermionField &A, int mu) {
// DhopDir provides U or Uconj depending on coor/flavour. // DhopDir provides U or Uconj depending on coor/flavour.
GaugeLinkField link(mat._grid); GaugeLinkField link(mat._grid);
// use lorentz for flavour as hack. // use lorentz for flavour as hack.
PARALLEL_FOR_LOOP auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
for(auto ss=link.begin();ss<link.end();ss++){ PARALLEL_FOR_LOOP
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[ss],A[ss])); for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
link[ss]() = ttmp(0,0) + conjugate(ttmp(1,1)) ; link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
} }
PokeIndex<LorentzIndex>(mat,link,mu); PokeIndex<LorentzIndex>(mat, link, mu);
return; return;
} }
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField &Atilde,int mu){
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
int Ls=Btilde._grid->_fdimensions[0]; FermionField &Atilde, int mu) {
int Ls = Btilde._grid->_fdimensions[0];
GaugeLinkField tmp(mat._grid); GaugeLinkField tmp(mat._grid);
tmp = zero; tmp = zero;
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<tmp._grid->oSites();ss++){ for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
for(int s=0;s<Ls;s++){ for (int s = 0; s < Ls; s++) {
int sF = s+Ls*ss; int sF = s + Ls * ss;
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
tmp[ss]() = tmp[ss]()+ ttmp(0,0) + conjugate(ttmp(1,1)); tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
}
} }
} PokeIndex<LorentzIndex>(mat, tmp, mu);
PokeIndex<LorentzIndex>(mat,tmp,mu);
return; return;
} }
}; };
typedef WilsonImpl<vComplex ,Nc> WilsonImplR; // Real.. whichever prec typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
typedef WilsonImpl<vComplex ,Nc,ComplexD> ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc,ComplexD> ZWilsonImplF; // Float typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexD,Nc,ComplexD> ZWilsonImplD; // Double typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec typedef GparityWilsonImpl<vComplex, Nc> GparityWilsonImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
}
typedef GparityWilsonImpl<vComplex ,Nc> GparityWilsonImplR; // Real.. whichever prec
typedef GparityWilsonImpl<vComplexF,Nc> GparityWilsonImplF; // Float
typedef GparityWilsonImpl<vComplexD,Nc> GparityWilsonImplD; // Double
}
} }
#endif #endif

View File

@ -1,130 +1,129 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.cc Source file: ./lib/qcd/action/fermion/WilsonFermion.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
const std::vector<int> WilsonFermionStatic::directions ({0,1,2,3, 0, 1, 2, 3}); const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2,
const std::vector<int> WilsonFermionStatic::displacements({1,1,1,1,-1,-1,-1,-1}); 3});
int WilsonFermionStatic::HandOptDslash; const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1,
-1, -1});
int WilsonFermionStatic::HandOptDslash;
///////////////////////////////// /////////////////////////////////
// Constructor and gauge import // Constructor and gauge import
///////////////////////////////// /////////////////////////////////
template<class Impl> template <class Impl>
WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridCartesian &Fgrid, GridRedBlackCartesian &Hgrid, RealD _mass,
GridRedBlackCartesian &Hgrid, const ImplParams &p)
RealD _mass,const ImplParams &p) : : Kernels(p),
Kernels(p), _grid(&Fgrid),
_grid(&Fgrid), _cbgrid(&Hgrid),
_cbgrid(&Hgrid), Stencil(&Fgrid, npoint, Even, directions, displacements),
Stencil (&Fgrid,npoint,Even,directions,displacements), StencilEven(&Hgrid, npoint, Even, directions,
StencilEven(&Hgrid,npoint,Even,directions,displacements), // source is Even displacements), // source is Even
StencilOdd (&Hgrid,npoint,Odd ,directions,displacements), // source is Odd StencilOdd(&Hgrid, npoint, Odd, directions,
mass(_mass), displacements), // source is Odd
Lebesgue(_grid), mass(_mass),
LebesgueEvenOdd(_cbgrid), Lebesgue(_grid),
Umu(&Fgrid), LebesgueEvenOdd(_cbgrid),
UmuEven(&Hgrid), Umu(&Fgrid),
UmuOdd (&Hgrid) UmuEven(&Hgrid),
{ UmuOdd(&Hgrid) {
// Allocate the required comms buffer // Allocate the required comms buffer
ImportGauge(_Umu); ImportGauge(_Umu);
}
template <class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) {
GaugeField HUmu(_Umu._grid);
HUmu = _Umu * (-0.5);
Impl::DoubleStore(GaugeGrid(), Umu, HUmu);
pickCheckerboard(Even, UmuEven, Umu);
pickCheckerboard(Odd, UmuOdd, Umu);
}
/////////////////////////////
// Implement the interface
/////////////////////////////
template <class Impl>
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerNo);
return axpy_norm(out, 4 + mass, in, out);
}
template <class Impl>
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard;
Dhop(in, out, DaggerYes);
return axpy_norm(out, 4 + mass, in, out);
}
template <class Impl>
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out) {
if (in.checkerboard == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
} }
}
template<class Impl> template <class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu) void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out) {
{ if (in.checkerboard == Odd) {
GaugeField HUmu(_Umu._grid); DhopEO(in, out, DaggerYes);
HUmu = _Umu*(-0.5); } else {
Impl::DoubleStore(GaugeGrid(),Umu,HUmu); DhopOE(in, out, DaggerYes);
pickCheckerboard(Even,UmuEven,Umu);
pickCheckerboard(Odd ,UmuOdd,Umu);
} }
}
///////////////////////////// template <class Impl>
// Implement the interface
/////////////////////////////
template<class Impl>
RealD WilsonFermion<Impl>::M(const FermionField &in, FermionField &out)
{
out.checkerboard=in.checkerboard;
Dhop(in,out,DaggerNo);
return axpy_norm(out,4+mass,in,out);
}
template<class Impl>
RealD WilsonFermion<Impl>::Mdag(const FermionField &in, FermionField &out)
{
out.checkerboard=in.checkerboard;
Dhop(in,out,DaggerYes);
return axpy_norm(out,4+mass,in,out);
}
template<class Impl>
void WilsonFermion<Impl>::Meooe(const FermionField &in, FermionField &out)
{
if ( in.checkerboard == Odd ) {
DhopEO(in,out,DaggerNo);
} else {
DhopOE(in,out,DaggerNo);
}
}
template<class Impl>
void WilsonFermion<Impl>::MeooeDag(const FermionField &in, FermionField &out)
{
if ( in.checkerboard == Odd ) {
DhopEO(in,out,DaggerYes);
} else {
DhopOE(in,out,DaggerYes);
}
}
template<class Impl>
void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) { void WilsonFermion<Impl>::Mooee(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard; out.checkerboard = in.checkerboard;
typename FermionField::scalar_type scal(4.0+mass); typename FermionField::scalar_type scal(4.0 + mass);
out = scal*in; out = scal * in;
} }
template<class Impl> template <class Impl>
void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) { void WilsonFermion<Impl>::MooeeDag(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard; out.checkerboard = in.checkerboard;
Mooee(in,out); Mooee(in, out);
} }
template<class Impl> template<class Impl>
void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) { void WilsonFermion<Impl>::MooeeInv(const FermionField &in, FermionField &out) {
out.checkerboard = in.checkerboard; out.checkerboard = in.checkerboard;
@ -136,8 +135,9 @@ namespace QCD {
out.checkerboard = in.checkerboard; out.checkerboard = in.checkerboard;
MooeeInv(in,out); MooeeInv(in,out);
} }
template<class Impl> template<class Impl>
void WilsonFermion<Impl>:: MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) { void WilsonFermion<Impl>::MomentumSpacePropagator(FermionField &out, const FermionField &in,RealD _m) {
// what type LatticeComplex // what type LatticeComplex
conformable(_grid,out._grid); conformable(_grid,out._grid);
@ -190,184 +190,182 @@ namespace QCD {
out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ] out = num*denom; // [ -i gmu sin k + 2 sin^2 k/2 + m] / [ sin^2 k + (2 sin^2 k/2 + m)^2 ]
} }
///////////////////////////////////
// Internal
///////////////////////////////////
template<class Impl>
void WilsonFermion<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,int dag) {
assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;
st.HaloExchange(B,compressor);
for(int mu=0;mu<Nd;mu++){
////////////////////////////////////////////////////////////////////////
// Flip gamma (1+g)<->(1-g) if dag
////////////////////////////////////////////////////////////////////////
int gamma = mu;
if ( !dag ) gamma+= Nd;
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for(int sss=0;sss<B._grid->oSites();sss++){
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sss,sss,B,Btilde,mu,gamma);
}
//////////////////////////////////////////////////
// spin trace outer product
//////////////////////////////////////////////////
Impl::InsertForce4D(mat,Btilde,Atilde,mu);
}
}
template<class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_grid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
mat.checkerboard = U.checkerboard;
DerivInternal(Stencil,Umu,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_cbgrid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
assert(V.checkerboard==Even);
assert(U.checkerboard==Odd);
mat.checkerboard = Odd;
DerivInternal(StencilEven,UmuOdd,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag)
{
conformable(U._grid,_cbgrid);
conformable(U._grid,V._grid);
conformable(U._grid,mat._grid);
assert(V.checkerboard==Odd);
assert(U.checkerboard==Even);
mat.checkerboard = Even;
DerivInternal(StencilOdd,UmuEven,mat,U,V,dag);
}
template<class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_grid); // verifies full grid
conformable(in._grid,out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil,Lebesgue,Umu,in,out,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Even);
out.checkerboard = Odd;
DhopInternal(StencilEven,LebesgueEvenOdd,UmuOdd,in,out,dag);
}
template<class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag) {
conformable(in._grid,_cbgrid); // verifies half grid
conformable(in._grid,out._grid); // drops the cb check
assert(in.checkerboard==Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd,LebesgueEvenOdd,UmuEven,in,out,dag);
}
template<class Impl>
void WilsonFermion<Impl>::Mdir (const FermionField &in, FermionField &out,int dir,int disp) {
DhopDir(in,out,dir,disp);
}
template<class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir,int disp){
int skip = (disp==1) ? 0 : 1;
int dirdisp = dir+skip*4;
int gamma = dir+(1-skip)*4;
DhopDirDisp(in,out,dirdisp,gamma,DaggerNo);
};
template<class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) {
Compressor compressor(dag);
Stencil.HaloExchange(in,compressor);
PARALLEL_FOR_LOOP
for(int sss=0;sss<in._grid->oSites();sss++){
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sss,sss,in,out,dirdisp,gamma);
}
};
template<class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl & st,LebesgueOrder& lo,DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag);
st.HaloExchange(in,compressor);
if ( dag == DaggerYes ) {
PARALLEL_FOR_LOOP
for(int sss=0;sss<in._grid->oSites();sss++){
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
}
} else {
PARALLEL_FOR_LOOP
for(int sss=0;sss<in._grid->oSites();sss++){
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sss,sss,1,1,in,out);
}
}
};
FermOpTemplateInstantiate(WilsonFermion);
GparityFermOpTemplateInstantiate(WilsonFermion);
///////////////////////////////////
// Internal
///////////////////////////////////
}} template <class Impl>
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
GaugeField &mat, const FermionField &A,
const FermionField &B, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor(dag);
FermionField Btilde(B._grid);
FermionField Atilde(B._grid);
Atilde = A;
st.HaloExchange(B, compressor);
for (int mu = 0; mu < Nd; mu++) {
////////////////////////////////////////////////////////////////////////
// Flip gamma (1+g)<->(1-g) if dag
////////////////////////////////////////////////////////////////////////
int gamma = mu;
if (!dag) gamma += Nd;
////////////////////////
// Call the single hop
////////////////////////
PARALLEL_FOR_LOOP
for (int sss = 0; sss < B._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu,
gamma);
}
//////////////////////////////////////////////////
// spin trace outer product
//////////////////////////////////////////////////
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
}
}
template <class Impl>
void WilsonFermion<Impl>::DhopDeriv(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _grid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
mat.checkerboard = U.checkerboard;
DerivInternal(Stencil, Umu, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivOE(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Even);
assert(U.checkerboard == Odd);
mat.checkerboard = Odd;
DerivInternal(StencilEven, UmuOdd, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U,
const FermionField &V, int dag) {
conformable(U._grid, _cbgrid);
conformable(U._grid, V._grid);
conformable(U._grid, mat._grid);
assert(V.checkerboard == Odd);
assert(U.checkerboard == Even);
mat.checkerboard = Even;
DerivInternal(StencilOdd, UmuEven, mat, U, V, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _grid); // verifies full grid
conformable(in._grid, out._grid);
out.checkerboard = in.checkerboard;
DhopInternal(Stencil, Lebesgue, Umu, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Even);
out.checkerboard = Odd;
DhopInternal(StencilEven, LebesgueEvenOdd, UmuOdd, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,
int dag) {
conformable(in._grid, _cbgrid); // verifies half grid
conformable(in._grid, out._grid); // drops the cb check
assert(in.checkerboard == Odd);
out.checkerboard = Even;
DhopInternal(StencilOdd, LebesgueEvenOdd, UmuEven, in, out, dag);
}
template <class Impl>
void WilsonFermion<Impl>::Mdir(const FermionField &in, FermionField &out,
int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template <class Impl>
void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out,
int dir, int disp) {
int skip = (disp == 1) ? 0 : 1;
int dirdisp = dir + skip * 4;
int gamma = dir + (1 - skip) * 4;
DhopDirDisp(in, out, dirdisp, gamma, DaggerNo);
};
template <class Impl>
void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
int dirdisp, int gamma, int dag) {
Compressor compressor(dag);
Stencil.HaloExchange(in, compressor);
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out,
dirdisp, gamma);
}
};
template <class Impl>
void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
DoubledGaugeField &U,
const FermionField &in,
FermionField &out, int dag) {
assert((dag == DaggerNo) || (dag == DaggerYes));
Compressor compressor(dag);
st.HaloExchange(in, compressor);
if (dag == DaggerYes) {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
out);
}
} else {
PARALLEL_FOR_LOOP
for (int sss = 0; sss < in._grid->oSites(); sss++) {
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
out);
}
}
};
FermOpTemplateInstantiate(WilsonFermion);
AdjointFermOpTemplateInstantiate(WilsonFermion);
TwoIndexFermOpTemplateInstantiate(WilsonFermion);
GparityFermOpTemplateInstantiate(WilsonFermion);
}
}

View File

@ -1,163 +1,154 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonFermion.h Source file: ./lib/qcd/action/fermion/WilsonFermion.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
#ifndef GRID_QCD_WILSON_FERMION_H /* END LEGAL */
#define GRID_QCD_WILSON_FERMION_H #ifndef GRID_QCD_WILSON_FERMION_H
#define GRID_QCD_WILSON_FERMION_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
class WilsonFermionStatic { class WilsonFermionStatic {
public: public:
static int HandOptDslash; // these are a temporary hack static int HandOptDslash; // these are a temporary hack
static int MortonOrder; static int MortonOrder;
static const std::vector<int> directions ; static const std::vector<int> directions;
static const std::vector<int> displacements; static const std::vector<int> displacements;
static const int npoint=8; static const int npoint = 8;
}; };
template<class Impl> template <class Impl>
class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic class WilsonFermion : public WilsonKernels<Impl>, public WilsonFermionStatic {
{ public:
public: INHERIT_IMPL_TYPES(Impl);
INHERIT_IMPL_TYPES(Impl); typedef WilsonKernels<Impl> Kernels;
typedef WilsonKernels<Impl> Kernels;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Implement the abstract base // Implement the abstract base
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
GridBase *GaugeGrid(void) { return _grid ;} GridBase *GaugeGrid(void) { return _grid; }
GridBase *GaugeRedBlackGrid(void) { return _cbgrid ;} GridBase *GaugeRedBlackGrid(void) { return _cbgrid; }
GridBase *FermionGrid(void) { return _grid;} GridBase *FermionGrid(void) { return _grid; }
GridBase *FermionRedBlackGrid(void) { return _cbgrid;} GridBase *FermionRedBlackGrid(void) { return _cbgrid; }
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
// override multiply; cut number routines if pass dagger argument // override multiply; cut number routines if pass dagger argument
// and also make interface more uniformly consistent // and also make interface more uniformly consistent
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
RealD M(const FermionField &in, FermionField &out); RealD M(const FermionField &in, FermionField &out);
RealD Mdag(const FermionField &in, FermionField &out); RealD Mdag(const FermionField &in, FermionField &out);
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
// half checkerboard operations // half checkerboard operations
// could remain virtual so we can derive Clover from Wilson base // could remain virtual so we can derive Clover from Wilson base
///////////////////////////////////////////////////////// /////////////////////////////////////////////////////////
void Meooe(const FermionField &in, FermionField &out) ; void Meooe(const FermionField &in, FermionField &out);
void MeooeDag(const FermionField &in, FermionField &out) ; void MeooeDag(const FermionField &in, FermionField &out);
// allow override for twisted mass and clover // allow override for twisted mass and clover
virtual void Mooee(const FermionField &in, FermionField &out) ; virtual void Mooee(const FermionField &in, FermionField &out);
virtual void MooeeDag(const FermionField &in, FermionField &out) ; virtual void MooeeDag(const FermionField &in, FermionField &out);
virtual void MooeeInv(const FermionField &in, FermionField &out) ; virtual void MooeeInv(const FermionField &in, FermionField &out);
virtual void MooeeInvDag(const FermionField &in, FermionField &out) ; virtual void MooeeInvDag(const FermionField &in, FermionField &out);
virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass) ; virtual void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _mass) ;
//////////////////////// ////////////////////////
// Derivative interface // Derivative interface
//////////////////////// ////////////////////////
// Interface calls an internal routine // Interface calls an internal routine
void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); void DhopDeriv(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag); void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
///////////////////////////////////////////////////////////////
// non-hermitian hopping term; half cb or both
///////////////////////////////////////////////////////////////
void Dhop(const FermionField &in, FermionField &out, int dag);
void DhopOE(const FermionField &in, FermionField &out, int dag);
void DhopEO(const FermionField &in, FermionField &out, int dag);
///////////////////////////////////////////////////////////////
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDir(const FermionField &in, FermionField &out, int dir, int disp);
void DhopDirDisp(const FermionField &in, FermionField &out, int dirdisp,
int gamma, int dag);
///////////////////////////////////////////////////////////////
// Extra methods added by derived
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl &st, DoubledGaugeField &U, GaugeField &mat,
const FermionField &A, const FermionField &B, int dag);
void DhopInternal(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
const FermionField &in, FermionField &out, int dag);
// Constructor
WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid, RealD _mass,
const ImplParams &p = ImplParams());
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
// protected:
public:
RealD mass;
GridBase *_grid;
GridBase *_cbgrid;
// Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
};
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
/////////////////////////////////////////////////////////////// }
// non-hermitian hopping term; half cb or both
///////////////////////////////////////////////////////////////
void Dhop(const FermionField &in, FermionField &out,int dag) ;
void DhopOE(const FermionField &in, FermionField &out,int dag) ;
void DhopEO(const FermionField &in, FermionField &out,int dag) ;
///////////////////////////////////////////////////////////////
// Multigrid assistance; force term uses too
///////////////////////////////////////////////////////////////
void Mdir (const FermionField &in, FermionField &out,int dir,int disp) ;
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
void DhopDirDisp(const FermionField &in, FermionField &out,int dirdisp,int gamma,int dag) ;
///////////////////////////////////////////////////////////////
// Extra methods added by derived
///////////////////////////////////////////////////////////////
void DerivInternal(StencilImpl & st,
DoubledGaugeField & U,
GaugeField &mat,
const FermionField &A,
const FermionField &B,
int dag);
void DhopInternal(StencilImpl & st,LebesgueOrder & lo,DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) ;
// Constructor
WilsonFermion(GaugeField &_Umu,
GridCartesian &Fgrid,
GridRedBlackCartesian &Hgrid,
RealD _mass,
const ImplParams &p= ImplParams()
) ;
// DoubleStore impl dependent
void ImportGauge(const GaugeField &_Umu);
///////////////////////////////////////////////////////////////
// Data members require to support the functionality
///////////////////////////////////////////////////////////////
// protected:
public:
RealD mass;
GridBase * _grid;
GridBase * _cbgrid;
//Defines the stencils for even and odd
StencilImpl Stencil;
StencilImpl StencilEven;
StencilImpl StencilOdd;
// Copy of the gauge field , with even and odd subsets
DoubledGaugeField Umu;
DoubledGaugeField UmuEven;
DoubledGaugeField UmuOdd;
LebesgueOrder Lebesgue;
LebesgueOrder LebesgueEvenOdd;
};
typedef WilsonFermion<WilsonImplF> WilsonFermionF;
typedef WilsonFermion<WilsonImplD> WilsonFermionD;
}
} }
#endif #endif

View File

@ -42,11 +42,11 @@ const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1
// 5d lattice for DWF. // 5d lattice for DWF.
template<class Impl> template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu, WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _M5,const ImplParams &p) : RealD _M5,const ImplParams &p) :
Kernels(p), Kernels(p),
_FiveDimGrid (&FiveDimGrid), _FiveDimGrid (&FiveDimGrid),
_FiveDimRedBlackGrid(&FiveDimRedBlackGrid), _FiveDimRedBlackGrid(&FiveDimRedBlackGrid),
@ -135,10 +135,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
/* /*
template<class Impl> template<class Impl>
WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu, WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
GridCartesian &FiveDimGrid, GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid, GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid, GridCartesian &FourDimGrid,
RealD _M5,const ImplParams &p) : RealD _M5,const ImplParams &p) :
{ {
int nsimd = Simd::Nsimd(); int nsimd = Simd::Nsimd();
@ -175,6 +175,73 @@ WilsonFermion5D<Impl>::WilsonFermion5D(int simd,GaugeField &_Umu,
} }
*/ */
template<class Impl>
void WilsonFermion5D<Impl>::Report(void)
{
std::vector<int> latt = GridDefaultLatt();
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
RealD NP = _FourDimGrid->_Nprocessors;
if ( DhopCalls > 0 ) {
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
<< " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
<< DhopCommTime / DhopCalls << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
<< DhopComputeTime << " us" << std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
<< DhopComputeTime / DhopCalls << " us" << std::endl;
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
}
if ( DerivCalls > 0 ) {
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
}
if (DerivCalls > 0 || DhopCalls > 0){
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
}
}
template<class Impl>
void WilsonFermion5D<Impl>::ZeroCounters(void) {
DhopCalls = 0;
DhopCommTime = 0;
DhopComputeTime = 0;
DerivCalls = 0;
DerivCommTime = 0;
DerivComputeTime = 0;
DerivDhopComputeTime = 0;
Stencil.ZeroCounters();
StencilEven.ZeroCounters();
StencilOdd.ZeroCounters();
}
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu) void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{ {
@ -215,12 +282,13 @@ PARALLEL_FOR_LOOP
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st, void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
DoubledGaugeField & U, DoubledGaugeField & U,
GaugeField &mat, GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
DerivCalls++;
assert((dag==DaggerNo) ||(dag==DaggerYes)); assert((dag==DaggerNo) ||(dag==DaggerYes));
conformable(st._grid,A._grid); conformable(st._grid,A._grid);
@ -231,51 +299,53 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
FermionField Btilde(B._grid); FermionField Btilde(B._grid);
FermionField Atilde(B._grid); FermionField Atilde(B._grid);
DerivCommTime-=usecond();
st.HaloExchange(B,compressor); st.HaloExchange(B,compressor);
DerivCommTime+=usecond();
Atilde=A; Atilde=A;
for(int mu=0;mu<Nd;mu++){ DerivComputeTime-=usecond();
for (int mu = 0; mu < Nd; mu++) {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Flip gamma if dag // Flip gamma if dag
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
int gamma = mu; int gamma = mu;
if ( !dag ) gamma+= Nd; if (!dag) gamma += Nd;
//////////////////////// ////////////////////////
// Call the single hop // Call the single hop
//////////////////////// ////////////////////////
PARALLEL_FOR_LOOP DerivDhopComputeTime -= usecond();
for(int sss=0;sss<U._grid->oSites();sss++){ PARALLEL_FOR_LOOP
for(int s=0;s<Ls;s++){ for (int sss = 0; sss < U._grid->oSites(); sss++) {
int sU=sss; for (int s = 0; s < Ls; s++) {
int sF = s+Ls*sU; int sU = sss;
int sF = s + Ls * sU;
assert ( sF< B._grid->oSites()); assert(sF < B._grid->oSites());
assert ( sU< U._grid->oSites()); assert(sU < U._grid->oSites());
Kernels::DiracOptDhopDir(st,U,st.comm_buf,sF,sU,B,Btilde,mu,gamma); Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
gamma);
////////////////////////////
// spin trace outer product
////////////////////////////
////////////////////////////
// spin trace outer product
////////////////////////////
} }
} }
DerivDhopComputeTime += usecond();
Impl::InsertForce5D(mat,Btilde,Atilde,mu); Impl::InsertForce5D(mat, Btilde, Atilde, mu);
} }
DerivComputeTime += usecond();
} }
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat, void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionGrid()); conformable(A._grid,FermionGrid());
conformable(A._grid,B._grid); conformable(A._grid,B._grid);
@ -288,9 +358,9 @@ void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -306,9 +376,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat, void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
const FermionField &A, const FermionField &A,
const FermionField &B, const FermionField &B,
int dag) int dag)
{ {
conformable(A._grid,FermionRedBlackGrid()); conformable(A._grid,FermionRedBlackGrid());
conformable(GaugeRedBlackGrid(),mat._grid); conformable(GaugeRedBlackGrid(),mat._grid);
@ -323,32 +393,61 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo, void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U, DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag) const FermionField &in, FermionField &out,int dag)
{ {
DhopCalls++;
// assert((dag==DaggerNo) ||(dag==DaggerYes)); // assert((dag==DaggerNo) ||(dag==DaggerYes));
Compressor compressor(dag); Compressor compressor(dag);
int LLs = in._grid->_rdimensions[0]; int LLs = in._grid->_rdimensions[0];
DhopCommTime-=usecond();
st.HaloExchange(in,compressor); st.HaloExchange(in,compressor);
DhopCommTime+=usecond();
DhopComputeTime-=usecond();
// Dhop takes the 4d grid from U, and makes a 5d index for fermion // Dhop takes the 4d grid from U, and makes a 5d index for fermion
if ( dag == DaggerYes ) { if (dag == DaggerYes) {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<U._grid->oSites();ss++){ for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss; int sU = ss;
int sF=LLs*sU; int sF = LLs * sU;
Kernels::DiracOptDhopSiteDag(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
out);
} }
#ifdef AVX512
} else if (stat.is_init() ) {
int nthreads;
stat.start();
#pragma omp parallel
{
#pragma omp master
nthreads = omp_get_num_threads();
int mythread = omp_get_thread_num();
stat.enter(mythread);
#pragma omp for nowait
for(int ss=0;ss<U._grid->oSites();ss++)
{
int sU=ss;
int sF=LLs*sU;
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
}
stat.exit(mythread);
}
stat.accum(nthreads);
#endif
} else { } else {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
for(int ss=0;ss<U._grid->oSites();ss++){ for (int ss = 0; ss < U._grid->oSites(); ss++) {
int sU=ss; int sU = ss;
int sF=LLs*sU; int sF = LLs * sU;
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out); Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
out);
} }
} }
DhopComputeTime+=usecond();
} }

View File

@ -31,6 +31,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef GRID_QCD_WILSON_FERMION_5D_H #ifndef GRID_QCD_WILSON_FERMION_5D_H
#define GRID_QCD_WILSON_FERMION_5D_H #define GRID_QCD_WILSON_FERMION_5D_H
#include <Grid/Stat.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
@ -60,6 +62,18 @@ namespace Grid {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef WilsonKernels<Impl> Kernels; typedef WilsonKernels<Impl> Kernels;
PmuStat stat;
void Report(void);
void ZeroCounters(void);
double DhopCalls;
double DhopCommTime;
double DhopComputeTime;
double DerivCalls;
double DerivCommTime;
double DerivComputeTime;
double DerivDhopComputeTime;
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Implement the abstract base // Implement the abstract base

View File

@ -1,103 +1,54 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.cc Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local> Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
int WilsonKernelsStatic::HandOpt; int WilsonKernelsStatic::HandOpt;
int WilsonKernelsStatic::AsmOpt; int WilsonKernelsStatic::AsmOpt;
template<class Impl> template <class Impl>
WilsonKernels<Impl>::WilsonKernels(const ImplParams &p): Base(p) {}; WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
template<class Impl> ////////////////////////////////////////////
void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, // Generic implementation; move to different file?
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, ////////////////////////////////////////////
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
{
#ifdef AVX512
if ( AsmOpt ) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out); template <class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
} else { StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
#else std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
{ int sU, const FermionField &in, FermionField &out) {
#endif SiteHalfSpinor tmp;
for(int site=0;site<Ns;site++) { SiteHalfSpinor chi;
for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
else WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
}
template<class Impl>
void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
{
#ifdef AVX512
if ( AsmOpt ) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
} else {
#else
{
#endif
for(int site=0;site<Ns;site++) {
for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
else WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
sF++;
}
sU++;
}
}
}
////////////////////////////////////////////
// Generic implementation; move to different file?
////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out)
{
SiteHalfSpinor tmp;
SiteHalfSpinor chi;
SiteHalfSpinor *chi_p; SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi; SiteHalfSpinor Uchi;
SiteSpinor result; SiteSpinor result;
@ -107,176 +58,175 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrd
/////////////////////////// ///////////////////////////
// Xp // Xp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Xp,sF); SE = st.GetEntry(ptype, Xp, sF);
if (SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjXp(tmp,in._odata[SE->_offset]); spProjXp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjXp(chi,in._odata[SE->_offset]); spProjXp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
spReconXp(result,Uchi); spReconXp(result, Uchi);
/////////////////////////// ///////////////////////////
// Yp // Yp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Yp,sF); SE = st.GetEntry(ptype, Yp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjYp(tmp,in._odata[SE->_offset]); spProjYp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjYp(chi,in._odata[SE->_offset]); spProjYp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
accumReconYp(result,Uchi); accumReconYp(result, Uchi);
/////////////////////////// ///////////////////////////
// Zp // Zp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Zp,sF); SE = st.GetEntry(ptype, Zp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjZp(tmp,in._odata[SE->_offset]); spProjZp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjZp(chi,in._odata[SE->_offset]); spProjZp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
accumReconZp(result,Uchi); accumReconZp(result, Uchi);
/////////////////////////// ///////////////////////////
// Tp // Tp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Tp,sF); SE = st.GetEntry(ptype, Tp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjTp(tmp,in._odata[SE->_offset]); spProjTp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjTp(chi,in._odata[SE->_offset]); spProjTp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
accumReconTp(result,Uchi); accumReconTp(result, Uchi);
/////////////////////////// ///////////////////////////
// Xm // Xm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Xm,sF); SE = st.GetEntry(ptype, Xm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjXm(tmp,in._odata[SE->_offset]); spProjXm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjXm(chi,in._odata[SE->_offset]); spProjXm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
accumReconXm(result,Uchi); accumReconXm(result, Uchi);
/////////////////////////// ///////////////////////////
// Ym // Ym
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Ym,sF); SE = st.GetEntry(ptype, Ym, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjYm(tmp,in._odata[SE->_offset]); spProjYm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjYm(chi,in._odata[SE->_offset]); spProjYm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
accumReconYm(result,Uchi); accumReconYm(result, Uchi);
/////////////////////////// ///////////////////////////
// Zm // Zm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Zm,sF); SE = st.GetEntry(ptype, Zm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjZm(tmp,in._odata[SE->_offset]); spProjZm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjZm(chi,in._odata[SE->_offset]); spProjZm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
accumReconZm(result,Uchi); accumReconZm(result, Uchi);
/////////////////////////// ///////////////////////////
// Tm // Tm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Tm,sF); SE = st.GetEntry(ptype, Tm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjTm(tmp,in._odata[SE->_offset]); spProjTm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjTm(chi,in._odata[SE->_offset]); spProjTm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
accumReconTm(result,Uchi); accumReconTm(result, Uchi);
vstream(out._odata[sF],result); vstream(out._odata[sF], result);
}; };
// Need controls to do interior, exterior, or both
// Need controls to do interior, exterior, or both template <class Impl>
template<class Impl> void WilsonKernels<Impl>::DiracOptGenericDhopSite(
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
int sF,int sU,const FermionField &in, FermionField &out) int sU, const FermionField &in, FermionField &out) {
{ SiteHalfSpinor tmp;
SiteHalfSpinor tmp; SiteHalfSpinor chi;
SiteHalfSpinor chi; SiteHalfSpinor *chi_p;
SiteHalfSpinor *chi_p;
SiteHalfSpinor Uchi; SiteHalfSpinor Uchi;
SiteSpinor result; SiteSpinor result;
StencilEntry *SE; StencilEntry *SE;
@ -285,296 +235,298 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder
/////////////////////////// ///////////////////////////
// Xp // Xp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Xm,sF); SE = st.GetEntry(ptype, Xm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjXp(tmp,in._odata[SE->_offset]); spProjXp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjXp(chi,in._odata[SE->_offset]); spProjXp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Xm, SE, st);
spReconXp(result,Uchi); spReconXp(result, Uchi);
/////////////////////////// ///////////////////////////
// Yp // Yp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Ym,sF); SE = st.GetEntry(ptype, Ym, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjYp(tmp,in._odata[SE->_offset]); spProjYp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjYp(chi,in._odata[SE->_offset]); spProjYp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Ym,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Ym, SE, st);
accumReconYp(result,Uchi); accumReconYp(result, Uchi);
/////////////////////////// ///////////////////////////
// Zp // Zp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Zm,sF); SE = st.GetEntry(ptype, Zm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjZp(tmp,in._odata[SE->_offset]); spProjZp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjZp(chi,in._odata[SE->_offset]); spProjZp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Zm, SE, st);
accumReconZp(result,Uchi); accumReconZp(result, Uchi);
/////////////////////////// ///////////////////////////
// Tp // Tp
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Tm,sF); SE = st.GetEntry(ptype, Tm, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjTp(tmp,in._odata[SE->_offset]); spProjTp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjTp(chi,in._odata[SE->_offset]); spProjTp(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tm,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Tm, SE, st);
accumReconTp(result,Uchi); accumReconTp(result, Uchi);
/////////////////////////// ///////////////////////////
// Xm // Xm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Xp,sF); SE = st.GetEntry(ptype, Xp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjXm(tmp,in._odata[SE->_offset]); spProjXm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjXm(chi,in._odata[SE->_offset]); spProjXm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Xp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Xp, SE, st);
accumReconXm(result,Uchi); accumReconXm(result, Uchi);
/////////////////////////// ///////////////////////////
// Ym // Ym
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Yp,sF); SE = st.GetEntry(ptype, Yp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjYm(tmp,in._odata[SE->_offset]); spProjYm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjYm(chi,in._odata[SE->_offset]); spProjYm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Yp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Yp, SE, st);
accumReconYm(result,Uchi); accumReconYm(result, Uchi);
/////////////////////////// ///////////////////////////
// Zm // Zm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Zp,sF); SE = st.GetEntry(ptype, Zp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjZm(tmp,in._odata[SE->_offset]); spProjZm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjZm(chi,in._odata[SE->_offset]); spProjZm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Zp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Zp, SE, st);
accumReconZm(result,Uchi); accumReconZm(result, Uchi);
/////////////////////////// ///////////////////////////
// Tm // Tm
/////////////////////////// ///////////////////////////
SE=st.GetEntry(ptype,Tp,sF); SE = st.GetEntry(ptype, Tp, sF);
if ( SE->_is_local ) { if (SE->_is_local) {
chi_p = &chi; chi_p = &chi;
if ( SE->_permute ) { if (SE->_permute) {
spProjTm(tmp,in._odata[SE->_offset]); spProjTm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else { } else {
spProjTm(chi,in._odata[SE->_offset]); spProjTm(chi, in._odata[SE->_offset]);
} }
} else { } else {
chi_p=&buf[SE->_offset]; chi_p = &buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],*chi_p,Tp,SE,st); Impl::multLink(Uchi, U._odata[sU], *chi_p, Tp, SE, st);
accumReconTm(result,Uchi); accumReconTm(result, Uchi);
vstream(out._odata[sF],result); vstream(out._odata[sF], result);
}; };
template<class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptDhopDir(
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, StencilImpl &st, DoubledGaugeField &U,
int sF,int sU,const FermionField &in, FermionField &out,int dir,int gamma) std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
{ int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
SiteSpinor result; SiteSpinor result;
SiteHalfSpinor Uchi; SiteHalfSpinor Uchi;
StencilEntry *SE; StencilEntry *SE;
int ptype; int ptype;
SE=st.GetEntry(ptype,dir,sF); SE = st.GetEntry(ptype, dir, sF);
// Xp // Xp
if(gamma==Xp){ if (gamma == Xp) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjXp(tmp,in._odata[SE->_offset]); spProjXp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjXp(chi,in._odata[SE->_offset]); spProjXp(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconXp(result,Uchi); spReconXp(result, Uchi);
} }
// Yp // Yp
if ( gamma==Yp ){ if (gamma == Yp) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjYp(tmp,in._odata[SE->_offset]); spProjYp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjYp(chi,in._odata[SE->_offset]); spProjYp(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconYp(result,Uchi); spReconYp(result, Uchi);
} }
// Zp // Zp
if ( gamma ==Zp ){ if (gamma == Zp) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjZp(tmp,in._odata[SE->_offset]); spProjZp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjZp(chi,in._odata[SE->_offset]); spProjZp(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconZp(result,Uchi); spReconZp(result, Uchi);
} }
// Tp // Tp
if ( gamma ==Tp ){ if (gamma == Tp) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjTp(tmp,in._odata[SE->_offset]); spProjTp(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjTp(chi,in._odata[SE->_offset]); spProjTp(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconTp(result,Uchi); spReconTp(result, Uchi);
} }
// Xm // Xm
if ( gamma==Xm ){ if (gamma == Xm) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjXm(tmp,in._odata[SE->_offset]); spProjXm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjXm(chi,in._odata[SE->_offset]); spProjXm(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconXm(result,Uchi); spReconXm(result, Uchi);
} }
// Ym // Ym
if ( gamma == Ym ){ if (gamma == Ym) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjYm(tmp,in._odata[SE->_offset]); spProjYm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjYm(chi,in._odata[SE->_offset]); spProjYm(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconYm(result,Uchi); spReconYm(result, Uchi);
} }
// Zm // Zm
if ( gamma == Zm ){ if (gamma == Zm) {
if ( SE->_is_local && SE->_permute ) { if (SE->_is_local && SE->_permute) {
spProjZm(tmp,in._odata[SE->_offset]); spProjZm(tmp, in._odata[SE->_offset]);
permute(chi,tmp,ptype); permute(chi, tmp, ptype);
} else if ( SE->_is_local ) { } else if (SE->_is_local) {
spProjZm(chi,in._odata[SE->_offset]); spProjZm(chi, in._odata[SE->_offset]);
} else { } else {
chi=buf[SE->_offset]; chi = buf[SE->_offset];
} }
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st); Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconZm(result,Uchi); spReconZm(result, Uchi);
}
// Tm
if ( gamma==Tm ) {
if ( SE->_is_local && SE->_permute ) {
spProjTm(tmp,in._odata[SE->_offset]);
permute(chi,tmp,ptype);
} else if ( SE->_is_local ) {
spProjTm(chi,in._odata[SE->_offset]);
} else {
chi=buf[SE->_offset];
}
Impl::multLink(Uchi,U._odata[sU],chi,dir,SE,st);
spReconTm(result,Uchi);
} }
vstream(out._odata[sF],result); // Tm
if (gamma == Tm) {
if (SE->_is_local && SE->_permute) {
spProjTm(tmp, in._odata[SE->_offset]);
permute(chi, tmp, ptype);
} else if (SE->_is_local) {
spProjTm(chi, in._odata[SE->_offset]);
} else {
chi = buf[SE->_offset];
}
Impl::multLink(Uchi, U._odata[sU], chi, dir, SE, st);
spReconTm(result, Uchi);
}
vstream(out._odata[sF], result);
} }
FermOpTemplateInstantiate(WilsonKernels);
FermOpTemplateInstantiate(WilsonKernels); AdjointFermOpTemplateInstantiate(WilsonKernels);
TwoIndexFermOpTemplateInstantiate(WilsonKernels);
}} }}

View File

@ -1,34 +1,35 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/WilsonKernels.h Source file: ./lib/qcd/action/fermion/WilsonKernels.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
#ifndef GRID_QCD_DHOP_H /* END LEGAL */
#define GRID_QCD_DHOP_H #ifndef GRID_QCD_DHOP_H
#define GRID_QCD_DHOP_H
namespace Grid { namespace Grid {
@ -48,55 +49,158 @@ namespace Grid {
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic { template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
typedef FermionOperator<Impl> Base; typedef FermionOperator<Impl> Base;
public: public:
void DiracOptDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, template <bool EnableBool = true>
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
int sF, int sU,int Ls, int Ns, const FermionField &in, FermionField &out); DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
void DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, int sF, int sU, int Ls, int Ns, const FermionField &in,
int sF,int sU,int Ls, int Ns, const FermionField &in,FermionField &out); FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
out);
sF++;
}
sU++;
}
}
template <bool EnableBool = true>
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
#ifdef AVX512
if (AsmOpt) {
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
Ns, in, out);
} else {
#else
{
#endif
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
if (HandOpt)
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
else
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
sU, in, out);
sF++;
}
sU++;
}
}
}
template <bool EnableBool = true>
typename std::enable_if<
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
void>::type
DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) {
for (int site = 0; site < Ns; site++) {
for (int s = 0; s < Ls; s++) {
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
in, out);
sF++;
}
sU++;
}
}
void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out);
void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
int sF, int sU, const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p = ImplParams());
};
}
}
void DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
private:
// Specialised variants
void DiracOptGenericDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in,FermionField &out);
void DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
public:
WilsonKernels(const ImplParams &p= ImplParams());
};
}
}
#endif #endif

View File

@ -1,4 +1,4 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
@ -26,68 +26,77 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid.h> #include <Grid.h>
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
///////////////////////////////////////////////////////////
// Default to no assembler implementation
///////////////////////////////////////////////////////////
template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{
assert(0);
}
#if defined(AVX512) #if defined(AVX512)
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
// If we are AVX512 specialise the single precision routine // If we are AVX512 specialise the single precision routine
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
#include <simd/Intel512wilson.h> #include <simd/Intel512wilson.h>
#include <simd/Intel512single.h> #include <simd/Intel512single.h>
static Vector<vComplexF> signs; static Vector<vComplexF> signs;
int setupSigns(void ){ int setupSigns(void ){
Vector<vComplexF> bother(2); Vector<vComplexF> bother(2);
signs = bother; signs = bother;
vrsign(signs[0]); vrsign(signs[0]);
visign(signs[1]); visign(signs[1]);
return 1; return 1;
} }
static int signInit = setupSigns(); static int signInit = setupSigns();
#define label(A) ilabel(A) #define label(A) ilabel(A)
#define ilabel(A) ".globl\n" #A ":\n" #define ilabel(A) ".globl\n" #A ":\n"
#define MAYBEPERM(A,perm) if (perm) { A ; } #define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A #define FX(A) WILSONASM_ ##A
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP #undef VMOVIDUP
#undef VMOVRDUP #undef VMOVRDUP
#undef MAYBEPERM #undef MAYBEPERM
@ -98,43 +107,43 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,Lebesgue
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif #endif
#define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, INSTANTIATE_ASM(WilsonImplF);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_ASM(WilsonImplD);
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); INSTANTIATE_ASM(ZWilsonImplF);
template void WilsonKernels<GparityWilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, INSTANTIATE_ASM(ZWilsonImplD);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_ASM(GparityWilsonImplF);
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); INSTANTIATE_ASM(GparityWilsonImplD);
template void WilsonKernels<GparityWilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, INSTANTIATE_ASM(DomainWallVec5dImplF);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, INSTANTIATE_ASM(DomainWallVec5dImplD);
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); INSTANTIATE_ASM(ZDomainWallVec5dImplF);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, INSTANTIATE_ASM(ZDomainWallVec5dImplD);
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, }
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); }
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);
}}

View File

@ -134,7 +134,9 @@
//////////////////////////////// ////////////////////////////////
// Xm // Xm
//////////////////////////////// ////////////////////////////////
#ifndef STREAM_STORE
basep= (uint64_t) &out._odata[ss]; basep= (uint64_t) &out._odata[ss];
#endif
// basep= st.GetPFInfo(nent,plocal); nent++; // basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
@ -229,7 +231,9 @@
LOAD_CHI(base); LOAD_CHI(base);
} }
base= (uint64_t) &out._odata[ss]; base= (uint64_t) &out._odata[ss];
#ifndef STREAM_STORE
PREFETCH_CHIMU(base); PREFETCH_CHIMU(base);
#endif
{ {
MULT_2SPIN_DIR_PFTM(Tm,basep); MULT_2SPIN_DIR_PFTM(Tm,basep);
} }

View File

@ -311,8 +311,8 @@ namespace Grid {
namespace QCD { namespace QCD {
template<class Impl> template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {
@ -554,8 +554,8 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &l
} }
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {

View File

@ -30,7 +30,6 @@ directory
#define GRID_QCD_GAUGE_IMPL_H #define GRID_QCD_GAUGE_IMPL_H
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
@ -52,7 +51,7 @@ public:
typedef S Simd; typedef S Simd;
template <typename vtype> template <typename vtype>
using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>; using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>;
template <typename vtype> template <typename vtype>
using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>; using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>;
@ -64,7 +63,7 @@ public:
// ugly // ugly
typedef Lattice<SiteGaugeField> GaugeField; typedef Lattice<SiteGaugeField> GaugeField;
// Move this elsewhere? // Move this elsewhere? FIXME
static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W, static inline void AddGaugeLink(GaugeField &U, GaugeLinkField &W,
int mu) { // U[mu] += W int mu) { // U[mu] += W
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
@ -174,12 +173,19 @@ typedef GaugeImplTypes<vComplex, Nc> GimplTypesR;
typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF; typedef GaugeImplTypes<vComplexF, Nc> GimplTypesF;
typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD; typedef GaugeImplTypes<vComplexD, Nc> GimplTypesD;
typedef GaugeImplTypes<vComplex, SU<Nc>::AdjointDimension> GimplAdjointTypesR;
typedef GaugeImplTypes<vComplexF, SU<Nc>::AdjointDimension> GimplAdjointTypesF;
typedef GaugeImplTypes<vComplexD, SU<Nc>::AdjointDimension> GimplAdjointTypesD;
typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec typedef PeriodicGaugeImpl<GimplTypesR> PeriodicGimplR; // Real.. whichever prec
typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float typedef PeriodicGaugeImpl<GimplTypesF> PeriodicGimplF; // Float
typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double typedef PeriodicGaugeImpl<GimplTypesD> PeriodicGimplD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> typedef PeriodicGaugeImpl<GimplAdjointTypesR> PeriodicGimplAdjR; // Real.. whichever prec
ConjugateGimplR; // Real.. whichever prec typedef PeriodicGaugeImpl<GimplAdjointTypesF> PeriodicGimplAdjF; // Float
typedef PeriodicGaugeImpl<GimplAdjointTypesD> PeriodicGimplAdjD; // Double
typedef ConjugateGaugeImpl<GimplTypesR> ConjugateGimplR; // Real.. whichever prec
typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float typedef ConjugateGaugeImpl<GimplTypesF> ConjugateGimplF; // Float
typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double typedef ConjugateGaugeImpl<GimplTypesD> ConjugateGimplD; // Double
} }

View File

@ -1,149 +1,151 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h Source file: ./lib/qcd/action/pseudofermion/TwoFlavour.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_H
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_H
namespace Grid{ namespace Grid {
namespace QCD{ namespace QCD {
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Two flavour pseudofermion action for any dop // Two flavour pseudofermion action for any dop
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
template<class Impl> template <class Impl>
class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> { class TwoFlavourPseudoFermionAction : public Action<typename Impl::GaugeField> {
public: public:
INHERIT_IMPL_TYPES(Impl); INHERIT_IMPL_TYPES(Impl);
private: private:
FermionOperator<Impl> &FermOp; // the basic operator
FermionOperator<Impl> & FermOp;// the basic operator
OperatorFunction<FermionField> &DerivativeSolver; OperatorFunction<FermionField> &DerivativeSolver;
OperatorFunction<FermionField> &ActionSolver; OperatorFunction<FermionField> &ActionSolver;
FermionField Phi; // the pseudo fermion field for this trajectory FermionField Phi; // the pseudo fermion field for this trajectory
public: public:
///////////////////////////////////////////////// /////////////////////////////////////////////////
// Pass in required objects. // Pass in required objects.
///////////////////////////////////////////////// /////////////////////////////////////////////////
TwoFlavourPseudoFermionAction(FermionOperator<Impl> &Op, TwoFlavourPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> & DS, OperatorFunction<FermionField> &DS,
OperatorFunction<FermionField> & AS OperatorFunction<FermionField> &AS)
) : FermOp(Op), DerivativeSolver(DS), ActionSolver(AS), Phi(Op.FermionGrid()) { : FermOp(Op),
}; DerivativeSolver(DS),
ActionSolver(AS),
////////////////////////////////////////////////////////////////////////////////////// Phi(Op.FermionGrid()){};
// Push the gauge field in to the dops. Assume any BC's and smearing already applied
//////////////////////////////////////////////////////////////////////////////////////
virtual void refresh(const GaugeField &U, GridParallelRNG& pRNG) {
// P(phi) = e^{- phi^dag (MdagM)^-1 phi} //////////////////////////////////////////////////////////////////////////////////////
// Phi = Mdag eta // Push the gauge field in to the dops. Assume any BC's and smearing already
// P(eta) = e^{- eta^dag eta} // applied
// //////////////////////////////////////////////////////////////////////////////////////
// e^{x^2/2 sig^2} => sig^2 = 0.5. virtual void refresh(const GaugeField &U, GridParallelRNG &pRNG) {
// // P(phi) = e^{- phi^dag (MdagM)^-1 phi}
// So eta should be of width sig = 1/sqrt(2). // Phi = Mdag eta
// and must multiply by 0.707.... // P(eta) = e^{- eta^dag eta}
// //
// Chroma has this scale factor: two_flavor_monomial_w.h // e^{x^2/2 sig^2} => sig^2 = 0.5.
// IroIro: does not use this scale. It is absorbed by a change of vars //
// in the Phi integral, and thus is only an irrelevant prefactor for the partition function. // So eta should be of width sig = 1/sqrt(2).
// // and must multiply by 0.707....
RealD scale = std::sqrt(0.5); //
FermionField eta(FermOp.FermionGrid()); // Chroma has this scale factor: two_flavor_monomial_w.h
// IroIro: does not use this scale. It is absorbed by a change of vars
// in the Phi integral, and thus is only an irrelevant prefactor for
// the partition function.
//
RealD scale = std::sqrt(0.5);
FermionField eta(FermOp.FermionGrid());
gaussian(pRNG,eta); gaussian(pRNG, eta);
FermOp.ImportGauge(U); FermOp.ImportGauge(U);
FermOp.Mdag(eta,Phi); FermOp.Mdag(eta, Phi);
Phi=Phi*scale; Phi = Phi * scale;
};
};
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// S = phi^dag (Mdag M)^-1 phi // S = phi^dag (Mdag M)^-1 phi
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
virtual RealD S(const GaugeField &U) { virtual RealD S(const GaugeField &U) {
FermOp.ImportGauge(U);
FermOp.ImportGauge(U); FermionField X(FermOp.FermionGrid());
FermionField Y(FermOp.FermionGrid());
FermionField X(FermOp.FermionGrid()); MdagMLinearOperator<FermionOperator<Impl>, FermionField> MdagMOp(FermOp);
FermionField Y(FermOp.FermionGrid()); X = zero;
ActionSolver(MdagMOp, Phi, X);
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp); MdagMOp.Op(X, Y);
X=zero;
ActionSolver(MdagMOp,Phi,X);
MdagMOp.Op(X,Y);
RealD action = norm2(Y); RealD action = norm2(Y);
std::cout << GridLogMessage << "Pseudofermion action "<<action<<std::endl; std::cout << GridLogMessage << "Pseudofermion action " << action
return action; << std::endl;
}; return action;
};
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// dS/du = - phi^dag (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 phi // dS/du = - phi^dag (Mdag M)^-1 [ Mdag dM + dMdag M ] (Mdag M)^-1 phi
// = - phi^dag M^-1 dM (MdagM)^-1 phi - phi^dag (MdagM)^-1 dMdag dM (Mdag)^-1 phi // = - phi^dag M^-1 dM (MdagM)^-1 phi - phi^dag (MdagM)^-1 dMdag dM
// // (Mdag)^-1 phi
// = - Ydag dM X - Xdag dMdag Y //
// // = - Ydag dM X - Xdag dMdag Y
////////////////////////////////////////////////////// //
virtual void deriv(const GaugeField &U,GaugeField & dSdU) { //////////////////////////////////////////////////////
virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
FermOp.ImportGauge(U);
FermOp.ImportGauge(U); FermionField X(FermOp.FermionGrid());
FermionField Y(FermOp.FermionGrid());
GaugeField tmp(FermOp.GaugeGrid());
FermionField X(FermOp.FermionGrid()); MdagMLinearOperator<FermionOperator<Impl>, FermionField> MdagMOp(FermOp);
FermionField Y(FermOp.FermionGrid());
GaugeField tmp(FermOp.GaugeGrid());
MdagMLinearOperator<FermionOperator<Impl> ,FermionField> MdagMOp(FermOp); X = zero;
DerivativeSolver(MdagMOp, Phi, X); // X = (MdagM)^-1 phi
MdagMOp.Op(X, Y); // Y = M X = (Mdag)^-1 phi
X=zero; // Our conventions really make this UdSdU; We do not differentiate wrt Udag
DerivativeSolver(MdagMOp,Phi,X); // here.
MdagMOp.Op(X,Y); // So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt.
// Our conventions really make this UdSdU; We do not differentiate wrt Udag here. FermOp.MDeriv(tmp, Y, X, DaggerNo);
// So must take dSdU - adj(dSdU) and left multiply by mom to get dS/dt. dSdU = tmp;
FermOp.MDeriv(tmp, X, Y, DaggerYes);
dSdU = dSdU + tmp;
FermOp.MDeriv(tmp , Y, X,DaggerNo ); dSdU=tmp; // not taking here the traceless antihermitian component
FermOp.MDeriv(tmp , X, Y,DaggerYes); dSdU=dSdU+tmp; };
};
//dSdU = Ta(dSdU); }
};
};
}
} }
#endif #endif

View File

@ -1,70 +1,66 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h Source file: ./lib/qcd/action/pseudofermion/TwoFlavourEvenOdd.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #ifndef QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H
#define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H #define QCD_PSEUDOFERMION_TWO_FLAVOUR_EVEN_ODD_H
namespace Grid{ namespace Grid {
namespace QCD{ namespace QCD {
////////////////////////////////////////////////////////////////////////
// Two flavour pseudofermion action for any EO prec dop
////////////////////////////////////////////////////////////////////////
template <class Impl>
class TwoFlavourEvenOddPseudoFermionAction
: public Action<typename Impl::GaugeField> {
public:
INHERIT_IMPL_TYPES(Impl);
private:
FermionOperator<Impl> &FermOp; // the basic operator
//////////////////////////////////////////////////////////////////////// OperatorFunction<FermionField> &DerivativeSolver;
// Two flavour pseudofermion action for any EO prec dop OperatorFunction<FermionField> &ActionSolver;
////////////////////////////////////////////////////////////////////////
template<class Impl>
class TwoFlavourEvenOddPseudoFermionAction : public Action<typename Impl::GaugeField> {
public: FermionField PhiOdd; // the pseudo fermion field for this trajectory
FermionField PhiEven; // the pseudo fermion field for this trajectory
INHERIT_IMPL_TYPES(Impl); public:
/////////////////////////////////////////////////
private: // Pass in required objects.
/////////////////////////////////////////////////
FermionOperator<Impl> & FermOp;// the basic operator TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> &DS,
OperatorFunction<FermionField> &DerivativeSolver; OperatorFunction<FermionField> &AS)
OperatorFunction<FermionField> &ActionSolver; : FermOp(Op),
DerivativeSolver(DS),
FermionField PhiOdd; // the pseudo fermion field for this trajectory ActionSolver(AS),
FermionField PhiEven; // the pseudo fermion field for this trajectory
public:
/////////////////////////////////////////////////
// Pass in required objects.
/////////////////////////////////////////////////
TwoFlavourEvenOddPseudoFermionAction(FermionOperator<Impl> &Op,
OperatorFunction<FermionField> & DS,
OperatorFunction<FermionField> & AS
) :
FermOp(Op),
DerivativeSolver(DS),
ActionSolver(AS),
PhiEven(Op.FermionRedBlackGrid()), PhiEven(Op.FermionRedBlackGrid()),
PhiOdd(Op.FermionRedBlackGrid()) PhiOdd(Op.FermionRedBlackGrid())
{}; {};

View File

@ -131,9 +131,11 @@ namespace Grid{
Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi Vpc.MpcDag(PhiOdd,Y); // Y= Vdag phi
X=zero; X=zero;
ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi ActionSolver(Mpc,Y,X); // X= (MdagM)^-1 Vdag phi
Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi //Mpc.Mpc(X,Y); // Y= Mdag^-1 Vdag phi
// Multiply by Ydag
RealD action = real(innerProduct(Y,X));
RealD action = norm2(Y); //RealD action = norm2(Y);
// The EE factorised block; normally can replace with zero if det is constant (gauge field indept) // The EE factorised block; normally can replace with zero if det is constant (gauge field indept)
// Only really clover term that creates this. Leave the EE portion as a future to do to make most // Only really clover term that creates this. Leave the EE portion as a future to do to make most

View File

@ -1,179 +1,191 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/HmcRunner.h Source file: ./lib/qcd/hmc/HmcRunner.h
Copyright (C) 2015 Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#ifndef HMC_RUNNER #ifndef HMC_RUNNER
#define HMC_RUNNER #define HMC_RUNNER
namespace Grid{ namespace Grid {
namespace QCD{ namespace QCD {
template <class Gimpl, class RepresentationsPolicy = NoHirep >
template<class Gimpl>
class NerscHmcRunnerTemplate { class NerscHmcRunnerTemplate {
public: public:
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
enum StartType_t { ColdStart, HotStart, TepidStart, CheckpointStart }; enum StartType_t { ColdStart, HotStart, TepidStart, CheckpointStart };
ActionSet<GaugeField> TheAction; ActionSet<GaugeField, RepresentationsPolicy> TheAction;
GridCartesian * UGrid ; GridCartesian *UGrid;
GridCartesian * FGrid ; GridCartesian *FGrid;
GridRedBlackCartesian * UrbGrid ; GridRedBlackCartesian *UrbGrid;
GridRedBlackCartesian * FrbGrid ; GridRedBlackCartesian *FrbGrid;
virtual void BuildTheAction (int argc, char **argv) = 0; // necessary? virtual void BuildTheAction(int argc, char **argv) = 0; // necessary?
void Run (int argc, char **argv){
void Run(int argc, char **argv) {
StartType_t StartType = HotStart; StartType_t StartType = HotStart;
std::string arg; std::string arg;
if( GridCmdOptionExists(argv,argv+argc,"--StartType") ){ if (GridCmdOptionExists(argv, argv + argc, "--StartType")) {
arg = GridCmdOptionPayload(argv,argv+argc,"--StartType"); arg = GridCmdOptionPayload(argv, argv + argc, "--StartType");
if ( arg == "HotStart" ) { StartType = HotStart; } if (arg == "HotStart") {
else if ( arg == "ColdStart" ) { StartType = ColdStart; } StartType = HotStart;
else if ( arg == "TepidStart" ) { StartType = TepidStart; } } else if (arg == "ColdStart") {
else if ( arg == "CheckpointStart" ) { StartType = CheckpointStart; } StartType = ColdStart;
else assert(0); } else if (arg == "TepidStart") {
StartType = TepidStart;
} else if (arg == "CheckpointStart") {
StartType = CheckpointStart;
} else {
std::cout << GridLogError << "Unrecognized option in --StartType\n";
std::cout << GridLogError << "Valid [HotStart, ColdStart, TepidStart, CheckpointStart]\n";
assert(0);
}
} }
int StartTraj = 0; int StartTraj = 0;
if( GridCmdOptionExists(argv,argv+argc,"--StartTrajectory") ){ if (GridCmdOptionExists(argv, argv + argc, "--StartTrajectory")) {
arg= GridCmdOptionPayload(argv,argv+argc,"--StartTrajectory"); arg = GridCmdOptionPayload(argv, argv + argc, "--StartTrajectory");
std::vector<int> ivec(0); std::vector<int> ivec(0);
GridCmdOptionIntVector(arg,ivec); GridCmdOptionIntVector(arg, ivec);
StartTraj = ivec[0]; StartTraj = ivec[0];
} }
int NumTraj = 1; int NumTraj = 1;
if( GridCmdOptionExists(argv,argv+argc,"--Trajectories") ){ if (GridCmdOptionExists(argv, argv + argc, "--Trajectories")) {
arg= GridCmdOptionPayload(argv,argv+argc,"--Trajectories"); arg = GridCmdOptionPayload(argv, argv + argc, "--Trajectories");
std::vector<int> ivec(0); std::vector<int> ivec(0);
GridCmdOptionIntVector(arg,ivec); GridCmdOptionIntVector(arg, ivec);
NumTraj = ivec[0]; NumTraj = ivec[0];
} }
int NumThermalizations = 10; int NumThermalizations = 10;
if( GridCmdOptionExists(argv,argv+argc,"--Thermalizations") ){ if (GridCmdOptionExists(argv, argv + argc, "--Thermalizations")) {
arg= GridCmdOptionPayload(argv,argv+argc,"--Thermalizations"); arg = GridCmdOptionPayload(argv, argv + argc, "--Thermalizations");
std::vector<int> ivec(0); std::vector<int> ivec(0);
GridCmdOptionIntVector(arg,ivec); GridCmdOptionIntVector(arg, ivec);
NumThermalizations = ivec[0]; NumThermalizations = ivec[0];
} }
GridSerialRNG sRNG;
GridParallelRNG pRNG(UGrid);
LatticeGaugeField U(UGrid); // change this to an extended field (smearing class)?
GridSerialRNG sRNG; std::vector<int> SerSeed({1, 2, 3, 4, 5});
GridParallelRNG pRNG(UGrid); std::vector<int> ParSeed({6, 7, 8, 9, 10});
LatticeGaugeField U(UGrid); // change this to an extended field (smearing class)
std::vector<int> SerSeed({1,2,3,4,5});
std::vector<int> ParSeed({6,7,8,9,10});
// Create integrator, including the smearing policy // Create integrator, including the smearing policy
// Smearing policy // Smearing policy, only defined for Nc=3
/*
std::cout << GridLogDebug << " Creating the Stout class\n"; std::cout << GridLogDebug << " Creating the Stout class\n";
double rho = 0.1; // smearing parameter, now hardcoded double rho = 0.1; // smearing parameter, now hardcoded
int Nsmear = 1; // number of smearing levels int Nsmear = 1; // number of smearing levels
Smear_Stout<Gimpl> Stout(rho); Smear_Stout<Gimpl> Stout(rho);
std::cout << GridLogDebug << " Creating the SmearedConfiguration class\n"; std::cout << GridLogDebug << " Creating the SmearedConfiguration class\n";
SmearedConfiguration<Gimpl> SmearingPolicy(UGrid, Nsmear, Stout); //SmearedConfiguration<Gimpl> SmearingPolicy(UGrid, Nsmear, Stout);
std::cout << GridLogDebug << " done\n"; std::cout << GridLogDebug << " done\n";
*/
////////////// //////////////
typedef MinimumNorm2<GaugeField, SmearedConfiguration<Gimpl> > IntegratorType;// change here to change the algorithm NoSmearing<Gimpl> SmearingPolicy;
IntegratorParameters MDpar(20); typedef MinimumNorm2<GaugeField, NoSmearing<Gimpl>, RepresentationsPolicy >
IntegratorType; // change here to change the algorithm
IntegratorParameters MDpar(20, 1.0);
IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy); IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy);
// Checkpoint strategy // Checkpoint strategy
NerscHmcCheckpointer<Gimpl> Checkpoint(std::string("ckpoint_lat"),std::string("ckpoint_rng"),1); NerscHmcCheckpointer<Gimpl> Checkpoint(std::string("ckpoint_lat"),
PlaquetteLogger<Gimpl> PlaqLog(std::string("plaq")); std::string("ckpoint_rng"), 1);
PlaquetteLogger<Gimpl> PlaqLog(std::string("plaq"));
HMCparameters HMCpar; HMCparameters HMCpar;
HMCpar.StartTrajectory = StartTraj; HMCpar.StartTrajectory = StartTraj;
HMCpar.Trajectories = NumTraj; HMCpar.Trajectories = NumTraj;
HMCpar.NoMetropolisUntil = NumThermalizations; HMCpar.NoMetropolisUntil = NumThermalizations;
if ( StartType == HotStart ) { if (StartType == HotStart) {
// Hot start // Hot start
HMCpar.MetropolisTest = true; HMCpar.MetropolisTest = true;
sRNG.SeedFixedIntegers(SerSeed); sRNG.SeedFixedIntegers(SerSeed);
pRNG.SeedFixedIntegers(ParSeed); pRNG.SeedFixedIntegers(ParSeed);
SU3::HotConfiguration(pRNG, U); SU<Nc>::HotConfiguration(pRNG, U);
} else if ( StartType == ColdStart ) { } else if (StartType == ColdStart) {
// Cold start // Cold start
HMCpar.MetropolisTest = true; HMCpar.MetropolisTest = true;
sRNG.SeedFixedIntegers(SerSeed); sRNG.SeedFixedIntegers(SerSeed);
pRNG.SeedFixedIntegers(ParSeed); pRNG.SeedFixedIntegers(ParSeed);
SU3::ColdConfiguration(pRNG, U); SU<Nc>::ColdConfiguration(pRNG, U);
} else if ( StartType == TepidStart ) { } else if (StartType == TepidStart) {
// Tepid start // Tepid start
HMCpar.MetropolisTest = true; HMCpar.MetropolisTest = true;
sRNG.SeedFixedIntegers(SerSeed); sRNG.SeedFixedIntegers(SerSeed);
pRNG.SeedFixedIntegers(ParSeed); pRNG.SeedFixedIntegers(ParSeed);
SU3::TepidConfiguration(pRNG, U); SU<Nc>::TepidConfiguration(pRNG, U);
} else if ( StartType == CheckpointStart ) { } else if (StartType == CheckpointStart) {
HMCpar.MetropolisTest = true; HMCpar.MetropolisTest = true;
// CheckpointRestart // CheckpointRestart
Checkpoint.CheckpointRestore(StartTraj, U, sRNG, pRNG); Checkpoint.CheckpointRestore(StartTraj, U, sRNG, pRNG);
} }
// Attach the gauge field to the smearing Policy and create the fill the smeared set // Attach the gauge field to the smearing Policy and create the fill the
// smeared set
// notice that the unit configuration is singular in this procedure // notice that the unit configuration is singular in this procedure
std::cout << GridLogMessage << "Filling the smeared set\n"; std::cout << GridLogMessage << "Filling the smeared set\n";
SmearingPolicy.set_GaugeField(U); SmearingPolicy.set_GaugeField(U);
HybridMonteCarlo<GaugeField,IntegratorType> HMC(HMCpar, MDynamics,sRNG,pRNG,U); HybridMonteCarlo<GaugeField, IntegratorType> HMC(HMCpar, MDynamics, sRNG,
pRNG, U);
HMC.AddObservable(&Checkpoint); HMC.AddObservable(&Checkpoint);
HMC.AddObservable(&PlaqLog); HMC.AddObservable(&PlaqLog);
// Run it // Run it
HMC.evolve(); HMC.evolve();
} }
}; };
typedef NerscHmcRunnerTemplate<PeriodicGimplR> NerscHmcRunner; typedef NerscHmcRunnerTemplate<PeriodicGimplR> NerscHmcRunner;
typedef NerscHmcRunnerTemplate<PeriodicGimplF> NerscHmcRunnerF; typedef NerscHmcRunnerTemplate<PeriodicGimplF> NerscHmcRunnerF;
typedef NerscHmcRunnerTemplate<PeriodicGimplD> NerscHmcRunnerD; typedef NerscHmcRunnerTemplate<PeriodicGimplD> NerscHmcRunnerD;
typedef NerscHmcRunnerTemplate<PeriodicGimplR> PeriodicNerscHmcRunner; typedef NerscHmcRunnerTemplate<PeriodicGimplR> PeriodicNerscHmcRunner;
typedef NerscHmcRunnerTemplate<PeriodicGimplF> PeriodicNerscHmcRunnerF; typedef NerscHmcRunnerTemplate<PeriodicGimplF> PeriodicNerscHmcRunnerF;
typedef NerscHmcRunnerTemplate<PeriodicGimplD> PeriodicNerscHmcRunnerD; typedef NerscHmcRunnerTemplate<PeriodicGimplD> PeriodicNerscHmcRunnerD;
typedef NerscHmcRunnerTemplate<ConjugateGimplR> ConjugateNerscHmcRunner; typedef NerscHmcRunnerTemplate<ConjugateGimplR> ConjugateNerscHmcRunner;
typedef NerscHmcRunnerTemplate<ConjugateGimplF> ConjugateNerscHmcRunnerF; typedef NerscHmcRunnerTemplate<ConjugateGimplF> ConjugateNerscHmcRunnerF;
typedef NerscHmcRunnerTemplate<ConjugateGimplD> ConjugateNerscHmcRunnerD; typedef NerscHmcRunnerTemplate<ConjugateGimplD> ConjugateNerscHmcRunnerD;
}} template <class RepresentationsPolicy>
using NerscHmcRunnerHirep = NerscHmcRunnerTemplate<PeriodicGimplR, RepresentationsPolicy>;
}
}
#endif #endif

View File

@ -1,33 +1,34 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/hmc/integrators/Integrator.h Source file: ./lib/qcd/hmc/integrators/Integrator.h
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
//-------------------------------------------------------------------- //--------------------------------------------------------------------
/*! @file Integrator.h /*! @file Integrator.h
* @brief Classes for the Molecular Dynamics integrator * @brief Classes for the Molecular Dynamics integrator
@ -40,208 +41,278 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifndef INTEGRATOR_INCLUDED #ifndef INTEGRATOR_INCLUDED
#define INTEGRATOR_INCLUDED #define INTEGRATOR_INCLUDED
//class Observer; // class Observer;
#include <memory> #include <memory>
namespace Grid{ namespace Grid {
namespace QCD{ namespace QCD {
struct IntegratorParameters{ struct IntegratorParameters {
int Nexp;
int MDsteps; // number of outer steps
RealD trajL; // trajectory length
RealD stepsize;
int Nexp; IntegratorParameters(int MDsteps_, RealD trajL_ = 1.0, int Nexp_ = 12)
int MDsteps; // number of outer steps : Nexp(Nexp_),
RealD trajL; // trajectory length MDsteps(MDsteps_),
RealD stepsize; trajL(trajL_),
stepsize(trajL / MDsteps){
IntegratorParameters(int MDsteps_, // empty body constructor
RealD trajL_=1.0, };
int Nexp_=12):
Nexp(Nexp_),
MDsteps(MDsteps_),
trajL(trajL_),
stepsize(trajL/MDsteps)
{
// empty body constructor
};
};
/*! @brief Class for Molecular Dynamics management */
template<class GaugeField, class SmearingPolicy>
class Integrator {
protected:
typedef IntegratorParameters ParameterType;
IntegratorParameters Params;
const ActionSet<GaugeField> as;
int levels; //
double t_U; // Track time passing on each level and for U and for P
std::vector<double> t_P; //
GaugeField P;
SmearingPolicy &Smearer;
// Should match any legal (SU(n)) gauge field
// Need to use this template to match Ncol to pass to SU<N> class
template<int Ncol,class vec> void generate_momenta(Lattice< iVector< iScalar< iMatrix<vec,Ncol> >, Nd> > & P,GridParallelRNG& pRNG){
typedef Lattice< iScalar< iScalar< iMatrix<vec,Ncol> > > > GaugeLinkField;
GaugeLinkField Pmu(P._grid);
Pmu = zero;
for(int mu=0;mu<Nd;mu++){
SU<Ncol>::GaussianLieAlgebraMatrix(pRNG, Pmu);
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}
//ObserverList observers; // not yet
// typedef std::vector<Observer*> ObserverList;
// void register_observers();
// void notify_observers();
void update_P(GaugeField&U, int level, double ep){
t_P[level]+=ep;
update_P(P,U,level,ep);
std::cout<<GridLogIntegrator<<"["<<level<<"] P " << " dt "<< ep <<" : t_P "<< t_P[level] <<std::endl;
}
void update_P(GaugeField &Mom,GaugeField&U, int level,double ep){
// input U actually not used...
for(int a=0; a<as[level].actions.size(); ++a){
GaugeField force(U._grid);
GaugeField& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
as[level].actions.at(a)->deriv(Us,force); // deriv should NOT include Ta
std::cout<< GridLogIntegrator << "Smearing (on/off): "<<as[level].actions.at(a)->is_smeared <<std::endl;
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = Ta(force);
std::cout<< GridLogIntegrator << "Force average: "<< norm2(force)/(U._grid->gSites()) <<std::endl;
Mom -= force*ep;
}
}
void update_U(GaugeField&U, double ep){
update_U(P,U,ep);
t_U+=ep;
int fl = levels-1;
std::cout<< GridLogIntegrator <<" "<<"["<<fl<<"] U " << " dt "<< ep <<" : t_U "<< t_U <<std::endl;
}
void update_U(GaugeField &Mom, GaugeField&U, double ep){
//rewrite exponential to deal automatically with the lorentz index?
// GaugeLinkField Umu(U._grid);
// GaugeLinkField Pmu(U._grid);
for (int mu = 0; mu < Nd; mu++){
auto Umu=PeekIndex<LorentzIndex>(U, mu);
auto Pmu=PeekIndex<LorentzIndex>(Mom, mu);
Umu = expMat(Pmu, ep, Params.Nexp)*Umu;
ProjectOnGroup(Umu);
PokeIndex<LorentzIndex>(U, Umu, mu);
}
// Update the smeared fields, can be implemented as observer
Smearer.set_GaugeField(U);
}
virtual void step (GaugeField& U,int level, int first,int last)=0;
public:
Integrator(GridBase* grid,
IntegratorParameters Par,
ActionSet<GaugeField> & Aset,
SmearingPolicy &Sm):
Params(Par),
as(Aset),
P(grid),
levels(Aset.size()),
Smearer(Sm)
{
t_P.resize(levels,0.0);
t_U=0.0;
// initialization of smearer delegated outside of Integrator
};
virtual ~Integrator(){}
//Initialization of momenta and actions
void refresh(GaugeField& U,GridParallelRNG &pRNG){
std::cout<<GridLogIntegrator<< "Integrator refresh\n";
generate_momenta(P,pRNG);
for(int level=0; level< as.size(); ++level){
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
GaugeField& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh(Us, pRNG);
}
}
}
// Calculate action
RealD S(GaugeField& U){// here also U not used
LatticeComplex Hloc(U._grid); Hloc = zero;
// Momenta
for (int mu=0; mu <Nd; mu++){
auto Pmu = PeekIndex<LorentzIndex>(P, mu);
Hloc -= trace(Pmu*Pmu);
}
Complex Hsum = sum(Hloc);
RealD H = Hsum.real();
RealD Hterm;
std::cout<<GridLogMessage << "Momentum action H_p = "<< H << "\n";
// Actions
for(int level=0; level<as.size(); ++level){
for(int actionID=0; actionID<as[level].actions.size(); ++actionID){
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
GaugeField& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
Hterm = as[level].actions.at(actionID)->S(Us);
std::cout<<GridLogMessage << "S Level "<<level<<" term "<<actionID<<" H = "<<Hterm<<std::endl;
H += Hterm;
}
}
return H;
}
void integrate(GaugeField& U){
// reset the clocks
t_U=0;
for(int level=0; level<as.size(); ++level){
t_P[level]=0;
}
for(int step=0; step< Params.MDsteps; ++step){ // MD step
int first_step = (step==0);
int last_step = (step==Params.MDsteps-1);
this->step(U,0,first_step,last_step);
}
// Check the clocks all match on all levels
for(int level=0; level<as.size(); ++level){
assert(fabs(t_U - t_P[level])<1.0e-6); // must be the same
std::cout<<GridLogIntegrator<<" times["<<level<<"]= "<<t_P[level]<< " " << t_U <<std::endl;
}
// and that we indeed got to the end of the trajectory
assert(fabs(t_U-Params.trajL) < 1.0e-6);
}
}; };
/*! @brief Class for Molecular Dynamics management */
template <class GaugeField, class SmearingPolicy, class RepresentationPolicy>
class Integrator {
protected:
typedef IntegratorParameters ParameterType;
IntegratorParameters Params;
const ActionSet<GaugeField, RepresentationPolicy> as;
int levels; //
double t_U; // Track time passing on each level and for U and for P
std::vector<double> t_P; //
GaugeField P;
SmearingPolicy& Smearer;
RepresentationPolicy Representations;
// Should match any legal (SU(n)) gauge field
// Need to use this template to match Ncol to pass to SU<N> class
template <int Ncol, class vec>
void generate_momenta(Lattice<iVector<iScalar<iMatrix<vec, Ncol> >, Nd> >& P,
GridParallelRNG& pRNG) {
typedef Lattice<iScalar<iScalar<iMatrix<vec, Ncol> > > > GaugeLinkField;
GaugeLinkField Pmu(P._grid);
Pmu = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Ncol>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
PokeIndex<LorentzIndex>(P, Pmu, mu);
}
}
// ObserverList observers; // not yet
// typedef std::vector<Observer*> ObserverList;
// void register_observers();
// void notify_observers();
void update_P(GaugeField& U, int level, double ep) {
t_P[level] += ep;
update_P(P, U, level, ep);
std::cout << GridLogIntegrator << "[" << level << "] P "
<< " dt " << ep << " : t_P " << t_P[level] << std::endl;
}
// to be used by the actionlevel class to iterate
// over the representations
struct _updateP {
template <class FieldType, class GF, class Repr>
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
GF& Mom, GF& U, double ep) {
for (int a = 0; a < repr_set.size(); ++a) {
FieldType forceR(U._grid);
// Implement smearing only for the fundamental representation now
repr_set.at(a)->deriv(Rep.U, forceR);
GF force =
Rep.RtoFundamentalProject(forceR); // Ta for the fundamental rep
std::cout << GridLogIntegrator << "Hirep Force average: "
<< norm2(force) / (U._grid->gSites()) << std::endl;
Mom -= force * ep ;
}
}
} update_P_hireps{};
void update_P(GaugeField& Mom, GaugeField& U, int level, double ep) {
// input U actually not used in the fundamental case
// Fundamental updates, include smearing
for (int a = 0; a < as[level].actions.size(); ++a) {
GaugeField force(U._grid);
GaugeField& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
std::cout << GridLogIntegrator
<< "Smearing (on/off): " << as[level].actions.at(a)->is_smeared
<< std::endl;
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
force = Ta(force);
std::cout << GridLogIntegrator
<< "Force average: " << norm2(force) / (U._grid->gSites())
<< std::endl;
Mom -= force * ep;
}
// Force from the other representations
as[level].apply(update_P_hireps, Representations, Mom, U, ep);
}
void update_U(GaugeField& U, double ep) {
update_U(P, U, ep);
t_U += ep;
int fl = levels - 1;
std::cout << GridLogIntegrator << " "
<< "[" << fl << "] U "
<< " dt " << ep << " : t_U " << t_U << std::endl;
}
void update_U(GaugeField& Mom, GaugeField& U, double ep) {
// rewrite exponential to deal internally with the lorentz index?
for (int mu = 0; mu < Nd; mu++) {
auto Umu = PeekIndex<LorentzIndex>(U, mu);
auto Pmu = PeekIndex<LorentzIndex>(Mom, mu);
Umu = expMat(Pmu, ep, Params.Nexp) * Umu;
PokeIndex<LorentzIndex>(U, ProjectOnGroup(Umu), mu);
}
// Update the smeared fields, can be implemented as observer
Smearer.set_GaugeField(U);
// Update the higher representations fields
Representations.update(U); // void functions if fundamental representation
}
virtual void step(GaugeField& U, int level, int first, int last) = 0;
public:
Integrator(GridBase* grid, IntegratorParameters Par,
ActionSet<GaugeField, RepresentationPolicy>& Aset,
SmearingPolicy& Sm)
: Params(Par),
as(Aset),
P(grid),
levels(Aset.size()),
Smearer(Sm),
Representations(grid) {
t_P.resize(levels, 0.0);
t_U = 0.0;
// initialization of smearer delegated outside of Integrator
};
virtual ~Integrator() {}
// to be used by the actionlevel class to iterate
// over the representations
struct _refresh {
template <class FieldType, class Repr>
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
GridParallelRNG& pRNG) {
for (int a = 0; a < repr_set.size(); ++a){
repr_set.at(a)->refresh(Rep.U, pRNG);
std::cout << GridLogDebug << "Hirep refreshing pseudofermions" << std::endl;
}
}
} refresh_hireps{};
// Initialization of momenta and actions
void refresh(GaugeField& U, GridParallelRNG& pRNG) {
std::cout << GridLogIntegrator << "Integrator refresh\n";
generate_momenta(P, pRNG);
// Update the smeared fields, can be implemented as observer
// necessary to keep the fields updated even after a reject
// of the Metropolis
Smearer.set_GaugeField(U);
// Set the (eventual) representations gauge fields
Representations.update(U);
// The Smearer is attached to a pointer of the gauge field
// automatically gets the correct field
// whether or not has been accepted in the previous sweep
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
GaugeField& Us =
Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
as[level].actions.at(actionID)->refresh(Us, pRNG);
}
// Refresh the higher representation actions
as[level].apply(refresh_hireps, Representations, pRNG);
}
}
// to be used by the actionlevel class to iterate
// over the representations
struct _S {
template <class FieldType, class Repr>
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep,
int level, RealD& H) {
for (int a = 0; a < repr_set.size(); ++a) {
RealD Hterm = repr_set.at(a)->S(Rep.U);
std::cout << GridLogMessage << "S Level " << level << " term " << a
<< " H Hirep = " << Hterm << std::endl;
H += Hterm;
}
}
} S_hireps{};
// Calculate action
RealD S(GaugeField& U) { // here also U not used
LatticeComplex Hloc(U._grid);
Hloc = zero;
// Momenta
for (int mu = 0; mu < Nd; mu++) {
auto Pmu = PeekIndex<LorentzIndex>(P, mu);
Hloc -= trace(Pmu * Pmu);
}
Complex Hsum = sum(Hloc);
RealD H = Hsum.real();
RealD Hterm;
std::cout << GridLogMessage << "Momentum action H_p = " << H << "\n";
// Actions
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
GaugeField& Us =
Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
Hterm = as[level].actions.at(actionID)->S(Us);
std::cout << GridLogMessage << "S Level " << level << " term "
<< actionID << " H = " << Hterm << std::endl;
H += Hterm;
}
as[level].apply(S_hireps, Representations, level, H);
}
return H;
}
void integrate(GaugeField& U) {
// reset the clocks
t_U = 0;
for (int level = 0; level < as.size(); ++level) {
t_P[level] = 0;
}
for (int step = 0; step < Params.MDsteps; ++step) { // MD step
int first_step = (step == 0);
int last_step = (step == Params.MDsteps - 1);
this->step(U, 0, first_step, last_step);
}
// Check the clocks all match on all levels
for (int level = 0; level < as.size(); ++level) {
assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same
std::cout << GridLogIntegrator << " times[" << level
<< "]= " << t_P[level] << " " << t_U << std::endl;
}
// and that we indeed got to the end of the trajectory
assert(fabs(t_U - Params.trajL) < 1.0e-6);
}
};
} }
} }
#endif//INTEGRATOR_INCLUDED #endif // INTEGRATOR_INCLUDED

View File

@ -91,17 +91,19 @@ namespace Grid{
* P 1/2 P 1/2 * P 1/2 P 1/2
*/ */
template<class GaugeField, class SmearingPolicy> class LeapFrog : template<class GaugeField,
public Integrator<GaugeField, SmearingPolicy> { class SmearingPolicy,
class RepresentationPolicy = Representations< FundamentalRepresentation > > class LeapFrog :
public Integrator<GaugeField, SmearingPolicy, RepresentationPolicy> {
public: public:
typedef LeapFrog<GaugeField, SmearingPolicy> Algorithm; typedef LeapFrog<GaugeField, SmearingPolicy, RepresentationPolicy> Algorithm;
LeapFrog(GridBase* grid, LeapFrog(GridBase* grid,
IntegratorParameters Par, IntegratorParameters Par,
ActionSet<GaugeField> & Aset, ActionSet<GaugeField, RepresentationPolicy> & Aset,
SmearingPolicy & Sm): SmearingPolicy & Sm):
Integrator<GaugeField, SmearingPolicy>(grid,Par,Aset,Sm) {}; Integrator<GaugeField, SmearingPolicy, RepresentationPolicy>(grid,Par,Aset,Sm) {};
void step (GaugeField& U, int level,int _first, int _last){ void step (GaugeField& U, int level,int _first, int _last){
@ -138,8 +140,10 @@ namespace Grid{
} }
}; };
template<class GaugeField, class SmearingPolicy> class MinimumNorm2 : template<class GaugeField,
public Integrator<GaugeField, SmearingPolicy> { class SmearingPolicy,
class RepresentationPolicy = Representations < FundamentalRepresentation > > class MinimumNorm2 :
public Integrator<GaugeField, SmearingPolicy, RepresentationPolicy> {
private: private:
const RealD lambda = 0.1931833275037836; const RealD lambda = 0.1931833275037836;
@ -147,9 +151,9 @@ namespace Grid{
MinimumNorm2(GridBase* grid, MinimumNorm2(GridBase* grid,
IntegratorParameters Par, IntegratorParameters Par,
ActionSet<GaugeField> & Aset, ActionSet<GaugeField, RepresentationPolicy> & Aset,
SmearingPolicy& Sm): SmearingPolicy& Sm):
Integrator<GaugeField, SmearingPolicy>(grid,Par,Aset,Sm) {}; Integrator<GaugeField, SmearingPolicy, RepresentationPolicy>(grid,Par,Aset,Sm) {};
void step (GaugeField& U, int level, int _first,int _last){ void step (GaugeField& U, int level, int _first,int _last){
@ -197,8 +201,10 @@ namespace Grid{
}; };
template<class GaugeField, class SmearingPolicy> class ForceGradient : template<class GaugeField,
public Integrator<GaugeField, SmearingPolicy> { class SmearingPolicy,
class RepresentationPolicy = Representations< FundamentalRepresentation > > class ForceGradient :
public Integrator<GaugeField, SmearingPolicy, RepresentationPolicy> {
private: private:
const RealD lambda = 1.0/6.0;; const RealD lambda = 1.0/6.0;;
const RealD chi = 1.0/72.0; const RealD chi = 1.0/72.0;
@ -209,9 +215,9 @@ namespace Grid{
// Looks like dH scales as dt^4. tested wilson/wilson 2 level. // Looks like dH scales as dt^4. tested wilson/wilson 2 level.
ForceGradient(GridBase* grid, ForceGradient(GridBase* grid,
IntegratorParameters Par, IntegratorParameters Par,
ActionSet<GaugeField> & Aset, ActionSet<GaugeField, RepresentationPolicy> & Aset,
SmearingPolicy &Sm): SmearingPolicy &Sm):
Integrator<GaugeField, SmearingPolicy>(grid,Par,Aset, Sm) {}; Integrator<GaugeField, SmearingPolicy, RepresentationPolicy>(grid,Par,Aset, Sm) {};
void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){ void FG_update_P(GaugeField&U, int level,double fg_dt,double ep){

View File

@ -0,0 +1,115 @@
/*
* Policy classes for the HMC
* Author: Guido Cossu
*/
#ifndef ADJOINT_H
#define ADJOINT_H
namespace Grid {
namespace QCD {
/*
* This is an helper class for the HMC
* Should contain only the data for the adjoint representation
* and the facility to convert from the fundamental -> adjoint
*/
template <int ncolour>
class AdjointRep {
public:
// typdef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU_Adjoint<ncolour>::LatticeAdjMatrix LatticeMatrix;
typedef typename SU_Adjoint<ncolour>::LatticeAdjField LatticeField;
static const int Dimension = ncolour * ncolour - 1;
LatticeField U;
explicit AdjointRep(GridBase *grid) : U(grid) {}
void update_representation(const LatticeGaugeField &Uin) {
std::cout << GridLogDebug << "Updating adjoint representation\n";
// Uin is in the fundamental representation
// get the U in AdjointRep
// (U_adj)_B = tr[e^a U e^b U^dag]
// e^a = t^a/sqrt(T_F)
// where t^a is the generator in the fundamental
// T_F is 1/2 for the fundamental representation
conformable(U, Uin);
U = zero;
LatticeColourMatrix tmp(Uin._grid);
Vector<typename SU<ncolour>::Matrix> ta(Dimension);
// Debug lines
// LatticeMatrix uno(Uin._grid);
// uno = 1.0;
////////////////
// FIXME probably not very efficient to get all the generators
// everytime
for (int a = 0; a < Dimension; a++) SU<ncolour>::generator(a, ta[a]);
for (int mu = 0; mu < Nd; mu++) {
auto Uin_mu = peekLorentz(Uin, mu);
auto U_mu = peekLorentz(U, mu);
for (int a = 0; a < Dimension; a++) {
tmp = 2.0 * adj(Uin_mu) * ta[a] * Uin_mu;
for (int b = 0; b < Dimension; b++)
pokeColour(U_mu, trace(tmp * ta[b]), a, b);
}
pokeLorentz(U, U_mu, mu);
// Check matrix U_mu, must be real orthogonal
// reality
/*
LatticeMatrix Ucheck = U_mu - conjugate(U_mu);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck) <<
std::endl;
Ucheck = U_mu * adj(U_mu) - uno;
std::cout << GridLogMessage << "orthogonality check: " << norm2(Ucheck) <<
std::endl;
*/
}
}
LatticeGaugeField RtoFundamentalProject(const LatticeField &in,
Real scale = 1.0) const {
LatticeGaugeField out(in._grid);
out = zero;
for (int mu = 0; mu < Nd; mu++) {
LatticeColourMatrix out_mu(in._grid); // fundamental representation
LatticeMatrix in_mu = peekLorentz(in, mu);
out_mu = zero;
typename SU<ncolour>::LatticeAlgebraVector h(in._grid);
projectOnAlgebra(h, in_mu, double(Nc) * 2.0); // factor C(r)/C(fund)
FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once
pokeLorentz(out, out_mu, mu);
// Returns traceless antihermitian matrix Nc * Nc.
// Confirmed
}
return out;
}
private:
void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) const {
SU_Adjoint<ncolour>::projectOnAlgebra(h_out, in, scale);
}
void FundamentalLieAlgebraMatrix(
typename SU<ncolour>::LatticeAlgebraVector &h,
typename SU<ncolour>::LatticeMatrix &out, Real scale = 1.0) const {
SU<ncolour>::FundamentalLieAlgebraMatrix(h, out, scale);
}
};
typedef AdjointRep<Nc> AdjointRepresentation;
}
}
#endif

View File

@ -0,0 +1,45 @@
/*
* Policy classes for the HMC
* Author: Guido Cossu
*/
#ifndef FUNDAMENTAL_H
#define FUNDAMENTAL_H
namespace Grid {
namespace QCD {
/*
* This is an helper class for the HMC
* Empty since HMC updates already the fundamental representation
*/
template <int ncolour>
class FundamentalRep {
public:
static const int Dimension = ncolour;
// typdef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU<ncolour>::LatticeMatrix LatticeMatrix;
typedef LatticeGaugeField LatticeField;
explicit FundamentalRep(GridBase* grid) {} //do nothing
void update_representation(const LatticeGaugeField& Uin) {} // do nothing
LatticeField RtoFundamentalProject(const LatticeField& in, Real scale = 1.0) const{
return (scale * in);
}
};
typedef FundamentalRep<Nc> FundamentalRepresentation;
}
}
#endif

View File

@ -0,0 +1,91 @@
#ifndef HMC_TYPES_H
#define HMC_TYPES_H
#include <Grid/qcd/representations/adjoint.h>
#include <Grid/qcd/representations/two_index.h>
#include <Grid/qcd/representations/fundamental.h>
#include <tuple>
#include <utility>
namespace Grid {
namespace QCD {
// Supported types
// enum {Fundamental, Adjoint} repr_type;
// Utility to add support to the HMC for representations other than the
// fundamental
template <class... Reptypes>
class Representations {
public:
typedef std::tuple<Reptypes...> Representation_type;
// Size of the tuple, known at compile time
static const int tuple_size = sizeof...(Reptypes);
// The collection of types for the gauge fields
typedef std::tuple<typename Reptypes::LatticeField...> Representation_Fields;
// To access the Reptypes (FundamentalRepresentation, AdjointRepresentation)
template <std::size_t N>
using repr_type = typename std::tuple_element<N, Representation_type>::type;
// in order to get the typename of the field use
// type repr_type<I>::LatticeField
Representation_type rep;
// Multiple types constructor
explicit Representations(GridBase* grid) : rep(Reptypes(grid)...){};
int size() { return tuple_size; }
// update the fields
template <std::size_t I = 0>
inline typename std::enable_if<(I == tuple_size), void>::type update(
LatticeGaugeField& U) {}
template <std::size_t I = 0>
inline typename std::enable_if<(I < tuple_size), void>::type update(
LatticeGaugeField& U) {
std::get<I>(rep).update_representation(U);
update<I + 1>(U);
}
};
typedef Representations<FundamentalRepresentation> NoHirep;
// Helper classes to access the elements
// Strips the first N parameters from the tuple
// sequence of classes to obtain the S sequence
// Creates a type that is a tuple of vectors of the template type A
template <template <typename> class A, class TupleClass,
size_t N = TupleClass::tuple_size, size_t... S>
struct AccessTypes : AccessTypes<A, TupleClass, N - 1, N - 1, S...> {};
template <template <typename> class A, class TupleClass, size_t... S>
struct AccessTypes<A, TupleClass, 0, S...> {
public:
typedef typename TupleClass::Representation_Fields Rfields;
template <std::size_t N>
using elem = typename std::tuple_element<N, Rfields>::type; // fields types
typedef std::tuple<std::vector< A< elem<S> >* > ... > VectorCollection;
typedef std::tuple< elem<S> ... > FieldTypeCollection;
// Debug
void return_size() {
std::cout << GridLogMessage
<< "Access:" << std::tuple_size<std::tuple<elem<S>...> >::value
<< "\n";
std::cout << GridLogMessage
<< "Access vectors:" << std::tuple_size<VectorCollection>::value
<< "\n";
}
};
}
}
#endif

View File

@ -0,0 +1,99 @@
/*
* Policy classes for the HMC
* Authors: Guido Cossu, David Preti
*/
#ifndef SUN2INDEX_H_H
#define SUN2INDEX_H_H
namespace Grid {
namespace QCD {
/*
* This is an helper class for the HMC
* Should contain only the data for the two index representations
* and the facility to convert from the fundamental -> two index
* The templated parameter TwoIndexSymmetry choses between the
* symmetric and antisymmetric representations
*
* There is an
* enum TwoIndexSymmetry { Symmetric = 1, AntiSymmetric = -1 };
* in the SUnTwoIndex.h file
*/
template <int ncolour, TwoIndexSymmetry S>
class TwoIndexRep {
public:
// typdef to be used by the Representations class in HMC to get the
// types for the higher representation fields
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexMatrix LatticeMatrix;
typedef typename SU_TwoIndex<ncolour, S>::LatticeTwoIndexField LatticeField;
static const int Dimension = ncolour * (ncolour + S) / 2;
LatticeField U;
explicit TwoIndexRep(GridBase *grid) : U(grid) {}
void update_representation(const LatticeGaugeField &Uin) {
std::cout << GridLogDebug << "Updating TwoIndex representation\n";
// Uin is in the fundamental representation
// get the U in TwoIndexRep
// (U)_{(ij)(lk)} = tr [ adj(e^(ij)) U e^(lk) transpose(U) ]
conformable(U, Uin);
U = zero;
LatticeColourMatrix tmp(Uin._grid);
Vector<typename SU<ncolour>::Matrix> eij(Dimension);
for (int a = 0; a < Dimension; a++)
SU_TwoIndex<ncolour, S>::base(a, eij[a]);
for (int mu = 0; mu < Nd; mu++) {
auto Uin_mu = peekLorentz(Uin, mu);
auto U_mu = peekLorentz(U, mu);
for (int a = 0; a < Dimension; a++) {
tmp = transpose(Uin_mu) * adj(eij[a]) * Uin_mu;
for (int b = 0; b < Dimension; b++)
pokeColour(U_mu, trace(tmp * eij[b]), a, b);
}
pokeLorentz(U, U_mu, mu);
}
}
LatticeGaugeField RtoFundamentalProject(const LatticeField &in,
Real scale = 1.0) const {
LatticeGaugeField out(in._grid);
out = zero;
for (int mu = 0; mu < Nd; mu++) {
LatticeColourMatrix out_mu(in._grid); // fundamental representation
LatticeMatrix in_mu = peekLorentz(in, mu);
out_mu = zero;
typename SU<ncolour>::LatticeAlgebraVector h(in._grid);
projectOnAlgebra(h, in_mu, double(Nc + 2 * S)); // factor T(r)/T(fund)
FundamentalLieAlgebraMatrix(h, out_mu); // apply scale only once
pokeLorentz(out, out_mu, mu);
}
return out;
}
private:
void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeMatrix &in, Real scale = 1.0) const {
SU_TwoIndex<ncolour, S>::projectOnAlgebra(h_out, in, scale);
}
void FundamentalLieAlgebraMatrix(
typename SU<ncolour>::LatticeAlgebraVector &h,
typename SU<ncolour>::LatticeMatrix &out, Real scale = 1.0) const {
SU<ncolour>::FundamentalLieAlgebraMatrix(h, out, scale);
}
};
typedef TwoIndexRep<Nc, Symmetric> TwoIndexSymmetricRepresentation;
typedef TwoIndexRep<Nc, AntiSymmetric> TwoIndexAntiSymmetricRepresentation;
}
}
#endif

View File

@ -10,6 +10,29 @@ namespace Grid {
namespace QCD { namespace QCD {
//trivial class for no smearing
template< class Gimpl >
class NoSmearing {
public:
INHERIT_GIMPL_TYPES(Gimpl);
GaugeField*
ThinLinks;
NoSmearing(): ThinLinks(NULL) {}
void set_GaugeField(GaugeField& U) { ThinLinks = &U; }
void smeared_force(GaugeField& SigmaTilde) const {}
GaugeField& get_SmearedU() { return *ThinLinks; }
GaugeField& get_U(bool smeared = false) {
return *ThinLinks;
}
};
/*! /*!
@brief Smeared configuration container @brief Smeared configuration container
@ -201,6 +224,8 @@ class SmearedConfiguration {
SmearedConfiguration() SmearedConfiguration()
: smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {} : smearingLevels(0), StoutSmearing(), SmearedSet(), ThinLinks(NULL) {}
// attach the smeared routines to the thin links U and fill the smeared set // attach the smeared routines to the thin links U and fill the smeared set
void set_GaugeField(GaugeField& U) { fill_smearedSet(U); } void set_GaugeField(GaugeField& U) { fill_smearedSet(U); }

View File

@ -18,14 +18,12 @@ class Smear_Stout : public Smear<Gimpl> {
INHERIT_GIMPL_TYPES(Gimpl) INHERIT_GIMPL_TYPES(Gimpl)
Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) { Smear_Stout(Smear<Gimpl>* base) : SmearBase(base) {
static_assert(Nc == 3, assert(Nc == 3);// "Stout smearing currently implemented only for Nc==3");
"Stout smearing currently implemented only for Nc==3");
} }
/*! Default constructor */ /*! Default constructor */
Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) { Smear_Stout(double rho = 1.0) : SmearBase(new Smear_APE<Gimpl>(rho)) {
static_assert(Nc == 3, assert(Nc == 3);// "Stout smearing currently implemented only for Nc==3");
"Stout smearing currently implemented only for Nc==3");
} }
~Smear_Stout() {} // delete SmearBase... ~Smear_Stout() {} // delete SmearBase...

File diff suppressed because it is too large Load Diff

182
lib/qcd/utils/SUnAdjoint.h Normal file
View File

@ -0,0 +1,182 @@
#ifndef QCD_UTIL_SUNADJOINT_H
#define QCD_UTIL_SUNADJOINT_H
////////////////////////////////////////////////////////////////////////
//
// * Adjoint representation generators
//
// * Normalisation for the fundamental generators:
// trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
//
// base for NxN hermitian traceless matrices
// normalized to 1:
//
// (e_Adj)^a = t^a / sqrt(T_F)
//
// then the real, antisymmetric generators for the adjoint representations
// are computed ( shortcut: e^a == (e_Adj)^a )
//
// (iT_adj)^d_ba = i tr[e^a t^d e^b - t^d e^a e^b]
//
////////////////////////////////////////////////////////////////////////
namespace Grid {
namespace QCD {
template <int ncolour>
class SU_Adjoint : public SU<ncolour> {
public:
static const int Dimension = ncolour * ncolour - 1;
template <typename vtype>
using iSUnAdjointMatrix =
iScalar<iScalar<iMatrix<vtype, Dimension > > >;
// Actually the adjoint matrices are real...
// Consider this overhead... FIXME
typedef iSUnAdjointMatrix<Complex> AMatrix;
typedef iSUnAdjointMatrix<ComplexF> AMatrixF;
typedef iSUnAdjointMatrix<ComplexD> AMatrixD;
typedef iSUnAdjointMatrix<vComplex> vAMatrix;
typedef iSUnAdjointMatrix<vComplexF> vAMatrixF;
typedef iSUnAdjointMatrix<vComplexD> vAMatrixD;
typedef Lattice<vAMatrix> LatticeAdjMatrix;
typedef Lattice<vAMatrixF> LatticeAdjMatrixF;
typedef Lattice<vAMatrixD> LatticeAdjMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeAdjField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeAdjFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeAdjFieldD;
template <class cplx>
static void generator(int Index, iSUnAdjointMatrix<cplx> &iAdjTa) {
// returns i(T_Adj)^index necessary for the projectors
// see definitions above
iAdjTa = zero;
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > ta(ncolour * ncolour - 1);
typename SU<ncolour>::template iSUnMatrix<cplx> tmp;
// FIXME not very efficient to get all the generators everytime
for (int a = 0; a < Dimension; a++) SU<ncolour>::generator(a, ta[a]);
for (int a = 0; a < Dimension; a++) {
tmp = ta[a] * ta[Index] - ta[Index] * ta[a];
for (int b = 0; b < (ncolour * ncolour - 1); b++) {
typename SU<ncolour>::template iSUnMatrix<cplx> tmp1 =
2.0 * tmp * ta[b]; // 2.0 from the normalization
Complex iTr = TensorRemove(timesI(trace(tmp1)));
//iAdjTa()()(b, a) = iTr;
iAdjTa()()(a, b) = iTr;
}
}
}
static void printGenerators(void) {
for (int gen = 0; gen < Dimension; gen++) {
AMatrix ta;
generator(gen, ta);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << ta << std::endl;
}
}
static void testGenerators(void) {
AMatrix adjTa;
std::cout << GridLogMessage << "Adjoint - Checking if real" << std::endl;
for (int a = 0; a < Dimension; a++) {
generator(a, adjTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adjTa - conjugate(adjTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "Adjoint - Checking if antisymmetric"
<< std::endl;
for (int a = 0; a < Dimension; a++) {
generator(a, adjTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adjTa + transpose(adjTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
}
static void AdjointLieAlgebraMatrix(
const typename SU<ncolour>::LatticeAlgebraVector &h,
LatticeAdjMatrix &out, Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out._grid;
LatticeAdjMatrix la(grid);
AMatrix iTa;
out = zero;
for (int a = 0; a < Dimension; a++) {
generator(a, iTa);
la = peekColour(h, a) * iTa;
out += la;
}
out *= scale;
}
// Projects the algebra components a lattice matrix (of dimension ncol*ncol -1 )
static void projectOnAlgebra(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = zero;
AMatrix iTa;
Real coefficient = - 1.0/(ncolour) * scale;// 1/Nc for the normalization of the trace in the adj rep
for (int a = 0; a < Dimension; a++) {
generator(a, iTa);
auto tmp = real(trace(iTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
// a projector that keeps the generators stored to avoid the overhead of recomputing them
static void projector(typename SU<ncolour>::LatticeAlgebraVector &h_out, const LatticeAdjMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
static std::vector<AMatrix> iTa(Dimension); // to store the generators
h_out = zero;
static bool precalculated = false;
if (!precalculated){
precalculated = true;
for (int a = 0; a < Dimension; a++) generator(a, iTa[a]);
}
Real coefficient = -1.0 / (ncolour) * scale; // 1/Nc for the normalization of
// the trace in the adj rep
for (int a = 0; a < Dimension; a++) {
auto tmp = real(trace(iTa[a] * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
};
// Some useful type names
typedef SU_Adjoint<2> SU2Adjoint;
typedef SU_Adjoint<3> SU3Adjoint;
typedef SU_Adjoint<4> SU4Adjoint;
typedef SU_Adjoint<5> SU5Adjoint;
typedef SU_Adjoint<Nc> AdjointMatrices;
}
}
#endif

276
lib/qcd/utils/SUnTwoIndex.h Normal file
View File

@ -0,0 +1,276 @@
////////////////////////////////////////////////////////////////////////
//
// * Two index representation generators
//
// * Normalisation for the fundamental generators:
// trace ta tb = 1/2 delta_ab = T_F delta_ab
// T_F = 1/2 for SU(N) groups
//
//
// base for NxN two index (anti-symmetric) matrices
// normalized to 1 (d_ij is the kroenecker delta)
//
// (e^(ij)_{kl} = 1 / sqrt(2) (d_ik d_jl +/- d_jk d_il)
//
// Then the generators are written as
//
// (iT_a)^(ij)(lk) = i * ( tr[e^(ij)^dag e^(lk) T^trasp_a] +
// tr[e^(lk)e^(ij)^dag T_a] ) //
//
//
////////////////////////////////////////////////////////////////////////
// Authors: David Preti, Guido Cossu
#ifndef QCD_UTIL_SUN2INDEX_H
#define QCD_UTIL_SUN2INDEX_H
namespace Grid {
namespace QCD {
enum TwoIndexSymmetry { Symmetric = 1, AntiSymmetric = -1 };
inline Real delta(int a, int b) { return (a == b) ? 1.0 : 0.0; }
template <int ncolour, TwoIndexSymmetry S>
class SU_TwoIndex : public SU<ncolour> {
public:
static const int Dimension = ncolour * (ncolour + S) / 2;
static const int NumGenerators = SU<ncolour>::AdjointDimension;
template <typename vtype>
using iSUnTwoIndexMatrix = iScalar<iScalar<iMatrix<vtype, Dimension> > >;
typedef iSUnTwoIndexMatrix<Complex> TIMatrix;
typedef iSUnTwoIndexMatrix<ComplexF> TIMatrixF;
typedef iSUnTwoIndexMatrix<ComplexD> TIMatrixD;
typedef iSUnTwoIndexMatrix<vComplex> vTIMatrix;
typedef iSUnTwoIndexMatrix<vComplexF> vTIMatrixF;
typedef iSUnTwoIndexMatrix<vComplexD> vTIMatrixD;
typedef Lattice<vTIMatrix> LatticeTwoIndexMatrix;
typedef Lattice<vTIMatrixF> LatticeTwoIndexMatrixF;
typedef Lattice<vTIMatrixD> LatticeTwoIndexMatrixD;
typedef Lattice<iVector<iScalar<iMatrix<vComplex, Dimension> >, Nd> >
LatticeTwoIndexField;
typedef Lattice<iVector<iScalar<iMatrix<vComplexF, Dimension> >, Nd> >
LatticeTwoIndexFieldF;
typedef Lattice<iVector<iScalar<iMatrix<vComplexD, Dimension> >, Nd> >
LatticeTwoIndexFieldD;
template <typename vtype>
using iSUnMatrix = iScalar<iScalar<iMatrix<vtype, ncolour> > >;
typedef iSUnMatrix<Complex> Matrix;
typedef iSUnMatrix<ComplexF> MatrixF;
typedef iSUnMatrix<ComplexD> MatrixD;
template <class cplx>
static void base(int Index, iSUnMatrix<cplx> &eij) {
// returns (e)^(ij)_{kl} necessary for change of base U_F -> U_R
assert(Index < NumGenerators);
eij = zero;
// for the linearisation of the 2 indexes
static int a[ncolour * (ncolour - 1) / 2][2]; // store the a <-> i,j
static bool filled = false;
if (!filled) {
int counter = 0;
for (int i = 1; i < ncolour; i++) {
for (int j = 0; j < i; j++) {
a[counter][0] = i;
a[counter][1] = j;
counter++;
}
}
filled = true;
}
if (Index < ncolour * (ncolour - 1) / 2) {
baseOffDiagonal(a[Index][0], a[Index][1], eij);
} else {
baseDiagonal(Index, eij);
}
}
template <class cplx>
static void baseDiagonal(int Index, iSUnMatrix<cplx> &eij) {
eij = zero;
eij()()(Index - ncolour * (ncolour - 1) / 2,
Index - ncolour * (ncolour - 1) / 2) = 1.0;
}
template <class cplx>
static void baseOffDiagonal(int i, int j, iSUnMatrix<cplx> &eij) {
eij = zero;
for (int k = 0; k < ncolour; k++)
for (int l = 0; l < ncolour; l++)
eij()()(l, k) = delta(i, k) * delta(j, l) +
S * delta(j, k) * delta(i, l);
RealD nrm = 1. / std::sqrt(2.0);
eij = eij * nrm;
}
static void printBase(void) {
for (int gen = 0; gen < Dimension; gen++) {
Matrix tmp;
base(gen, tmp);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << tmp << std::endl;
}
}
template <class cplx>
static void generator(int Index, iSUnTwoIndexMatrix<cplx> &i2indTa) {
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > ta(
ncolour * ncolour - 1);
Vector<typename SU<ncolour>::template iSUnMatrix<cplx> > eij(Dimension);
typename SU<ncolour>::template iSUnMatrix<cplx> tmp;
i2indTa = zero;
for (int a = 0; a < ncolour * ncolour - 1; a++)
SU<ncolour>::generator(a, ta[a]);
for (int a = 0; a < Dimension; a++) base(a, eij[a]);
for (int a = 0; a < Dimension; a++) {
tmp = transpose(ta[Index]) * adj(eij[a]) + adj(eij[a]) * ta[Index];
for (int b = 0; b < Dimension; b++) {
typename SU<ncolour>::template iSUnMatrix<cplx> tmp1 =
tmp * eij[b];
Complex iTr = TensorRemove(timesI(trace(tmp1)));
i2indTa()()(a, b) = iTr;
}
}
}
static void printGenerators(void) {
for (int gen = 0; gen < ncolour * ncolour - 1; gen++) {
TIMatrix i2indTa;
generator(gen, i2indTa);
std::cout << GridLogMessage << "Nc = " << ncolour << " t_" << gen
<< std::endl;
std::cout << GridLogMessage << i2indTa << std::endl;
}
}
static void testGenerators(void) {
TIMatrix i2indTa, i2indTb;
std::cout << GridLogMessage << "2IndexRep - Checking if traceless"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(trace(i2indTa)) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage << "2IndexRep - Checking if antihermitean"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
std::cout << GridLogMessage << a << std::endl;
assert(norm2(adj(i2indTa) + i2indTa) < 1.0e-6);
}
std::cout << GridLogMessage << std::endl;
std::cout << GridLogMessage
<< "2IndexRep - Checking Tr[Ta*Tb]=delta(a,b)*(N +- 2)/2"
<< std::endl;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
for (int b = 0; b < ncolour * ncolour - 1; b++) {
generator(a, i2indTa);
generator(b, i2indTb);
// generator returns iTa, so we need a minus sign here
Complex Tr = -TensorRemove(trace(i2indTa * i2indTb));
std::cout << GridLogMessage << "a=" << a << "b=" << b << "Tr=" << Tr
<< std::endl;
}
}
std::cout << GridLogMessage << std::endl;
}
static void TwoIndexLieAlgebraMatrix(
const typename SU<ncolour>::LatticeAlgebraVector &h,
LatticeTwoIndexMatrix &out, Real scale = 1.0) {
conformable(h, out);
GridBase *grid = out._grid;
LatticeTwoIndexMatrix la(grid);
TIMatrix i2indTa;
out = zero;
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
la = peekColour(h, a) * i2indTa;
out += la;
}
out *= scale;
}
// Projects the algebra components
// of a lattice matrix ( of dimension ncol*ncol -1 )
static void projectOnAlgebra(
typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
h_out = zero;
TIMatrix i2indTa;
Real coefficient = -2.0 / (ncolour + 2 * S) * scale;
// 2/(Nc +/- 2) for the normalization of the trace in the two index rep
for (int a = 0; a < ncolour * ncolour - 1; a++) {
generator(a, i2indTa);
auto tmp = real(trace(i2indTa * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
// a projector that keeps the generators stored to avoid the overhead of
// recomputing them
static void projector(typename SU<ncolour>::LatticeAlgebraVector &h_out,
const LatticeTwoIndexMatrix &in, Real scale = 1.0) {
conformable(h_out, in);
// to store the generators
static std::vector<TIMatrix> i2indTa(ncolour * ncolour -1);
h_out = zero;
static bool precalculated = false;
if (!precalculated) {
precalculated = true;
for (int a = 0; a < ncolour * ncolour - 1; a++) generator(a, i2indTa[a]);
}
Real coefficient =
-2.0 / (ncolour + 2 * S) * scale; // 2/(Nc +/- 2) for the normalization
// of the trace in the two index rep
for (int a = 0; a < ncolour * ncolour - 1; a++) {
auto tmp = real(trace(i2indTa[a] * in)) * coefficient;
pokeColour(h_out, tmp, a);
}
}
};
// Some useful type names
typedef SU_TwoIndex<Nc, Symmetric> TwoIndexSymmMatrices;
typedef SU_TwoIndex<Nc, AntiSymmetric> TwoIndexAntiSymmMatrices;
typedef SU_TwoIndex<2, Symmetric> SU2TwoIndexSymm;
typedef SU_TwoIndex<3, Symmetric> SU3TwoIndexSymm;
typedef SU_TwoIndex<4, Symmetric> SU4TwoIndexSymm;
typedef SU_TwoIndex<5, Symmetric> SU5TwoIndexSymm;
typedef SU_TwoIndex<2, AntiSymmetric> SU2TwoIndexAntiSymm;
typedef SU_TwoIndex<3, AntiSymmetric> SU3TwoIndexAntiSymm;
typedef SU_TwoIndex<4, AntiSymmetric> SU4TwoIndexAntiSymm;
typedef SU_TwoIndex<5, AntiSymmetric> SU5TwoIndexAntiSymm;
}
}
#endif

View File

@ -454,7 +454,7 @@ namespace Optimization {
#define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16) #define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16)
#endif #endif
#if defined (AVX1) || defined (AVXFMA4) #if defined (AVX1) || defined (AVXFMA)
#define _mm256_alignr_epi32_grid(ret,a,b,n) { \ #define _mm256_alignr_epi32_grid(ret,a,b,n) { \
__m128 aa, bb; \ __m128 aa, bb; \
\ \

View File

@ -370,7 +370,67 @@ namespace Optimization {
////////////////////////////////////////////// //////////////////////////////////////////////
// Some Template specialization // Some Template specialization
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
#undef GNU_CLANG_COMPILER
#ifdef GNU_CLANG_COMPILER
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
__m512 v1,v2;
v1=Optimization::Permute::Permute0(in); // avx 512; quad complex single
v1= _mm512_add_ps(v1,in);
v2=Optimization::Permute::Permute1(v1);
v1 = _mm512_add_ps(v1,v2);
v2=Optimization::Permute::Permute2(v1);
v1 = _mm512_add_ps(v1,v2);
u512f conv; conv.v = v1;
return Grid::ComplexF(conv.f[0],conv.f[1]);
}
//Real float Reduce
template<>
inline Grid::RealF Reduce<Grid::RealF, __m512>::operator()(__m512 in){
__m512 v1,v2;
v1 = Optimization::Permute::Permute0(in); // avx 512; octo-double
v1 = _mm512_add_ps(v1,in);
v2 = Optimization::Permute::Permute1(v1);
v1 = _mm512_add_ps(v1,v2);
v2 = Optimization::Permute::Permute2(v1);
v1 = _mm512_add_ps(v1,v2);
v2 = Optimization::Permute::Permute3(v1);
v1 = _mm512_add_ps(v1,v2);
u512f conv; conv.v=v1;
return conv.f[0];
}
//Complex double Reduce
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
__m512d v1;
v1 = Optimization::Permute::Permute0(in); // sse 128; paired complex single
v1 = _mm512_add_pd(v1,in);
v1 = Optimization::Permute::Permute1(in); // sse 128; paired complex single
v1 = _mm512_add_pd(v1,in);
u512d conv; conv.v = v1;
return Grid::ComplexD(conv.f[0],conv.f[1]);
}
//Real double Reduce
template<>
inline Grid::RealD Reduce<Grid::RealD, __m512d>::operator()(__m512d in){
__m512d v1,v2;
v1 = Optimization::Permute::Permute0(in); // avx 512; quad double
v1 = _mm512_add_pd(v1,in);
v2 = Optimization::Permute::Permute1(v1);
v1 = _mm512_add_pd(v1,v2);
v2 = Optimization::Permute::Permute2(v1);
v1 = _mm512_add_pd(v1,v2);
u512d conv; conv.v = v1;
return conv.f[0];
}
#else
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){ inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
@ -382,7 +442,6 @@ namespace Optimization {
return _mm512_reduce_add_ps(in); return _mm512_reduce_add_ps(in);
} }
//Complex double Reduce //Complex double Reduce
template<> template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){ inline Grid::ComplexD Reduce<Grid::ComplexD, __m512d>::operator()(__m512d in){
@ -402,6 +461,7 @@ namespace Optimization {
printf("Reduce : Missing integer implementation -> FIX\n"); printf("Reduce : Missing integer implementation -> FIX\n");
assert(0); assert(0);
} }
#endif
} }

View File

@ -1,300 +1,421 @@
/************************************************************************************* /*******************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/simd/Grid_qpx.h Source file: ./lib/simd/Grid_qpx.h
Copyright (C) 2015 Copyright (C) 2016
Author: neo <cossu@post.kek.jp> Author: Antonin Portelli <antonin.portelli@me.com>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ ******************************************************************************/
/* END LEGAL */
//----------------------------------------------------------------------
/*! @file Grid_qpx.h
@brief Optimization libraries for QPX instructions set for BG/Q
Using intrinsics
*/
// Time-stamp: <2015-05-27 11:30:21 neo>
//----------------------------------------------------------------------
// lot of undefined functions
namespace Grid {
namespace Optimization { namespace Optimization {
typedef struct
{
float v0,v1,v2,v3;
} vector4float;
inline std::ostream & operator<<(std::ostream& stream, const vector4double a)
{
stream << "{"<<vec_extract(a,0)<<","<<vec_extract(a,1)<<","<<vec_extract(a,2)<<","<<vec_extract(a,3)<<"}";
return stream;
};
inline std::ostream & operator<<(std::ostream& stream, const vector4float a)
{
stream << "{"<< a.v0 <<","<< a.v1 <<","<< a.v2 <<","<< a.v3 <<"}";
return stream;
};
struct Vsplat{ struct Vsplat{
//Complex float //Complex float
inline float operator()(float a, float b){ inline vector4float operator()(float a, float b){
return {a,b,a,b}; return (vector4float){a, b, a, b};
} }
// Real float // Real float
inline float operator()(float a){ inline vector4float operator()(float a){
return {a,a,a,a}; return (vector4float){a, a, a, a};
} }
//Complex double //Complex double
inline vector4double operator()(double a, double b){ inline vector4double operator()(double a, double b){
return {a,b,a,b}; return (vector4double){a, b, a, b};
} }
//Real double //Real double
inline vector4double operator()(double a){ inline vector4double operator()(double a){
return {a,a,a,a}; return (vector4double){a, a, a, a};
} }
//Integer //Integer
inline int operator()(Integer a){ inline int operator()(Integer a){
#error return a;
} }
}; };
struct Vstore{ struct Vstore{
//Float //Float
inline void operator()(float a, float* F){ inline void operator()(vector4double a, float *f){
assert(0); vec_st(a, 0, f);
} }
inline void operator()(vector4double a, vector4float &f){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(vector4float a, float *f){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double //Double
inline void operator()(vector4double a, double* D){ inline void operator()(vector4double a, double *d){
assert(0); vec_st(a, 0, d);
} }
//Integer //Integer
inline void operator()(int a, Integer* I){ inline void operator()(int a, Integer *i){
assert(0); i[0] = a;
} }
}; };
struct Vstream{ struct Vstream{
//Float //Float
inline void operator()(float * a, float b){ inline void operator()(float *f, vector4double a){
assert(0); vec_st(a, 0, f);
}
//Double
inline void operator()(double * a, vector4double b){
assert(0);
} }
inline void operator()(vector4float f, vector4double a){
vec_st(a, 0, (float *)(&f));
}
inline void operator()(float *f, vector4float a){
f[0] = a.v0;
f[1] = a.v1;
f[2] = a.v2;
f[3] = a.v3;
}
//Double
inline void operator()(double *d, vector4double a){
vec_st(a, 0, d);
}
}; };
struct Vset{ struct Vset{
// Complex float // Complex float
inline float operator()(Grid::ComplexF *a){ inline vector4float operator()(Grid::ComplexF *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return (vector4float){a[0].real(), a[0].imag(), a[1].real(), a[1].imag()};
} }
// Complex double // Complex double
inline vector4double operator()(Grid::ComplexD *a){ inline vector4double operator()(Grid::ComplexD *a){
return {a[0].real(),a[0].imag(),a[1].real(),a[1].imag(),a[2].real(),a[2].imag(),a[3].real(),a[3].imag()}; return vec_ld(0, (double *)a);
} }
// Real float
inline float operator()(float *a){ // Real float
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; inline vector4float operator()(float *a){
return (vector4float){a[0], a[1], a[2], a[3]};
} }
inline vector4double operator()(vector4float a){
return vec_ld(0, (float *)(&a));
}
// Real double // Real double
inline vector4double operator()(double *a){ inline vector4double operator()(double *a){
return {a[0],a[1],a[2],a[3],a[4],a[5],a[6],a[7]}; return vec_ld(0, a);
} }
// Integer // Integer
inline int operator()(Integer *a){ inline int operator()(Integer *a){
#error return a[0];
} }
}; };
template <typename Out_type, typename In_type> template <typename Out_type, typename In_type>
struct Reduce{ struct Reduce{
//Need templated class to overload output type //Need templated class to overload output type
//General form must generate error if compiled //General form must generate error if compiled
inline Out_type operator()(In_type in){ inline Out_type operator()(In_type in){
printf("Error, using wrong Reduce function\n"); printf("Error, using wrong Reduce function\n");
exit(1); exit(1);
return 0; return 0;
} }
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
#define FLOAT_WRAP_2(fn, pref)\
pref vector4float fn(vector4float a, vector4float b)\
{\
vector4double ad, bd, rd;\
vector4float r;\
\
ad = Vset()(a);\
bd = Vset()(b);\
rd = fn(ad, bd);\
Vstore()(rd, r);\
\
return r;\
}
#define FLOAT_WRAP_1(fn, pref)\
pref vector4float fn(vector4float a)\
{\
vector4double ad, rd;\
vector4float r;\
\
ad = Vset()(a);\
rd = fn(ad);\
Vstore()(rd, r);\
\
return r;\
}
struct Sum{ struct Sum{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
return vec_add(a,b); return vec_add(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a + b;
} }
}; };
struct Sub{ struct Sub{
//Complex/Real float
inline float operator()(float a, float b){
#error
}
//Complex/Real double //Complex/Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_sub(a, b);
} }
//Complex/Real float
FLOAT_WRAP_2(operator(), inline)
//Integer //Integer
inline floati operator()(int a, int b){ inline int operator()(int a, int b){
#error return a - b;
} }
}; };
struct MultComplex{ struct MultComplex{
// Complex float
inline float operator()(float a, float b){
#error
}
// Complex double // Complex double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_xxnpmadd(a, b, vec_xmul(b, a));
} }
};
// Complex float
FLOAT_WRAP_2(operator(), inline)
};
struct Mult{ struct Mult{
// Real float
inline float operator()(float a, float b){
#error
}
// Real double // Real double
inline vector4double operator()(vector4double a, vector4double b){ inline vector4double operator()(vector4double a, vector4double b){
#error return vec_mul(a, b);
} }
// Real float
FLOAT_WRAP_2(operator(), inline)
// Integer // Integer
inline int operator()(int a, int b){ inline int operator()(int a, int b){
#error return a*b;
} }
}; };
struct Conj{ struct Conj{
// Complex single
inline float operator()(float in){
assert(0);
}
// Complex double // Complex double
inline vector4double operator()(vector4double in){ inline vector4double operator()(vector4double v){
assert(0); return vec_mul(v, (vector4double){1., -1., 1., -1.});
} }
// do not define for integer input
};
// Complex float
FLOAT_WRAP_1(operator(), inline)
};
struct TimesMinusI{ struct TimesMinusI{
//Complex single
inline float operator()(float in, float ret){
assert(0);
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
assert(0); return vec_xxcpnmadd(v, (vector4double){1., 1., 1., 1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct TimesI{ struct TimesI{
//Complex single
inline float operator()(float in, float ret){
}
//Complex double //Complex double
inline vector4double operator()(vector4double in, vector4double ret){ inline vector4double operator()(vector4double v, vector4double ret){
return vec_xxcpnmadd(v, (vector4double){-1., -1., -1., -1.},
(vector4double){0., 0., 0., 0.});
} }
// Complex float
FLOAT_WRAP_2(operator(), inline)
}; };
struct Permute{
//Complex double
static inline vector4double Permute0(vector4double v){ //0123 -> 2301
return vec_perm(v, v, vec_gpci(02301));
};
static inline vector4double Permute1(vector4double v){ //0123 -> 1032
return vec_perm(v, v, vec_gpci(01032));
};
static inline vector4double Permute2(vector4double v){
return v;
};
static inline vector4double Permute3(vector4double v){
return v;
};
// Complex float
FLOAT_WRAP_1(Permute0, static inline)
FLOAT_WRAP_1(Permute1, static inline)
FLOAT_WRAP_1(Permute2, static inline)
FLOAT_WRAP_1(Permute3, static inline)
};
struct Rotate{
static inline vector4double rotate(vector4double v, int n){
switch(n){
case 0:
return v;
break;
case 1:
return vec_perm(v, v, vec_gpci(01230));
break;
case 2:
return vec_perm(v, v, vec_gpci(02301));
break;
case 3:
return vec_perm(v, v, vec_gpci(03012));
break;
default: assert(0);
}
}
static inline vector4float rotate(vector4float v, int n){
vector4double vd, rd;
vector4float r;
////////////////////////////////////////////// vd = Vset()(v);
// Some Template specialization rd = rotate(vd, n);
Vstore()(rd, r);
return r;
}
};
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, float>::operator()(float in){ inline Grid::ComplexF
assert(0); Reduce<Grid::ComplexF, vector4float>::operator()(vector4float v) { //2 complex
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
return Grid::ComplexF(v1.v0, v1.v1);
} }
//Real float Reduce //Real float Reduce
template<> template<>
inline Grid::RealF Reduce<Grid::RealF, float>::operator()(float in){ inline Grid::RealF
assert(0); Reduce<Grid::RealF, vector4float>::operator()(vector4float v){ //4 floats
vector4float v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = Optimization::Sum()(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = Optimization::Sum()(v1, v2);
return v1.v0;
} }
//Complex double Reduce //Complex double Reduce
template<> template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, vector4double>::operator()(vector4double in){ inline Grid::ComplexD
assert(0); Reduce<Grid::ComplexD, vector4double>::operator()(vector4double v){ //2 complex
vector4double v1;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
return Grid::ComplexD(vec_extract(v1, 0), vec_extract(v1, 1));
} }
//Real double Reduce //Real double Reduce
template<> template<>
inline Grid::RealD Reduce<Grid::RealD, vector4double>::operator()(vector4double in){ inline Grid::RealD
assert(0); Reduce<Grid::RealD, vector4double>::operator()(vector4double v){ //4 doubles
} vector4double v1,v2;
v1 = Optimization::Permute::Permute0(v);
v1 = vec_add(v1, v);
v2 = Optimization::Permute::Permute1(v1);
v1 = vec_add(v1, v2);
return vec_extract(v1, 0);
}
//Integer Reduce //Integer Reduce
template<> template<>
inline Integer Reduce<Integer, floati>::operator()(float in){ inline Integer Reduce<Integer, int>::operator()(int in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0); assert(0);
} }
} }
////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
namespace Grid { typedef Optimization::vector4float SIMD_Ftype; // Single precision type
typedef float SIMD_Ftype __attribute__ ((vector_size (16))); // Single precision type typedef vector4double SIMD_Dtype; // Double precision type
typedef vector4double SIMD_Dtype; // Double precision type typedef int SIMD_Itype; // Integer type
typedef int SIMD_Itype; // Integer type
inline void v_prefetch0(int size, const char *ptr){}; // prefetch utilities
inline void v_prefetch0(int size, const char *ptr){};
// Function name aliases inline void prefetch_HINT_T0(const char *ptr){};
typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Function name aliases
typedef Optimization::Sum SumSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Vset VsetSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::Vstream VstreamSIMD;
typedef Optimization::Conj ConjSIMD; template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
// Arithmetic operations
typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
} }

View File

@ -138,9 +138,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define ZLOADf(OFF,PTR,ri,ir) VLOADf(OFF,PTR,ir) VSHUFf(ir,ri) #define ZLOADf(OFF,PTR,ri,ir) VLOADf(OFF,PTR,ir) VSHUFf(ir,ri)
#define ZLOADd(OFF,PTR,ri,ir) VLOADd(OFF,PTR,ir) VSHUFd(ir,ri) #define ZLOADd(OFF,PTR,ri,ir) VLOADd(OFF,PTR,ir) VSHUFd(ir,ri)
#define STREAM_STORE
#ifdef STREAM_STORE
#define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n" #define VSTOREf(OFF,PTR,SRC) "vmovntps " #SRC "," #OFF "*64(" #PTR ")" ";\n"
#define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n" #define VSTOREd(OFF,PTR,SRC) "vmovntpd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
#else
#define VSTOREf(OFF,PTR,SRC) "vmovaps " #SRC "," #OFF "*64(" #PTR ")" ";\n"
#define VSTOREd(OFF,PTR,SRC) "vmovapd " #SRC "," #OFF "*64(" #PTR ")" ";\n"
#endif
// Swaps Re/Im ; could unify this with IMCI // Swaps Re/Im ; could unify this with IMCI
#define VSHUFd(A,DEST) "vpshufd $0x4e," #A "," #DEST ";\n" #define VSHUFd(A,DEST) "vpshufd $0x4e," #A "," #DEST ";\n"

View File

@ -56,7 +56,7 @@ namespace Grid {
temp = unit + temp*arg; temp = unit + temp*arg;
} }
return ProjectOnGroup(temp);//maybe not strictly necessary return temp;
} }

Binary file not shown.

View File

@ -13,21 +13,22 @@ echo CCFILES=$CCFILES >> Make.inc
# tests Make.inc # tests Make.inc
cd $home/tests cd $home/tests
dirs=`find . -type d ` dirs=`find . -type d -not -path '*/\.*'`
for subdir in $dirs; do for subdir in $dirs; do
cd $home/tests/$subdir cd $home/tests/$subdir
TESTS=`ls T*.cc` pwd
TESTLIST=`echo ${TESTS} | sed s/.cc//g ` TESTS=`ls T*.cc`
PREF=`[ $subdir = '.' ] && echo noinst || echo EXTRA` TESTLIST=`echo ${TESTS} | sed s/.cc//g `
echo "tests: ${TESTLIST}" > Make.inc PREF=`[ $subdir = '.' ] && echo noinst || echo EXTRA`
echo ${PREF}_PROGRAMS = ${TESTLIST} >> Make.inc echo "tests: ${TESTLIST}" > Make.inc
echo ${PREF}_PROGRAMS = ${TESTLIST} >> Make.inc
echo >> Make.inc
for f in $TESTS; do
BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc echo >> Make.inc
for f in $TESTS; do done
BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc
done
done done
# benchmarks Make.inc # benchmarks Make.inc
@ -38,10 +39,10 @@ TESTLIST=`echo ${TESTS} | sed s/.cc//g `
echo bin_PROGRAMS = ${TESTLIST} > Make.inc echo bin_PROGRAMS = ${TESTLIST} > Make.inc
echo >> Make.inc echo >> Make.inc
for f in $TESTS; do for f in $TESTS; do
BNAME=`basename $f .cc` BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD=-lGrid>> Make.inc echo ${BNAME}_LDADD=-lGrid>> Make.inc
echo >> Make.inc echo >> Make.inc
done done
cd .. cd ..

4
scripts/reconfigure_script Executable file
View File

@ -0,0 +1,4 @@
aclocal -I m4
autoheader -f
automake -f --add-missing
autoconf -f

View File

@ -157,10 +157,9 @@ void Tester(const functor &func)
std::cout << GridLogMessage << " " << func.name() << std::endl; std::cout << GridLogMessage << " " << func.name() << std::endl;
std::cout << GridLogDebug << v_input1 << std::endl; std::cout << GridLogDebug << v_input1 << std::endl;
std::cout << GridLogDebug << v_input2 << std::endl;
std::cout << GridLogDebug << v_result << std::endl; std::cout << GridLogDebug << v_result << std::endl;
int ok=0; int ok=0;
for(int i=0;i<Nsimd;i++){ for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>1.0e-7){ if ( abs(reference[i]-result[i])>1.0e-7){

View File

@ -1,76 +1,532 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_lie_generators.cc Source file: ./tests/Test_lie_generators.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#include <Grid/qcd/utils/CovariantCshift.h>
#include <Grid/qcd/utils/SUn.h>
#include <Grid/qcd/utils/SUnAdjoint.h>
#include <Grid/qcd/utils/SUnTwoIndex.h>
#include <Grid/qcd/representations/adjoint.h>
#include <Grid/qcd/representations/two_index.h>
#include <Grid/qcd/utils/WilsonLoops.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
int main (int argc, char ** argv) std::vector<int> latt({4, 4, 4, 8});
{ GridCartesian* grid = SpaceTimeGrid::makeFourDimGrid(
Grid_init(&argc,&argv); latt, GridDefaultSimd(Nd, vComplex::Nsimd()), GridDefaultMpi());
std::vector<int> latt({4,4,4,8}); GridRedBlackCartesian* rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
GridCartesian * grid = SpaceTimeGrid::makeFourDimGrid(latt,
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * rbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(grid);
std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout << GridLogMessage << "*********************************************"
std::cout<<GridLogMessage<<"* Generators for SU(2)"<<std::endl; << std::endl;
std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout << GridLogMessage << "* Generators for SU(2)" << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
SU2::printGenerators(); SU2::printGenerators();
std::cout << "Dimension of adjoint representation: "<< SU2Adjoint::Dimension << std::endl;
SU2Adjoint::printGenerators();
SU2::testGenerators(); SU2::testGenerators();
SU2Adjoint::testGenerators();
std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout << GridLogMessage << "*********************************************"
std::cout<<GridLogMessage<<"* Generators for SU(3)"<<std::endl; << std::endl;
std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout << GridLogMessage << "* Generators for SU(3)" << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
SU3::printGenerators(); SU3::printGenerators();
std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl;
SU3Adjoint::printGenerators();
SU3::testGenerators(); SU3::testGenerators();
SU3Adjoint::testGenerators();
// std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout<<GridLogMessage<<"*********************************************"<<std::endl;
// std::cout<<GridLogMessage<<"* Generators for SU(4)"<<std::endl; std::cout<<GridLogMessage<<"* Generators for SU(4)"<<std::endl;
// std::cout<<GridLogMessage<<"*********************************************"<<std::endl; std::cout<<GridLogMessage<<"*********************************************"<<std::endl;
// SU4::printGenerators(); SU4::printGenerators();
// SU4::testGenerators(); std::cout << "Dimension of adjoint representation: "<< SU4Adjoint::Dimension << std::endl;
SU4Adjoint::printGenerators();
SU4::testGenerators();
SU4Adjoint::testGenerators();
// std::cout<<GridLogMessage<<"*********************************************"<<std::endl; // Projectors
// std::cout<<GridLogMessage<<"* Generators for SU(5)"<<std::endl; GridParallelRNG gridRNG(grid);
// std::cout<<GridLogMessage<<"*********************************************"<<std::endl; gridRNG.SeedRandomDevice();
// SU5::printGenerators(); SU3Adjoint::LatticeAdjMatrix Gauss(grid);
// SU5::testGenerators(); SU3::LatticeAlgebraVector ha(grid);
SU3::LatticeAlgebraVector hb(grid);
random(gridRNG,Gauss);
std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
SU3Adjoint::projectOnAlgebra(ha, Gauss);
std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
std::cout << GridLogMessage << "Start projector" << std::endl;
SU3Adjoint::projector(hb, Gauss);
std::cout << GridLogMessage << "end projector" << std::endl;
std::cout << GridLogMessage << "ReStart projector" << std::endl;
SU3Adjoint::projector(hb, Gauss);
std::cout << GridLogMessage << "end projector" << std::endl;
SU3::LatticeAlgebraVector diff = ha -hb;
std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl;
Grid_finalize(); // Testing HMC representation classes
AdjointRep<Nc> AdjRep(grid);
// AdjointRepresentation has the predefined number of colours Nc
Representations<FundamentalRepresentation, AdjointRepresentation, TwoIndexSymmetricRepresentation> RepresentationTypes(grid);
LatticeGaugeField U(grid), V(grid);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V);
// Adjoint representation
// Test group structure
// (U_f * V_f)_r = U_r * V_r
LatticeGaugeField UV(grid);
UV = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Nc>::LatticeMatrix Umu = peekLorentz(U,mu);
SU<Nc>::LatticeMatrix Vmu = peekLorentz(V,mu);
pokeLorentz(UV,Umu*Vmu, mu);
}
AdjRep.update_representation(UV);
typename AdjointRep<Nc>::LatticeField UVr = AdjRep.U; // (U_f * V_f)_r
AdjRep.update_representation(U);
typename AdjointRep<Nc>::LatticeField Ur = AdjRep.U; // U_r
AdjRep.update_representation(V);
typename AdjointRep<Nc>::LatticeField Vr = AdjRep.U; // V_r
typename AdjointRep<Nc>::LatticeField UrVr(grid);
UrVr = zero;
for (int mu = 0; mu < Nd; mu++) {
typename AdjointRep<Nc>::LatticeMatrix Urmu = peekLorentz(Ur,mu);
typename AdjointRep<Nc>::LatticeMatrix Vrmu = peekLorentz(Vr,mu);
pokeLorentz(UrVr,Urmu*Vrmu, mu);
}
typename AdjointRep<Nc>::LatticeField Diff_check = UVr - UrVr;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Adjoint representation) : " << norm2(Diff_check) << std::endl;
// Check correspondence of algebra and group transformations
// Create a random vector
SU<Nc>::LatticeAlgebraVector h_adj(grid);
typename AdjointRep<Nc>::LatticeMatrix Ar(grid);
random(gridRNG,h_adj);
h_adj = real(h_adj);
SU_Adjoint<Nc>::AdjointLieAlgebraMatrix(h_adj,Ar);
// Re-extract h_adj
SU<Nc>::LatticeAlgebraVector h_adj2(grid);
SU_Adjoint<Nc>::projectOnAlgebra(h_adj2, Ar);
SU<Nc>::LatticeAlgebraVector h_diff = h_adj - h_adj2;
std::cout << GridLogMessage << "Projections structure check vector difference (Adjoint representation) : " << norm2(h_diff) << std::endl;
// Exponentiate
typename AdjointRep<Nc>::LatticeMatrix Uadj(grid);
Uadj = expMat(Ar, 1.0, 16);
typename AdjointRep<Nc>::LatticeMatrix uno(grid);
uno = 1.0;
// Check matrix Uadj, must be real orthogonal
typename AdjointRep<Nc>::LatticeMatrix Ucheck = Uadj - conjugate(Uadj);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck)
<< std::endl;
Ucheck = Uadj * adj(Uadj) - uno;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck)
<< std::endl;
Ucheck = adj(Uadj) * Uadj - uno;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck)
<< std::endl;
// Construct the fundamental matrix in the group
SU<Nc>::LatticeMatrix Af(grid);
SU<Nc>::FundamentalLieAlgebraMatrix(h_adj,Af);
SU<Nc>::LatticeMatrix Ufund(grid);
Ufund = expMat(Af, 1.0, 16);
// Check unitarity
SU<Nc>::LatticeMatrix uno_f(grid);
uno_f = 1.0;
SU<Nc>::LatticeMatrix UnitCheck(grid);
UnitCheck = Ufund * adj(Ufund) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck)
<< std::endl;
UnitCheck = adj(Ufund) * Ufund - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck)
<< std::endl;
// Tranform to the adjoint representation
U = zero; // fill this with only one direction
pokeLorentz(U,Ufund,0); // the representation transf acts on full gauge fields
AdjRep.update_representation(U);
Ur = AdjRep.U; // U_r
typename AdjointRep<Nc>::LatticeMatrix Ur0 = peekLorentz(Ur,0); // this should be the same as Uadj
typename AdjointRep<Nc>::LatticeMatrix Diff_check_mat = Ur0 - Uadj;
std::cout << GridLogMessage << "Projections structure check group difference : " << norm2(Diff_check_mat) << std::endl;
// TwoIndexRep tests
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "* eS^{ij} base for SU(2)" << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Dimension of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::printBase();
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Generators of Two Index Symmetric representation: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::printGenerators();
std::cout << GridLogMessage << "Test of Two Index Symmetric Generators: "<< SU2TwoIndexSymm::Dimension << std::endl;
SU2TwoIndexSymm::testGenerators();
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "* eAS^{ij} base for SU(2)" << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::printBase();
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Dimension of Two Index anti-Symmetric representation: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::printGenerators();
std::cout << GridLogMessage << "Test of Two Index anti-Symmetric Generators: "<< SU2TwoIndexAntiSymm::Dimension << std::endl;
SU2TwoIndexAntiSymm::testGenerators();
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors"
<< std::endl;
// Projectors
SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid);
random(gridRNG,Gauss2);
std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
SU3TwoIndexSymm::projectOnAlgebra(ha, Gauss2);
std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
std::cout << GridLogMessage << "Start projector" << std::endl;
SU3TwoIndexSymm::projector(hb, Gauss2);
std::cout << GridLogMessage << "end projector" << std::endl;
std::cout << GridLogMessage << "ReStart projector" << std::endl;
SU3TwoIndexSymm::projector(hb, Gauss2);
std::cout << GridLogMessage << "end projector" << std::endl;
SU3::LatticeAlgebraVector diff2 = ha - hb;
std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors"
<< std::endl;
// Projectors
SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid);
random(gridRNG,Gauss2a);
std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
SU3TwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a);
std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
std::cout << GridLogMessage << "Start projector" << std::endl;
SU3TwoIndexAntiSymm::projector(hb, Gauss2a);
std::cout << GridLogMessage << "end projector" << std::endl;
std::cout << GridLogMessage << "ReStart projector" << std::endl;
SU3TwoIndexAntiSymm::projector(hb, Gauss2a);
std::cout << GridLogMessage << "end projector" << std::endl;
SU3::LatticeAlgebraVector diff2a = ha - hb;
std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl;
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Two index Symmetric: Checking Group Structure"
<< std::endl;
// Testing HMC representation classes
TwoIndexRep< Nc, Symmetric > TIndexRep(grid);
// Test group structure
// (U_f * V_f)_r = U_r * V_r
LatticeGaugeField U2(grid), V2(grid);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2);
LatticeGaugeField UV2(grid);
UV2 = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Nc>::LatticeMatrix Umu2 = peekLorentz(U2,mu);
SU<Nc>::LatticeMatrix Vmu2 = peekLorentz(V2,mu);
pokeLorentz(UV2,Umu2*Vmu2, mu);
}
TIndexRep.update_representation(UV2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField UVr2 = TIndexRep.U; // (U_f * V_f)_r
TIndexRep.update_representation(U2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2 = TIndexRep.U; // U_r
TIndexRep.update_representation(V2);
typename TwoIndexRep< Nc, Symmetric >::LatticeField Vr2 = TIndexRep.U; // V_r
typename TwoIndexRep< Nc, Symmetric >::LatticeField Ur2Vr2(grid);
Ur2Vr2 = zero;
for (int mu = 0; mu < Nd; mu++) {
typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Urmu2 = peekLorentz(Ur2,mu);
typename TwoIndexRep< Nc, Symmetric >::LatticeMatrix Vrmu2 = peekLorentz(Vr2,mu);
pokeLorentz(Ur2Vr2,Urmu2*Vrmu2, mu);
}
typename TwoIndexRep< Nc, Symmetric >::LatticeField Diff_check2 = UVr2 - Ur2Vr2;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Two Index Symmetric): " << norm2(Diff_check2) << std::endl;
// Check correspondence of algebra and group transformations
// Create a random vector
SU<Nc>::LatticeAlgebraVector h_sym(grid);
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ar_sym(grid);
random(gridRNG,h_sym);
h_sym = real(h_sym);
SU_TwoIndex<Nc,Symmetric>::TwoIndexLieAlgebraMatrix(h_sym,Ar_sym);
// Re-extract h_sym
SU<Nc>::LatticeAlgebraVector h_sym2(grid);
SU_TwoIndex< Nc, Symmetric>::projectOnAlgebra(h_sym2, Ar_sym);
SU<Nc>::LatticeAlgebraVector h_diff_sym = h_sym - h_sym2;
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index Symmetric): " << norm2(h_diff_sym) << std::endl;
// Exponentiate
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix U2iS(grid);
U2iS = expMat(Ar_sym, 1.0, 16);
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix uno2iS(grid);
uno2iS = 1.0;
// Check matrix U2iS, must be real orthogonal
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ucheck2iS = U2iS - conjugate(U2iS);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iS)
<< std::endl;
Ucheck2iS = U2iS * adj(U2iS) - uno2iS;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iS)
<< std::endl;
Ucheck2iS = adj(U2iS) * U2iS - uno2iS;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iS)
<< std::endl;
// Construct the fundamental matrix in the group
SU<Nc>::LatticeMatrix Af_sym(grid);
SU<Nc>::FundamentalLieAlgebraMatrix(h_sym,Af_sym);
SU<Nc>::LatticeMatrix Ufund2(grid);
Ufund2 = expMat(Af_sym, 1.0, 16);
SU<Nc>::LatticeMatrix UnitCheck2(grid);
UnitCheck2 = Ufund2 * adj(Ufund2) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2)
<< std::endl;
UnitCheck2 = adj(Ufund2) * Ufund2 - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2)
<< std::endl;
// Tranform to the 2Index Sym representation
U = zero; // fill this with only one direction
pokeLorentz(U,Ufund2,0); // the representation transf acts on full gauge fields
TIndexRep.update_representation(U);
Ur2 = TIndexRep.U; // U_r
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Ur02 = peekLorentz(Ur2,0); // this should be the same as U2iS
typename TwoIndexRep< Nc, Symmetric>::LatticeMatrix Diff_check_mat2 = Ur02 - U2iS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index Symmetric): " << norm2(Diff_check_mat2) << std::endl;
if (TwoIndexRep<Nc, AntiSymmetric >::Dimension != 1){
std::cout << GridLogMessage << "*********************************************"
<< std::endl;
std::cout << GridLogMessage << "Two Index anti-Symmetric: Check Group Structure"
<< std::endl;
// Testing HMC representation classes
TwoIndexRep< Nc, AntiSymmetric > TIndexRepA(grid);
// Test group structure
// (U_f * V_f)_r = U_r * V_r
LatticeGaugeField U2A(grid), V2A(grid);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, U2A);
SU<Nc>::HotConfiguration<LatticeGaugeField>(gridRNG, V2A);
LatticeGaugeField UV2A(grid);
UV2A = zero;
for (int mu = 0; mu < Nd; mu++) {
SU<Nc>::LatticeMatrix Umu2A = peekLorentz(U2,mu);
SU<Nc>::LatticeMatrix Vmu2A = peekLorentz(V2,mu);
pokeLorentz(UV2A,Umu2A*Vmu2A, mu);
}
TIndexRep.update_representation(UV2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField UVr2A = TIndexRepA.U; // (U_f * V_f)_r
TIndexRep.update_representation(U2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2A = TIndexRepA.U; // U_r
TIndexRep.update_representation(V2A);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Vr2A = TIndexRepA.U; // V_r
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Ur2Vr2A(grid);
Ur2Vr2A = zero;
for (int mu = 0; mu < Nd; mu++) {
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Urmu2A = peekLorentz(Ur2A,mu);
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeMatrix Vrmu2A = peekLorentz(Vr2A,mu);
pokeLorentz(Ur2Vr2A,Urmu2A*Vrmu2A, mu);
}
typename TwoIndexRep< Nc, AntiSymmetric >::LatticeField Diff_check2A = UVr2A - Ur2Vr2A;
std::cout << GridLogMessage << "Group structure SU("<<Nc<<") check difference (Two Index anti-Symmetric): " << norm2(Diff_check2A) << std::endl;
// Check correspondence of algebra and group transformations
// Create a random vector
SU<Nc>::LatticeAlgebraVector h_Asym(grid);
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ar_Asym(grid);
random(gridRNG,h_Asym);
h_Asym = real(h_Asym);
SU_TwoIndex< Nc, AntiSymmetric>::TwoIndexLieAlgebraMatrix(h_Asym,Ar_Asym);
// Re-extract h_sym
SU<Nc>::LatticeAlgebraVector h_Asym2(grid);
SU_TwoIndex< Nc, AntiSymmetric>::projectOnAlgebra(h_Asym2, Ar_Asym);
SU<Nc>::LatticeAlgebraVector h_diff_Asym = h_Asym - h_Asym2;
std::cout << GridLogMessage << "Projections structure check vector difference (Two Index anti-Symmetric): " << norm2(h_diff_Asym) << std::endl;
// Exponentiate
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix U2iAS(grid);
U2iAS = expMat(Ar_Asym, 1.0, 16);
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix uno2iAS(grid);
uno2iAS = 1.0;
// Check matrix U2iS, must be real orthogonal
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ucheck2iAS = U2iAS - conjugate(U2iAS);
std::cout << GridLogMessage << "Reality check: " << norm2(Ucheck2iAS)
<< std::endl;
Ucheck2iAS = U2iAS * adj(U2iAS) - uno2iAS;
std::cout << GridLogMessage << "orthogonality check 1: " << norm2(Ucheck2iAS)
<< std::endl;
Ucheck2iAS = adj(U2iAS) * U2iAS - uno2iAS;
std::cout << GridLogMessage << "orthogonality check 2: " << norm2(Ucheck2iAS)
<< std::endl;
// Construct the fundamental matrix in the group
SU<Nc>::LatticeMatrix Af_Asym(grid);
SU<Nc>::FundamentalLieAlgebraMatrix(h_Asym,Af_Asym);
SU<Nc>::LatticeMatrix Ufund2A(grid);
Ufund2A = expMat(Af_Asym, 1.0, 16);
SU<Nc>::LatticeMatrix UnitCheck2A(grid);
UnitCheck2A = Ufund2A * adj(Ufund2A) - uno_f;
std::cout << GridLogMessage << "unitarity check 1: " << norm2(UnitCheck2A)
<< std::endl;
UnitCheck2A = adj(Ufund2A) * Ufund2A - uno_f;
std::cout << GridLogMessage << "unitarity check 2: " << norm2(UnitCheck2A)
<< std::endl;
// Tranform to the 2Index Sym representation
U = zero; // fill this with only one direction
pokeLorentz(U,Ufund2A,0); // the representation transf acts on full gauge fields
TIndexRepA.update_representation(U);
Ur2A = TIndexRepA.U; // U_r
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Ur02A = peekLorentz(Ur2A,0); // this should be the same as U2iS
typename TwoIndexRep< Nc, AntiSymmetric>::LatticeMatrix Diff_check_mat2A = Ur02A - U2iAS;
std::cout << GridLogMessage << "Projections structure check group difference (Two Index anti-Symmetric): " << norm2(Diff_check_mat2A) << std::endl;
} else {
std::cout << GridLogMessage << "Skipping Two Index anti-Symmetric tests "
"because representation is trivial (dim = 1)"
<< std::endl;
} }
Grid_finalize();
}

View File

@ -1,30 +1,30 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_zmm.cc Source file: ./tests/Test_zmm.cc
Copyright (C) 2015 Copyright (C) 2015
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#include <Grid/PerfCount.h> #include <Grid/PerfCount.h>

View File

@ -96,7 +96,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -96,7 +96,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -113,7 +113,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
Hmom -= real(sum(trace(mommu*mommu))); Hmom -= real(sum(trace(mommu*mommu)));

View File

@ -82,7 +82,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -100,7 +100,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);
@ -169,7 +169,7 @@ int main (int argc, char ** argv)
// //
// Pmu = zero; // Pmu = zero;
// for(int mu=0;mu<Nd;mu++){ // for(int mu=0;mu<Nd;mu++){
// SU<Ncol>::GaussianLieAlgebraMatrix(pRNG, Pmu); // SU<Ncol>::GaussianFundamentalLieAlgebraMatrix(pRNG, Pmu);
// PokeIndex<LorentzIndex>(P, Pmu, mu); // PokeIndex<LorentzIndex>(P, Pmu, mu);
// } // }
// //

View File

@ -100,7 +100,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -96,7 +96,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -81,7 +81,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@ -58,8 +58,8 @@ int main (int argc, char ** argv)
LatticeGaugeField U(&Grid); LatticeGaugeField U(&Grid);
SU3::HotConfiguration(pRNG,U); //SU2::HotConfiguration(pRNG,U);
// SU3::ColdConfiguration(pRNG,U); SU3::ColdConfiguration(pRNG,U);
//////////////////////////////////// ////////////////////////////////////
// Unmodified matrix element // Unmodified matrix element
@ -76,6 +76,8 @@ int main (int argc, char ** argv)
Dw.MDeriv(tmp , Mphi, phi,DaggerNo ); UdSdU=tmp; Dw.MDeriv(tmp , Mphi, phi,DaggerNo ); UdSdU=tmp;
Dw.MDeriv(tmp , phi, Mphi,DaggerYes ); UdSdU=(UdSdU+tmp); Dw.MDeriv(tmp , phi, Mphi,DaggerYes ); UdSdU=(UdSdU+tmp);
// Take the trace
UdSdU = Ta(UdSdU);
LatticeFermion Ftmp (&Grid); LatticeFermion Ftmp (&Grid);
@ -93,7 +95,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
Hmom -= real(sum(trace(mommu*mommu))); Hmom -= real(sum(trace(mommu*mommu)));
@ -140,11 +142,13 @@ int main (int argc, char ** argv)
LatticeComplex dS(&Grid); dS = zero; LatticeComplex dS(&Grid); dS = zero;
LatticeComplex dSmom(&Grid); dSmom = zero; LatticeComplex dSmom(&Grid); dSmom = zero;
LatticeComplex dSmom2(&Grid); dSmom2 = zero; LatticeComplex dSmom2(&Grid); dSmom2 = zero;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
mommu = PeekIndex<LorentzIndex>(UdSdU,mu); mommu = PeekIndex<LorentzIndex>(UdSdU,mu);
mommu=Ta(mommu)*2.0; mommu=Ta(mommu)*2.0;
PokeIndex<LorentzIndex>(UdSdU,mommu,mu); PokeIndex<LorentzIndex>(UdSdU,mommu,mu);
} }
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
mommu = PeekIndex<LorentzIndex>(mom,mu); mommu = PeekIndex<LorentzIndex>(mom,mu);
@ -168,7 +172,7 @@ int main (int argc, char ** argv)
dSmom2 = dSmom2 - trace(forcemu*forcemu) *(0.25* dt*dt); dSmom2 = dSmom2 - trace(forcemu*forcemu) *(0.25* dt*dt);
// Update mom action density // Update mom action density
mommu = mommu + forcemu*(dt*0.5); mommu = mommu + forcemu*(dt * 0.5);
Hmomprime -= real(sum(trace(mommu*mommu))); Hmomprime -= real(sum(trace(mommu*mommu)));

View File

@ -103,7 +103,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
// Dw.DoubleStore(Dw.Umu,Uprime); // update U _and_ Udag // Dw.DoubleStore(Dw.Umu,Uprime); // update U _and_ Udag
Dw.DhopDirDisp(phi,Ftmp,mu,mu+4,DaggerYes); Dw.DhopDirDisp(phi,Ftmp,mu,mu+4,DaggerYes);

View File

@ -86,7 +86,7 @@ int main (int argc, char ** argv)
LatticeColourMatrix Umu_save(&Grid); LatticeColourMatrix Umu_save(&Grid);
LatticeColourMatrix dU (&Grid); LatticeColourMatrix dU (&Grid);
LatticeColourMatrix mom(&Grid); LatticeColourMatrix mom(&Grid);
SU3::GaussianLieAlgebraMatrix(pRNG, mom); // Traceless antihermitian momentum; gaussian in lie alg SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mom); // Traceless antihermitian momentum; gaussian in lie alg
// check mom is as i expect // check mom is as i expect

63
tests/hmc/Make.inc Normal file
View File

@ -0,0 +1,63 @@
tests: Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonMixedRepresentationsFermionGauge Test_hmc_WilsonRatio Test_hmc_WilsonTwoIndexSymmetricFermionGauge Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
EXTRA_PROGRAMS = Test_hmc_EODWFRatio Test_hmc_EODWFRatio_Gparity Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_GparityIwasakiGauge Test_hmc_GparityWilsonGauge Test_hmc_IwasakiGauge Test_hmc_RectGauge Test_hmc_WilsonAdjointFermionGauge Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonMixedRepresentationsFermionGauge Test_hmc_WilsonRatio Test_hmc_WilsonTwoIndexSymmetricFermionGauge Test_multishift_sqrt Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
Test_hmc_EODWFRatio_LDADD=-lGrid
Test_hmc_EODWFRatio_Gparity_SOURCES=Test_hmc_EODWFRatio_Gparity.cc
Test_hmc_EODWFRatio_Gparity_LDADD=-lGrid
Test_hmc_EOWilsonFermionGauge_SOURCES=Test_hmc_EOWilsonFermionGauge.cc
Test_hmc_EOWilsonFermionGauge_LDADD=-lGrid
Test_hmc_EOWilsonRatio_SOURCES=Test_hmc_EOWilsonRatio.cc
Test_hmc_EOWilsonRatio_LDADD=-lGrid
Test_hmc_GparityIwasakiGauge_SOURCES=Test_hmc_GparityIwasakiGauge.cc
Test_hmc_GparityIwasakiGauge_LDADD=-lGrid
Test_hmc_GparityWilsonGauge_SOURCES=Test_hmc_GparityWilsonGauge.cc
Test_hmc_GparityWilsonGauge_LDADD=-lGrid
Test_hmc_IwasakiGauge_SOURCES=Test_hmc_IwasakiGauge.cc
Test_hmc_IwasakiGauge_LDADD=-lGrid
Test_hmc_RectGauge_SOURCES=Test_hmc_RectGauge.cc
Test_hmc_RectGauge_LDADD=-lGrid
Test_hmc_WilsonAdjointFermionGauge_SOURCES=Test_hmc_WilsonAdjointFermionGauge.cc
Test_hmc_WilsonAdjointFermionGauge_LDADD=-lGrid
Test_hmc_WilsonFermionGauge_SOURCES=Test_hmc_WilsonFermionGauge.cc
Test_hmc_WilsonFermionGauge_LDADD=-lGrid
Test_hmc_WilsonGauge_SOURCES=Test_hmc_WilsonGauge.cc
Test_hmc_WilsonGauge_LDADD=-lGrid
Test_hmc_WilsonMixedRepresentationsFermionGauge_SOURCES=Test_hmc_WilsonMixedRepresentationsFermionGauge.cc
Test_hmc_WilsonMixedRepresentationsFermionGauge_LDADD=-lGrid
Test_hmc_WilsonRatio_SOURCES=Test_hmc_WilsonRatio.cc
Test_hmc_WilsonRatio_LDADD=-lGrid
Test_hmc_WilsonTwoIndexSymmetricFermionGauge_SOURCES=Test_hmc_WilsonTwoIndexSymmetricFermionGauge.cc
Test_hmc_WilsonTwoIndexSymmetricFermionGauge_LDADD=-lGrid
Test_multishift_sqrt_SOURCES=Test_multishift_sqrt.cc
Test_multishift_sqrt_LDADD=-lGrid
Test_remez_SOURCES=Test_remez.cc
Test_remez_LDADD=-lGrid
Test_rhmc_EOWilson1p1_SOURCES=Test_rhmc_EOWilson1p1.cc
Test_rhmc_EOWilson1p1_LDADD=-lGrid
Test_rhmc_EOWilsonRatio_SOURCES=Test_rhmc_EOWilsonRatio.cc
Test_rhmc_EOWilsonRatio_LDADD=-lGrid
Test_rhmc_Wilson1p1_SOURCES=Test_rhmc_Wilson1p1.cc
Test_rhmc_Wilson1p1_LDADD=-lGrid
Test_rhmc_WilsonRatio_SOURCES=Test_rhmc_WilsonRatio.cc
Test_rhmc_WilsonRatio_LDADD=-lGrid

View File

@ -75,7 +75,15 @@ public:
Level1.push_back(&Waction); Level1.push_back(&Waction);
TheAction.push_back(Level1); TheAction.push_back(Level1);
NumOp.ZeroCounters();
DenOp.ZeroCounters();
Run(argc,argv); Run(argc,argv);
std::cout << GridLogMessage << "Numerator report, Pauli-Villars term : " << std::endl;
NumOp.Report();
std::cout << GridLogMessage << "Denominator report, Dw(m) term (includes CG) : " << std::endl;
DenOp.Report();
}; };
}; };

View File

@ -67,7 +67,7 @@ public:
TwoFlavourEvenOddPseudoFermionAction<ImplPolicy> Nf2(FermOp,CG,CG); TwoFlavourEvenOddPseudoFermionAction<ImplPolicy> Nf2(FermOp,CG,CG);
//Set smearing (true/false), default: false //Set smearing (true/false), default: false
Nf2.is_smeared=false; Nf2.is_smeared=true;
//Collect actions //Collect actions
ActionLevel<LatticeGaugeField> Level1(1); ActionLevel<LatticeGaugeField> Level1(1);

View File

@ -0,0 +1,107 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include "Grid/Grid.h"
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
namespace Grid {
namespace QCD {
// Here change the allowed (higher) representations
typedef Representations< FundamentalRepresentation, AdjointRepresentation > TheRepresentations;
class HmcRunner : public NerscHmcRunnerHirep< TheRepresentations > {
public:
void BuildTheAction(int argc, char **argv)
{
typedef WilsonAdjImplR ImplPolicy; // gauge field implemetation for the pseudofermions
typedef WilsonAdjFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...)
typedef typename FermionAction::FermionField FermionField;
UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
FGrid = UGrid;
FrbGrid = UrbGrid;
// temporarily need a gauge field
//LatticeGaugeField U(UGrid);
AdjointRepresentation::LatticeField U(UGrid);
// Gauge action
WilsonGaugeActionR Waction(2.25);
Real mass = -0.95;
FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
ConjugateGradient<FermionField> CG(1.0e-8, 10000, false);
ConjugateResidual<FermionField> CR(1.0e-8, 10000);
// Pass two solvers: one for the force computation and one for the action
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
// Set smearing (true/false), default: false
Nf2.is_smeared = false;
// Collect actions
ActionLevel<LatticeGaugeField, TheRepresentations > Level1(1);
Level1.push_back(&Nf2);
ActionLevel<LatticeGaugeField, TheRepresentations > Level2(4);
Level2.push_back(&Waction);
TheAction.push_back(Level1);
TheAction.push_back(Level2);
Run(argc, argv);
};
};
}
}
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads
<< " threads" << std::endl;
HmcRunner TheHMC;
TheHMC.BuildTheAction(argc, argv);
}

View File

@ -1,77 +1,76 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_WilsonFermionGauge.cc Source file: ./tests/Test_hmc_WilsonFermionGauge.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk> Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp> Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk> Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
namespace Grid { namespace Grid {
namespace QCD { namespace QCD {
class HmcRunner : public NerscHmcRunner { class HmcRunner : public NerscHmcRunner {
public: public:
void BuildTheAction(int argc, char **argv)
void BuildTheAction (int argc, char **argv)
{ {
typedef WilsonImplR ImplPolicy; typedef WilsonImplR ImplPolicy;
typedef WilsonFermionR FermionAction; typedef WilsonFermionR FermionAction;
typedef typename FermionAction::FermionField FermionField; typedef typename FermionAction::FermionField FermionField;
UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
FGrid = UGrid; FGrid = UGrid;
FrbGrid = UrbGrid; FrbGrid = UrbGrid;
// temporarily need a gauge field // temporarily need a gauge field
LatticeGaugeField U(UGrid); LatticeGaugeField U(UGrid);
// Gauge action // Gauge action
WilsonGaugeActionR Waction(5.6); WilsonGaugeActionR Waction(5.6);
Real mass=-0.77; Real mass = -0.77;
FermionAction FermOp(U,*FGrid,*FrbGrid,mass); FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
ConjugateGradient<FermionField> CG(1.0e-8,10000);
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp,CG,CG); ConjugateGradient<FermionField> CG(1.0e-8, 10000);
//Set smearing (true/false), default: false TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
// Set smearing (true/false), default: false
Nf2.is_smeared = true; Nf2.is_smeared = true;
// Collect actions
//Collect actions
ActionLevel<LatticeGaugeField> Level1(1); ActionLevel<LatticeGaugeField> Level1(1);
Level1.push_back(&Nf2); Level1.push_back(&Nf2);
@ -81,24 +80,20 @@ public:
TheAction.push_back(Level1); TheAction.push_back(Level1);
TheAction.push_back(Level2); TheAction.push_back(Level2);
Run(argc,argv); Run(argc, argv);
}; };
}; };
}
}}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
HmcRunner TheHMC;
TheHMC.BuildTheAction(argc,argv);
} }
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads
<< " threads" << std::endl;
HmcRunner TheHMC;
TheHMC.BuildTheAction(argc, argv);
}

View File

@ -0,0 +1,113 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include "Grid/Grid.h"
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
namespace Grid {
namespace QCD {
// Here change the allowed (higher) representations
typedef Representations< FundamentalRepresentation, AdjointRepresentation , TwoIndexSymmetricRepresentation> TheRepresentations;
class HmcRunner : public NerscHmcRunnerHirep< TheRepresentations > {
public:
void BuildTheAction(int argc, char **argv)
{
typedef WilsonAdjImplR AdjImplPolicy; // gauge field implemetation for the pseudofermions
typedef WilsonAdjFermionR AdjFermionAction; // type of lattice fermions (Wilson, DW, ...)
typedef WilsonTwoIndexSymmetricImplR SymmImplPolicy;
typedef WilsonTwoIndexSymmetricFermionR SymmFermionAction;
typedef typename AdjFermionAction::FermionField AdjFermionField;
typedef typename SymmFermionAction::FermionField SymmFermionField;
UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
FGrid = UGrid;
FrbGrid = UrbGrid;
// temporarily need a gauge field
//LatticeGaugeField U(UGrid);
AdjointRepresentation::LatticeField UA(UGrid);
TwoIndexSymmetricRepresentation::LatticeField US(UGrid);
// Gauge action
WilsonGaugeActionR Waction(5.6);
Real adjoint_mass = -0.1;
Real symm_mass = -0.5;
AdjFermionAction AdjFermOp(UA, *FGrid, *FrbGrid, adjoint_mass);
SymmFermionAction SymmFermOp(US, *FGrid, *FrbGrid, symm_mass);
ConjugateGradient<AdjFermionField> CG_adj(1.0e-8, 10000, false);
ConjugateGradient<SymmFermionField> CG_symm(1.0e-8, 10000, false);
// Pass two solvers: one for the force computation and one for the action
TwoFlavourPseudoFermionAction<AdjImplPolicy> Nf2_Adj(AdjFermOp, CG_adj, CG_adj);
TwoFlavourPseudoFermionAction<SymmImplPolicy> Nf2_Symm(SymmFermOp, CG_symm, CG_symm);
// Collect actions
ActionLevel<LatticeGaugeField, TheRepresentations > Level1(1);
Level1.push_back(&Nf2_Adj);
Level1.push_back(&Nf2_Symm);
ActionLevel<LatticeGaugeField, TheRepresentations > Level2(4);
Level2.push_back(&Waction);
TheAction.push_back(Level1);
TheAction.push_back(Level2);
Run(argc, argv);
};
};
}
}
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads
<< " threads" << std::endl;
HmcRunner TheHMC;
TheHMC.BuildTheAction(argc, argv);
}

View File

@ -0,0 +1,103 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_WilsonAdjointFermionGauge.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: neo <cossu@post.kek.jp>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include "Grid/Grid.h"
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
namespace Grid {
namespace QCD {
// Here change the allowed (higher) representations
typedef Representations< FundamentalRepresentation, TwoIndexSymmetricRepresentation > TheRepresentations;
class HmcRunner : public NerscHmcRunnerHirep< TheRepresentations > {
public:
void BuildTheAction(int argc, char **argv)
{
typedef WilsonTwoIndexSymmetricImplR ImplPolicy; // gauge field implemetation for the pseudofermions
typedef WilsonTwoIndexSymmetricFermionR FermionAction; // type of lattice fermions (Wilson, DW, ...)
typedef typename FermionAction::FermionField FermionField;
UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
FGrid = UGrid;
FrbGrid = UrbGrid;
// temporarily need a gauge field
TwoIndexSymmetricRepresentation::LatticeField U(UGrid);
// Gauge action
WilsonGaugeActionR Waction(2.0);
Real mass = -0.0;
FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
ConjugateGradient<FermionField> CG(1.0e-8, 10000, false);
// Pass two solvers: one for the force computation and one for the action
TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
// Set smearing (true/false), default: false
Nf2.is_smeared = false;
// Collect actions
ActionLevel<LatticeGaugeField, TheRepresentations > Level1(1);
Level1.push_back(&Nf2);
ActionLevel<LatticeGaugeField, TheRepresentations > Level2(4);
Level2.push_back(&Waction);
TheAction.push_back(Level1);
TheAction.push_back(Level2);
Run(argc, argv);
};
};
}
}
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
std::cout << GridLogMessage << "Grid is setup to use " << threads
<< " threads" << std::endl;
HmcRunner TheHMC;
TheHMC.BuildTheAction(argc, argv);
}

0
tests/qdpxx/Test_qdpxx_loops_staples.cc Executable file → Normal file
View File

0
tests/qdpxx/Test_qdpxx_munprec.cc Executable file → Normal file
View File

View File

@ -1,87 +1,105 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_cg_prec.cc Source file: ./tests/Test_dwf_cg_prec.cc
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution
*************************************************************************************/ directory
/* END LEGAL */ *************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
using namespace Grid::QCD; using namespace Grid::QCD;
template<class d> template <class d>
struct scal { struct scal {
d internal; d internal;
}; };
Gamma::GammaMatrix Gmu [] = { Gamma::GammaMatrix Gmu[] = {Gamma::GammaX, Gamma::GammaY, Gamma::GammaZ,
Gamma::GammaX, Gamma::GammaT};
Gamma::GammaY,
Gamma::GammaZ,
Gamma::GammaT
};
int main (int argc, char ** argv) int main(int argc, char** argv) {
{ Grid_init(&argc, &argv);
Grid_init(&argc,&argv);
const int Ls=8; const int Ls = 16;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); GridDefaultMpi());
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); GridRedBlackCartesian* UrbGrid =
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian* FrbGrid =
SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
std::vector<int> seeds4({1,2,3,4}); std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5,6,7,8}); std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5); GridParallelRNG RNG5(FGrid);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
LatticeFermion src(FGrid); random(RNG5,src); LatticeFermion src(FGrid);
LatticeFermion result(FGrid); result=zero; random(RNG5, src);
LatticeGaugeField Umu(UGrid); LatticeFermion result(FGrid);
result = zero;
LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4,Umu); SU3::HotConfiguration(RNG4, Umu);
std::vector<LatticeColourMatrix> U(4,UGrid); std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt()
for(int mu=0;mu<Nd;mu++){ << " Ls: " << Ls << std::endl;
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
std::vector<LatticeColourMatrix> U(4, UGrid);
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
} }
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
LatticeFermion src_o(FrbGrid); RealD mass = 0.01;
RealD M5 = 1.8;
DomainWallFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5);
LatticeFermion src_o(FrbGrid);
LatticeFermion result_o(FrbGrid); LatticeFermion result_o(FrbGrid);
pickCheckerboard(Odd,src_o,src); pickCheckerboard(Odd, src_o, src);
result_o=zero; result_o = zero;
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf); GridStopWatch CGTimer;
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o); SchurDiagMooeeOperator<DomainWallFermionR, LatticeFermion> HermOpEO(Ddwf);
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
CGTimer.Start();
CG(HermOpEO, src_o, result_o);
CGTimer.Stop();
std::cout << GridLogMessage << "Total CG time : " << CGTimer.Elapsed()
<< std::endl;
std::cout << GridLogMessage << "######## Dhop calls summary" << std::endl;
Ddwf.Report();
Grid_finalize(); Grid_finalize();
} }

View File

@ -83,6 +83,7 @@ int main (int argc, char ** argv)
SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw); SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
CG(HermOpEO,src_o,result_o); CG(HermOpEO,src_o,result_o);
Grid_finalize(); Grid_finalize();
} }