1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-14 01:35:36 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

This commit is contained in:
paboyle 2016-08-22 18:29:36 +01:00
commit 88be3b39bb
16 changed files with 344 additions and 127 deletions

View File

@ -23,6 +23,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-4.9 env: VERSION=-4.9
- compiler: gcc - compiler: gcc
@ -35,6 +37,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: VERSION=-5 env: VERSION=-5
- compiler: clang - compiler: clang
@ -47,6 +51,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
- compiler: clang - compiler: clang
@ -59,6 +65,8 @@ matrix:
- libmpfr-dev - libmpfr-dev
- libgmp-dev - libgmp-dev
- libmpc-dev - libmpc-dev
- libopenmpi-dev
- openmpi-bin
- binutils-dev - binutils-dev
env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz env: CLANG_LINK=http://llvm.org/releases/3.7.0/clang+llvm-3.7.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz
@ -69,6 +77,7 @@ before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ "$CC" == "clang" ]]; then export LD_LIBRARY_PATH="${GRIDDIR}/clang/lib:${LD_LIBRARY_PATH}"; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libmpc; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install openmpi; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ "$CC" == "gcc" ]]; then brew install gcc5; fi
install: install:
@ -92,3 +101,9 @@ script:
- ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none - ../configure --enable-precision=double --enable-simd=SSE4 --enable-comms=none
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 - ./benchmarks/Benchmark_dwf --threads 1
- echo make clean
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export CXXFLAGS='-DMPI_UINT32_T=MPI_UNSIGNED -DMPI_UINT64_T=MPI_UNSIGNED_LONG'; fi
- ../configure --enable-precision=single --enable-simd=SSE4 --enable-comms=mpi-auto
- make -j4
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then mpirun.openmpi -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then mpirun -n 2 ./benchmarks/Benchmark_dwf --threads 1 --mpi 2.1.1.1; fi

View File

@ -8,11 +8,11 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
############### Checks for programs ############### Checks for programs
AC_LANG(C++) AC_LANG(C++)
: ${CXXFLAGS="-O3"} CXXFLAGS="-O3 $CXXFLAGS"
AC_PROG_CXX AC_PROG_CXX
AC_OPENMP AC_OPENMP
AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS" AM_CXXFLAGS="$OPENMP_CXXFLAGS $AM_CXXFLAGS"
LT_INIT([disable-shared]) LT_INIT
############### Checks for header files ############### Checks for header files
AC_CHECK_HEADERS(stdint.h) AC_CHECK_HEADERS(stdint.h)

View File

@ -111,12 +111,16 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#define FermOp4dVecTemplateInstantiate(A) \ #define FermOp4dVecTemplateInstantiate(A) \
template class A<WilsonImplF>; \ template class A<WilsonImplF>; \
template class A<WilsonImplD>; \ template class A<WilsonImplD>; \
template class A<ZWilsonImplF>; \
template class A<ZWilsonImplD>; \
template class A<GparityWilsonImplF>; \ template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; template class A<GparityWilsonImplD>;
#define FermOp5dVecTemplateInstantiate(A) \ #define FermOp5dVecTemplateInstantiate(A) \
template class A<DomainWallVec5dImplF>; \ template class A<DomainWallVec5dImplF>; \
template class A<DomainWallVec5dImplD>; template class A<DomainWallVec5dImplD>; \
template class A<ZDomainWallVec5dImplF>; \
template class A<ZDomainWallVec5dImplD>;
#define FermOpTemplateInstantiate(A) \ #define FermOpTemplateInstantiate(A) \
FermOp4dVecTemplateInstantiate(A) \ FermOp4dVecTemplateInstantiate(A) \
@ -138,6 +142,7 @@ typedef SymanzikGaugeAction<ConjugateGimplD> ConjugateSymanzikGaugeAction
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/DomainWallFermion.h> #include <Grid/qcd/action/fermion/DomainWallFermion.h>
#include <Grid/qcd/action/fermion/MobiusFermion.h> #include <Grid/qcd/action/fermion/MobiusFermion.h>
#include <Grid/qcd/action/fermion/ZMobiusFermion.h>
#include <Grid/qcd/action/fermion/ScaledShamirFermion.h> #include <Grid/qcd/action/fermion/ScaledShamirFermion.h>
#include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h> #include <Grid/qcd/action/fermion/MobiusZolotarevFermion.h>
#include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h> #include <Grid/qcd/action/fermion/ShamirZolotarevFermion.h>
@ -176,6 +181,11 @@ typedef DomainWallFermion<WilsonImplD> DomainWallFermionD;
typedef MobiusFermion<WilsonImplR> MobiusFermionR; typedef MobiusFermion<WilsonImplR> MobiusFermionR;
typedef MobiusFermion<WilsonImplF> MobiusFermionF; typedef MobiusFermion<WilsonImplF> MobiusFermionF;
typedef MobiusFermion<WilsonImplD> MobiusFermionD; typedef MobiusFermion<WilsonImplD> MobiusFermionD;
typedef ZMobiusFermion<ZWilsonImplR> ZMobiusFermionR;
typedef ZMobiusFermion<ZWilsonImplF> ZMobiusFermionF;
typedef ZMobiusFermion<ZWilsonImplD> ZMobiusFermionD;
typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR; typedef ScaledShamirFermion<WilsonImplR> ScaledShamirFermionR;
typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF; typedef ScaledShamirFermion<WilsonImplF> ScaledShamirFermionF;
typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD; typedef ScaledShamirFermion<WilsonImplD> ScaledShamirFermionD;

View File

@ -54,18 +54,18 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag (Ls,1.0); std::vector<Coeff_t> diag (Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); upper[Ls-1]=mass; std::vector<Coeff_t> upper(Ls,-1.0); upper[Ls-1]=mass;
std::vector<RealD> lower(Ls,-1.0); lower[0] =mass; std::vector<Coeff_t> lower(Ls,-1.0); lower[0] =mass;
M5D(psi,chi,chi,lower,diag,upper); M5D(psi,chi,chi,lower,diag,upper);
} }
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din) void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &Din)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bs; std::vector<Coeff_t> diag = bs;
std::vector<RealD> upper= cs; std::vector<Coeff_t> upper= cs;
std::vector<RealD> lower= cs; std::vector<Coeff_t> lower= cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5D(psi,psi,Din,lower,diag,upper); M5D(psi,psi,Din,lower,diag,upper);
@ -73,9 +73,9 @@ void CayleyFermion5D<Impl>::Meooe5D (const FermionField &psi, FermionField &D
template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi) template<class Impl> void CayleyFermion5D<Impl>::Meo5D (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = beo; std::vector<Coeff_t> diag = beo;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) { for(int i=0;i<Ls;i++) {
upper[i]=-ceo[i]; upper[i]=-ceo[i];
lower[i]=-ceo[i]; lower[i]=-ceo[i];
@ -88,9 +88,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::Mooee (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bee; std::vector<Coeff_t> diag = bee;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for(int i=0;i<Ls;i++) { for(int i=0;i<Ls;i++) {
upper[i]=-cee[i]; upper[i]=-cee[i];
lower[i]=-cee[i]; lower[i]=-cee[i];
@ -104,9 +104,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::MooeeDag (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag = bee; std::vector<Coeff_t> diag = bee;
std::vector<RealD> upper(Ls); std::vector<Coeff_t> upper(Ls);
std::vector<RealD> lower(Ls); std::vector<Coeff_t> lower(Ls);
for (int s=0;s<Ls;s++){ for (int s=0;s<Ls;s++){
// Assemble the 5d matrix // Assemble the 5d matrix
@ -129,9 +129,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi) void CayleyFermion5D<Impl>::M5Ddag (const FermionField &psi, FermionField &chi)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag(Ls,1.0); std::vector<Coeff_t> diag(Ls,1.0);
std::vector<RealD> upper(Ls,-1.0); std::vector<Coeff_t> upper(Ls,-1.0);
std::vector<RealD> lower(Ls,-1.0); std::vector<Coeff_t> lower(Ls,-1.0);
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5Ddag(psi,chi,chi,lower,diag,upper); M5Ddag(psi,chi,chi,lower,diag,upper);
@ -141,9 +141,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din) void CayleyFermion5D<Impl>::MeooeDag5D (const FermionField &psi, FermionField &Din)
{ {
int Ls=this->Ls; int Ls=this->Ls;
std::vector<RealD> diag =bs; std::vector<Coeff_t> diag =bs;
std::vector<RealD> upper=cs; std::vector<Coeff_t> upper=cs;
std::vector<RealD> lower=cs; std::vector<Coeff_t> lower=cs;
upper[Ls-1]=-mass*upper[Ls-1]; upper[Ls-1]=-mass*upper[Ls-1];
lower[0] =-mass*lower[0]; lower[0] =-mass*lower[0];
M5Ddag(psi,psi,Din,lower,diag,upper); M5Ddag(psi,psi,Din,lower,diag,upper);
@ -273,11 +273,21 @@ void CayleyFermion5D<Impl>::MeoDeriv(GaugeField &mat,const FermionField &U,const
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c) void CayleyFermion5D<Impl>::SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c)
{ {
SetCoefficientsZolotarev(1.0,zdata,b,c); std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(1.0,gamma,b,c);
} }
//Zolo //Zolo
template<class Impl> template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c) void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolotarev_data *zdata,RealD b,RealD c)
{
std::vector<Coeff_t> gamma(this->Ls);
for(int s=0;s<this->Ls;s++) gamma[s] = zdata->gamma[s];
SetCoefficientsInternal(zolo_hi,gamma,b,c);
}
//Zolo
template<class Impl>
void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c)
{ {
int Ls=this->Ls; int Ls=this->Ls;
@ -315,7 +325,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
double bmc = b-c; double bmc = b-c;
for(int i=0; i < Ls; i++){ for(int i=0; i < Ls; i++){
as[i] = 1.0; as[i] = 1.0;
omega[i] = ((double)zdata->gamma[i])*zolo_hi; //NB reciprocal relative to Chroma NEF code omega[i] = gamma[i]*zolo_hi; //NB reciprocal relative to Chroma NEF code
bs[i] = 0.5*(bpc/omega[i] + bmc); bs[i] = 0.5*(bpc/omega[i] + bmc);
cs[i] = 0.5*(bpc/omega[i] - bmc); cs[i] = 0.5*(bpc/omega[i] - bmc);
} }
@ -377,7 +387,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,Approx::zolot
} }
{ {
double delta_d=mass*cee[Ls-1]; Coeff_t delta_d=mass*cee[Ls-1];
for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j]; for(int j=0;j<Ls-1;j++) delta_d *= cee[j]/bee[j];
dee[Ls-1] += delta_d; dee[Ls-1] += delta_d;
} }

View File

@ -62,16 +62,16 @@ namespace Grid {
void M5D(const FermionField &psi, void M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper); std::vector<Coeff_t> &upper);
void M5Ddag(const FermionField &psi, void M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper); std::vector<Coeff_t> &upper);
void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv); void MooeeInternal(const FermionField &in, FermionField &out,int dag,int inv);
virtual void Instantiatable(void)=0; virtual void Instantiatable(void)=0;
@ -91,23 +91,23 @@ namespace Grid {
RealD mass; RealD mass;
// Cayley form Moebius (tanh and zolotarev) // Cayley form Moebius (tanh and zolotarev)
std::vector<RealD> omega; std::vector<Coeff_t> omega;
std::vector<RealD> bs; // S dependent coeffs std::vector<Coeff_t> bs; // S dependent coeffs
std::vector<RealD> cs; std::vector<Coeff_t> cs;
std::vector<RealD> as; std::vector<Coeff_t> as;
// For preconditioning Cayley form // For preconditioning Cayley form
std::vector<RealD> bee; std::vector<Coeff_t> bee;
std::vector<RealD> cee; std::vector<Coeff_t> cee;
std::vector<RealD> aee; std::vector<Coeff_t> aee;
std::vector<RealD> beo; std::vector<Coeff_t> beo;
std::vector<RealD> ceo; std::vector<Coeff_t> ceo;
std::vector<RealD> aeo; std::vector<Coeff_t> aeo;
// LDU factorisation of the eeoo matrix // LDU factorisation of the eeoo matrix
std::vector<RealD> lee; std::vector<Coeff_t> lee;
std::vector<RealD> leem; std::vector<Coeff_t> leem;
std::vector<RealD> uee; std::vector<Coeff_t> uee;
std::vector<RealD> ueem; std::vector<Coeff_t> ueem;
std::vector<RealD> dee; std::vector<Coeff_t> dee;
// Constructors // Constructors
CayleyFermion5D(GaugeField &_Umu, CayleyFermion5D(GaugeField &_Umu,
@ -117,20 +117,19 @@ namespace Grid {
GridRedBlackCartesian &FourDimRedBlackGrid, GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,const ImplParams &p= ImplParams()); RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
protected: protected:
void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c); void SetCoefficientsTanh(Approx::zolotarev_data *zdata,RealD b,RealD c);
void SetCoefficientsInternal(RealD zolo_hi,std::vector<Coeff_t> & gamma,RealD b,RealD c);
}; };
} }
} }
#define INSTANTIATE_DPERP(A)\ #define INSTANTIATE_DPERP(A)\
template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\ template void CayleyFermion5D< A >::M5D(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \ std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\ template void CayleyFermion5D< A >::M5Ddag(const FermionField &psi,const FermionField &phi,FermionField &chi,\
std::vector<RealD> &lower,std::vector<RealD> &diag,std::vector<RealD> &upper); \ std::vector<Coeff_t> &lower,std::vector<Coeff_t> &diag,std::vector<Coeff_t> &upper); \
template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \ template void CayleyFermion5D< A >::MooeeInv (const FermionField &psi, FermionField &chi); \
template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi); template void CayleyFermion5D< A >::MooeeInvDag (const FermionField &psi, FermionField &chi);

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
@ -82,9 +82,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls =this->Ls; int Ls =this->Ls;
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
@ -204,6 +204,8 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(WilsonImplD); INSTANTIATE_DPERP(WilsonImplD);
INSTANTIATE_DPERP(GparityWilsonImplF); INSTANTIATE_DPERP(GparityWilsonImplF);
INSTANTIATE_DPERP(GparityWilsonImplD); INSTANTIATE_DPERP(GparityWilsonImplD);
INSTANTIATE_DPERP(ZWilsonImplF);
INSTANTIATE_DPERP(ZWilsonImplD);
#endif #endif
}} }}

View File

@ -43,9 +43,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls=this->Ls; int Ls=this->Ls;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
@ -65,9 +65,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
int Ls=this->Ls; int Ls=this->Ls;
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){

View File

@ -53,9 +53,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5D(const FermionField &psi, void CayleyFermion5D<Impl>::M5D(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
int Ls = this->Ls; int Ls = this->Ls;
@ -121,9 +121,9 @@ template<class Impl>
void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi, void CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi,
const FermionField &phi, const FermionField &phi,
FermionField &chi, FermionField &chi,
std::vector<RealD> &lower, std::vector<Coeff_t> &lower,
std::vector<RealD> &diag, std::vector<Coeff_t> &diag,
std::vector<RealD> &upper) std::vector<Coeff_t> &upper)
{ {
GridBase *grid=psi._grid; GridBase *grid=psi._grid;
int Ls = this->Ls; int Ls = this->Ls;
@ -194,8 +194,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
chi.checkerboard=psi.checkerboard; chi.checkerboard=psi.checkerboard;
Eigen::MatrixXd Pplus = Eigen::MatrixXd::Zero(Ls,Ls); Eigen::MatrixXcd Pplus = Eigen::MatrixXcd::Zero(Ls,Ls);
Eigen::MatrixXd Pminus = Eigen::MatrixXd::Zero(Ls,Ls); Eigen::MatrixXcd Pminus = Eigen::MatrixXcd::Zero(Ls,Ls);
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){
Pplus(s,s) = bee[s]; Pplus(s,s) = bee[s];
@ -212,8 +212,8 @@ void CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField
Pplus (0,Ls-1) = mass*cee[0]; Pplus (0,Ls-1) = mass*cee[0];
Pminus(Ls-1,0) = mass*cee[Ls-1]; Pminus(Ls-1,0) = mass*cee[Ls-1];
Eigen::MatrixXd PplusMat ; Eigen::MatrixXcd PplusMat ;
Eigen::MatrixXd PminusMat; Eigen::MatrixXcd PminusMat;
if ( inv ) { if ( inv ) {
PplusMat =Pplus.inverse(); PplusMat =Pplus.inverse();
@ -298,8 +298,12 @@ PARALLEL_FOR_LOOP
INSTANTIATE_DPERP(DomainWallVec5dImplD); INSTANTIATE_DPERP(DomainWallVec5dImplD);
INSTANTIATE_DPERP(DomainWallVec5dImplF); INSTANTIATE_DPERP(DomainWallVec5dImplF);
INSTANTIATE_DPERP(ZDomainWallVec5dImplD);
INSTANTIATE_DPERP(ZDomainWallVec5dImplF);
template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv); template void CayleyFermion5D<DomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplF>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
template void CayleyFermion5D<ZDomainWallVec5dImplD>::MooeeInternal(const FermionField &psi, FermionField &chi,int dag, int inv);
}} }}

View File

@ -100,7 +100,8 @@ namespace Grid {
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \ typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
typedef typename Impl::Compressor Compressor; \ typedef typename Impl::Compressor Compressor; \
typedef typename Impl::StencilImpl StencilImpl; \ typedef typename Impl::StencilImpl StencilImpl; \
typedef typename Impl::ImplParams ImplParams; typedef typename Impl::ImplParams ImplParams; \
typedef typename Impl::Coeff_t Coeff_t;
#define INHERIT_IMPL_TYPES(Base) \ #define INHERIT_IMPL_TYPES(Base) \
INHERIT_GIMPL_TYPES(Base)\ INHERIT_GIMPL_TYPES(Base)\
@ -109,12 +110,14 @@ namespace Grid {
/////// ///////
// Single flavour four spinors with colour index // Single flavour four spinors with colour index
/////// ///////
template<class S,int Nrepresentation=Nc> template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > { class WilsonImpl : public PeriodicGaugeImpl< GaugeImplTypes< S, Nrepresentation> > {
public: public:
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -192,12 +195,13 @@ PARALLEL_FOR_LOOP
/////// ///////
// Single flavour four spinors with colour index, 5d redblack // Single flavour four spinors with colour index, 5d redblack
/////// ///////
template<class S,int Nrepresentation=Nc> template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > { class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
public: public:
const bool LsVectorised=true; const bool LsVectorised=true;
typedef _Coeff_t Coeff_t;
typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl; typedef PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -287,12 +291,13 @@ PARALLEL_FOR_LOOP
// Flavour doubled spinors; is Gparity the only? what about C*? // Flavour doubled spinors; is Gparity the only? what about C*?
//////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////
template<class S,int Nrepresentation> template<class S,int Nrepresentation,class _Coeff_t = RealD>
class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{ class GparityWilsonImpl : public ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> >{
public: public:
const bool LsVectorised=false; const bool LsVectorised=false;
typedef _Coeff_t Coeff_t;
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl; typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
INHERIT_GIMPL_TYPES(Gimpl); INHERIT_GIMPL_TYPES(Gimpl);
@ -483,6 +488,18 @@ PARALLEL_FOR_LOOP
typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float typedef WilsonImpl<vComplexF,Nc> WilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double typedef WilsonImpl<vComplexD,Nc> WilsonImplD; // Double
typedef WilsonImpl<vComplex ,Nc,ComplexD> ZWilsonImplR; // Real.. whichever prec
typedef WilsonImpl<vComplexF,Nc,ComplexD> ZWilsonImplF; // Float
typedef WilsonImpl<vComplexD,Nc,ComplexD> ZWilsonImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double

View File

@ -68,9 +68,13 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out) int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out)
{ {
// No asm implementation yet. #ifdef AVX512
// if ( AsmOpt ) WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if ( AsmOpt ) {
// else WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
} else {
#else
{
#endif
for(int site=0;site<Ns;site++) { for(int site=0;site<Ns;site++) {
for(int s=0;s<Ls;s++) { for(int s=0;s<Ls;s++) {
if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out); if (HandOpt) WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
@ -80,6 +84,7 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,
sU++; sU++;
} }
} }
}
//////////////////////////////////////////// ////////////////////////////////////////////

View File

@ -79,6 +79,10 @@ namespace Grid {
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out); int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,int Ls, int Ns, const FermionField &in, FermionField &out);
void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,

View File

@ -73,12 +73,21 @@ static int signInit = setupSigns();
#define MAYBEPERM(A,perm) if (perm) { A ; } #define MAYBEPERM(A,perm) if (perm) { A ; }
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
#define FX(A) WILSONASM_ ##A #define FX(A) WILSONASM_ ##A
#undef KERNEL_DAG
template<> template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#undef VMOVIDUP #undef VMOVIDUP
#undef VMOVRDUP #undef VMOVRDUP
#undef MAYBEPERM #undef MAYBEPERM
@ -89,14 +98,25 @@ void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrd
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C) #define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C) #define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf) #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
#undef KERNEL_DAG
template<> template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG
template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#endif #endif
template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, template void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out); int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);

View File

@ -30,7 +30,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_PROJMEM(base);
#else
XM_PROJMEM(base); XM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -41,15 +45,22 @@
MULT_2SPIN_DIR_PFXP(Xp,basep); MULT_2SPIN_DIR_PFXP(Xp,basep);
} }
LOAD64(%r10,isigns); LOAD64(%r10,isigns);
#ifdef KERNEL_DAG
XP_RECON;
#else
XM_RECON; XM_RECON;
#endif
//////////////////////////////// ////////////////////////////////
// Yp // Yp
//////////////////////////////// ////////////////////////////////
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_PROJMEM(base);
#else
YM_PROJMEM(base); YM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -60,7 +71,11 @@
MULT_2SPIN_DIR_PFYP(Yp,basep); MULT_2SPIN_DIR_PFYP(Yp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YP_RECON_ACCUM;
#else
YM_RECON_ACCUM; YM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zp // Zp
@ -68,7 +83,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_PROJMEM(base);
#else
ZM_PROJMEM(base); ZM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -79,7 +98,11 @@
MULT_2SPIN_DIR_PFZP(Zp,basep); MULT_2SPIN_DIR_PFZP(Zp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZP_RECON_ACCUM;
#else
ZM_RECON_ACCUM; ZM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tp // Tp
@ -87,7 +110,11 @@
basep = st.GetPFInfo(nent,plocal); nent++; basep = st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_PROJMEM(base);
#else
TM_PROJMEM(base); TM_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -98,7 +125,11 @@
MULT_2SPIN_DIR_PFTP(Tp,basep); MULT_2SPIN_DIR_PFTP(Tp,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TP_RECON_ACCUM;
#else
TM_RECON_ACCUM; TM_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Xm // Xm
@ -107,7 +138,11 @@
// basep= st.GetPFInfo(nent,plocal); nent++; // basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_PROJMEM(base);
#else
XP_PROJMEM(base); XP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR3,perm); MAYBEPERM(PERMUTE_DIR3,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -118,7 +153,11 @@
MULT_2SPIN_DIR_PFXM(Xm,basep); MULT_2SPIN_DIR_PFXM(Xm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
XM_RECON_ACCUM;
#else
XP_RECON_ACCUM; XP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Ym // Ym
@ -126,7 +165,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_PROJMEM(base);
#else
YP_PROJMEM(base); YP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR2,perm); MAYBEPERM(PERMUTE_DIR2,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -137,7 +180,11 @@
MULT_2SPIN_DIR_PFYM(Ym,basep); MULT_2SPIN_DIR_PFYM(Ym,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
YM_RECON_ACCUM;
#else
YP_RECON_ACCUM; YP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Zm // Zm
@ -145,7 +192,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_PROJMEM(base);
#else
ZP_PROJMEM(base); ZP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR1,perm); MAYBEPERM(PERMUTE_DIR1,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -156,7 +207,11 @@
MULT_2SPIN_DIR_PFZM(Zm,basep); MULT_2SPIN_DIR_PFZM(Zm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
ZM_RECON_ACCUM;
#else
ZP_RECON_ACCUM; ZP_RECON_ACCUM;
#endif
//////////////////////////////// ////////////////////////////////
// Tm // Tm
@ -164,7 +219,11 @@
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
if ( local ) { if ( local ) {
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_PROJMEM(base);
#else
TP_PROJMEM(base); TP_PROJMEM(base);
#endif
MAYBEPERM(PERMUTE_DIR0,perm); MAYBEPERM(PERMUTE_DIR0,perm);
} else { } else {
LOAD_CHI(base); LOAD_CHI(base);
@ -175,7 +234,11 @@
MULT_2SPIN_DIR_PFTM(Tm,basep); MULT_2SPIN_DIR_PFTM(Tm,basep);
} }
LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit LOAD64(%r10,isigns); // times i => shuffle and xor the real part sign bit
#ifdef KERNEL_DAG
TM_RECON_ACCUM;
#else
TP_RECON_ACCUM; TP_RECON_ACCUM;
#endif
basep= st.GetPFInfo(nent,plocal); nent++; basep= st.GetPFInfo(nent,plocal); nent++;
SAVE_RESULT(base,basep); SAVE_RESULT(base,basep);

View File

@ -839,46 +839,23 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
////////////// Wilson ; uses this implementation ///////////////////// ////////////// Wilson ; uses this implementation /////////////////////
// Need Nc=3 though // // Need Nc=3 though //
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, #define INSTANTIATE_THEM(A) \
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
int ss,int sU,const FermionField &in, FermionField &out); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, int ss,int sU,const FermionField &in, FermionField &out);\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
int ss,int sU,const FermionField &in, FermionField &out); std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
template void WilsonKernels<WilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<WilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out);
template void WilsonKernels<DomainWallVec5dImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF);
INSTANTIATE_THEM(WilsonImplD);
INSTANTIATE_THEM(ZWilsonImplF);
INSTANTIATE_THEM(ZWilsonImplD);
INSTANTIATE_THEM(GparityWilsonImplF);
INSTANTIATE_THEM(GparityWilsonImplD);
INSTANTIATE_THEM(DomainWallVec5dImplF);
INSTANTIATE_THEM(DomainWallVec5dImplD);
INSTANTIATE_THEM(ZDomainWallVec5dImplF);
INSTANTIATE_THEM(ZDomainWallVec5dImplD);
}} }}

View File

@ -0,0 +1,79 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/MobiusFermion.h
Copyright (C) 2015
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_QCD_ZMOBIUS_FERMION_H
#define GRID_QCD_ZMOBIUS_FERMION_H
#include <Grid/Grid.h>
namespace Grid {
namespace QCD {
template<class Impl>
class ZMobiusFermion : public CayleyFermion5D<Impl>
{
public:
INHERIT_IMPL_TYPES(Impl);
public:
virtual void Instantiatable(void) {};
// Constructors
ZMobiusFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
RealD _mass,RealD _M5,
std::vector<ComplexD> &gamma, RealD b,RealD c,const ImplParams &p= ImplParams()) :
CayleyFermion5D<Impl>(_Umu,
FiveDimGrid,
FiveDimRedBlackGrid,
FourDimGrid,
FourDimRedBlackGrid,_mass,_M5,p)
{
RealD eps = 1.0;
std::cout<<GridLogMessage << "ZMobiusFermion (b="<<b<<",c="<<c<<") with Ls= "<<this->Ls<<" gamma passed in"<<std::endl;
std::vector<Coeff_t> zgamma(this->Ls);
for(int s=0;s<this->Ls;s++){
zgamma[s] = gamma[s];
}
// Call base setter
this->SetCoefficientsInternal(1.0,zgamma,b,c);
}
};
}
}
#endif

View File

@ -44,6 +44,7 @@ struct scal {
}; };
typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR; typedef DomainWallFermion<DomainWallVec5dImplR> DomainWallVecFermionR;
typedef ZMobiusFermion<ZDomainWallVec5dImplR> ZMobiusVecFermionR;
typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR; typedef MobiusFermion<DomainWallVec5dImplR> MobiusVecFermionR;
typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR; typedef MobiusZolotarevFermion<DomainWallVec5dImplR> MobiusZolotarevVecFermionR;
typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR; typedef ScaledShamirFermion<DomainWallVec5dImplR> ScaledShamirVecFermionR;
@ -117,6 +118,17 @@ int main (int argc, char ** argv)
TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5); TestWhat<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5); TestWhat<MobiusVecFermionR>(sDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"Z-MobiusFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::vector<ComplexD> gamma(Ls,std::complex<double>(1.0,0.0));
ZMobiusFermionR zDmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,gamma,b,c);
ZMobiusVecFermionR szDmob(Umu,*sFGrid,*sFrbGrid,*sUGrid,*sUrbGrid,mass,M5,gamma,b,c);
TestMoo(zDmob,szDmob);
TestWhat<ZMobiusFermionR>(zDmob,FGrid,FrbGrid,UGrid,mass,M5,&RNG4,&RNG5);
TestWhat<ZMobiusVecFermionR>(szDmob,sFGrid,sFrbGrid,sUGrid,mass,M5,&sRNG4,&sRNG5);
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl; std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;
std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl; std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
std::cout<<GridLogMessage<<"**************************************************************"<<std::endl; std::cout<<GridLogMessage<<"**************************************************************"<<std::endl;