mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 11:15:55 +01:00
Reintroducing the hand unrolled loops
This commit is contained in:
parent
638d6675ee
commit
2f38ebc446
18
TODO
18
TODO
@ -19,11 +19,13 @@ Policies:
|
|||||||
* Support different boundary conditions (finite temp, chem. potential ... )
|
* Support different boundary conditions (finite temp, chem. potential ... )
|
||||||
* Support different fermion representations?
|
* Support different fermion representations?
|
||||||
- contained entirely within the integrator presently
|
- contained entirely within the integrator presently
|
||||||
- Sign of force term.
|
|
||||||
- Rename "Ta" as too unclear
|
- Sign of force term.
|
||||||
|
|
||||||
- Reversibility test.
|
- Reversibility test.
|
||||||
|
|
||||||
|
- Rename "Ta" as too unclear
|
||||||
|
|
||||||
- Lanczos
|
- Lanczos
|
||||||
|
|
||||||
- Rectangle gauge actions.
|
- Rectangle gauge actions.
|
||||||
@ -31,11 +33,13 @@ Policies:
|
|||||||
Symanzik,
|
Symanzik,
|
||||||
... etc...
|
... etc...
|
||||||
|
|
||||||
- Prepare multigrid for HMC.
|
- Prepare multigrid for HMC. - Alternate setup schemes.
|
||||||
- Alternate setup schemes.
|
|
||||||
* Support for ILDG --- ugly, not done
|
- Support for ILDG --- ugly, not done
|
||||||
* Flavour matrices?
|
|
||||||
* FFTnD ?
|
- Flavour matrices?
|
||||||
|
|
||||||
|
- FFTnD ?
|
||||||
|
|
||||||
================================================================
|
================================================================
|
||||||
* Hacks and bug fixes to clean up and Audits
|
* Hacks and bug fixes to clean up and Audits
|
||||||
|
106
configure
vendored
106
configure
vendored
@ -6359,105 +6359,15 @@ fi
|
|||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __gmpf_init in -lgmp" >&5
|
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||||
$as_echo_n "checking for __gmpf_init in -lgmp... " >&6; }
|
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||||
if ${ac_cv_lib_gmp___gmpf_init+:} false; then :
|
#Please install or provide the correct path to your installation
|
||||||
$as_echo_n "(cached) " >&6
|
#Info at: http://www.gmplib.org)])
|
||||||
else
|
|
||||||
ac_check_lib_save_LIBS=$LIBS
|
|
||||||
LIBS="-lgmp $LIBS"
|
|
||||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
|
||||||
/* end confdefs.h. */
|
|
||||||
|
|
||||||
/* Override any GCC internal prototype to avoid an error.
|
|
||||||
Use char because int might match the return type of a GCC
|
|
||||||
builtin and then its argument prototype would still apply. */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
#endif
|
|
||||||
char __gmpf_init ();
|
|
||||||
int
|
|
||||||
main ()
|
|
||||||
{
|
|
||||||
return __gmpf_init ();
|
|
||||||
;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
_ACEOF
|
|
||||||
if ac_fn_cxx_try_link "$LINENO"; then :
|
|
||||||
ac_cv_lib_gmp___gmpf_init=yes
|
|
||||||
else
|
|
||||||
ac_cv_lib_gmp___gmpf_init=no
|
|
||||||
fi
|
|
||||||
rm -f core conftest.err conftest.$ac_objext \
|
|
||||||
conftest$ac_exeext conftest.$ac_ext
|
|
||||||
LIBS=$ac_check_lib_save_LIBS
|
|
||||||
fi
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gmp___gmpf_init" >&5
|
|
||||||
$as_echo "$ac_cv_lib_gmp___gmpf_init" >&6; }
|
|
||||||
if test "x$ac_cv_lib_gmp___gmpf_init" = xyes; then :
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
|
||||||
#define HAVE_LIBGMP 1
|
|
||||||
_ACEOF
|
|
||||||
|
|
||||||
LIBS="-lgmp $LIBS"
|
|
||||||
|
|
||||||
else
|
|
||||||
as_fn_error $? "GNU Multiple Precision GMP library was not found in your system.
|
|
||||||
Please install or provide the correct path to your installation
|
|
||||||
Info at: http://www.gmplib.org" "$LINENO" 5
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpfr_init in -lmpfr" >&5
|
|
||||||
$as_echo_n "checking for mpfr_init in -lmpfr... " >&6; }
|
|
||||||
if ${ac_cv_lib_mpfr_mpfr_init+:} false; then :
|
|
||||||
$as_echo_n "(cached) " >&6
|
|
||||||
else
|
|
||||||
ac_check_lib_save_LIBS=$LIBS
|
|
||||||
LIBS="-lmpfr $LIBS"
|
|
||||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
|
||||||
/* end confdefs.h. */
|
|
||||||
|
|
||||||
/* Override any GCC internal prototype to avoid an error.
|
|
||||||
Use char because int might match the return type of a GCC
|
|
||||||
builtin and then its argument prototype would still apply. */
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
#endif
|
|
||||||
char mpfr_init ();
|
|
||||||
int
|
|
||||||
main ()
|
|
||||||
{
|
|
||||||
return mpfr_init ();
|
|
||||||
;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
_ACEOF
|
|
||||||
if ac_fn_cxx_try_link "$LINENO"; then :
|
|
||||||
ac_cv_lib_mpfr_mpfr_init=yes
|
|
||||||
else
|
|
||||||
ac_cv_lib_mpfr_mpfr_init=no
|
|
||||||
fi
|
|
||||||
rm -f core conftest.err conftest.$ac_objext \
|
|
||||||
conftest$ac_exeext conftest.$ac_ext
|
|
||||||
LIBS=$ac_check_lib_save_LIBS
|
|
||||||
fi
|
|
||||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpfr_mpfr_init" >&5
|
|
||||||
$as_echo "$ac_cv_lib_mpfr_mpfr_init" >&6; }
|
|
||||||
if test "x$ac_cv_lib_mpfr_mpfr_init" = xyes; then :
|
|
||||||
cat >>confdefs.h <<_ACEOF
|
|
||||||
#define HAVE_LIBMPFR 1
|
|
||||||
_ACEOF
|
|
||||||
|
|
||||||
LIBS="-lmpfr $LIBS"
|
|
||||||
|
|
||||||
else
|
|
||||||
as_fn_error $? "GNU Multiple Precision MPFR library was not found in your system.
|
|
||||||
Please install or provide the correct path to your installation
|
|
||||||
Info at: http://www.mpfr.org/" "$LINENO" 5
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||||
|
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||||
|
#Please install or provide the correct path to your installation
|
||||||
|
#Info at: http://www.mpfr.org/)])
|
||||||
|
|
||||||
# Check whether --enable-simd was given.
|
# Check whether --enable-simd was given.
|
||||||
if test "${enable_simd+set}" = set; then :
|
if test "${enable_simd+set}" = set; then :
|
||||||
|
16
configure.ac
16
configure.ac
@ -55,15 +55,15 @@ echo :::::::::::::::::::::::::::::::::::::::::::
|
|||||||
|
|
||||||
AC_CHECK_FUNCS([gettimeofday])
|
AC_CHECK_FUNCS([gettimeofday])
|
||||||
|
|
||||||
AC_CHECK_LIB([gmp],[__gmpf_init],,
|
#AC_CHECK_LIB([gmp],[__gmpf_init],,
|
||||||
[AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
# [AC_MSG_ERROR(GNU Multiple Precision GMP library was not found in your system.
|
||||||
Please install or provide the correct path to your installation
|
#Please install or provide the correct path to your installation
|
||||||
Info at: http://www.gmplib.org)])
|
#Info at: http://www.gmplib.org)])
|
||||||
|
|
||||||
AC_CHECK_LIB([mpfr],[mpfr_init],,
|
#AC_CHECK_LIB([mpfr],[mpfr_init],,
|
||||||
[AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
# [AC_MSG_ERROR(GNU Multiple Precision MPFR library was not found in your system.
|
||||||
Please install or provide the correct path to your installation
|
#Please install or provide the correct path to your installation
|
||||||
Info at: http://www.mpfr.org/)])
|
#Info at: http://www.mpfr.org/)])
|
||||||
|
|
||||||
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
AC_ARG_ENABLE([simd],[AC_HELP_STRING([--enable-simd=SSE4|AVX|AVX2|AVX512|MIC],\
|
||||||
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
[Select instructions to be SSE4.0, AVX 1.0, AVX 2.0+FMA, AVX 512, MIC])],\
|
||||||
|
@ -56,12 +56,6 @@
|
|||||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
#undef HAVE_INTTYPES_H
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
/* Define to 1 if you have the `gmp' library (-lgmp). */
|
|
||||||
#undef HAVE_LIBGMP
|
|
||||||
|
|
||||||
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
|
|
||||||
#undef HAVE_LIBMPFR
|
|
||||||
|
|
||||||
/* Define to 1 if you have the <malloc.h> header file. */
|
/* Define to 1 if you have the <malloc.h> header file. */
|
||||||
#undef HAVE_MALLOC_H
|
#undef HAVE_MALLOC_H
|
||||||
|
|
||||||
|
@ -17,7 +17,8 @@
|
|||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
#include <algorithms/approx/bigfloat.h>
|
//#include <algorithms/approx/bigfloat.h>
|
||||||
|
#include <algorithms/approx/bigfloat_double.h>
|
||||||
|
|
||||||
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
|
||||||
#define SUM_MAX 10 // Maximum number of terms in exponential
|
#define SUM_MAX 10 // Maximum number of terms in exponential
|
||||||
|
@ -54,10 +54,10 @@ typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define FermOpTemplateInstantiate(A) \
|
#define FermOpTemplateInstantiate(A) \
|
||||||
template class A<GparityWilsonImplF>; \
|
|
||||||
template class A<GparityWilsonImplD>; \
|
|
||||||
template class A<WilsonImplF>; \
|
template class A<WilsonImplF>; \
|
||||||
template class A<WilsonImplD>;
|
template class A<WilsonImplD>;
|
||||||
|
// template class A<GparityWilsonImplF>; \
|
||||||
|
// template class A<GparityWilsonImplD>;
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Fermion operators / actions
|
// Fermion operators / actions
|
||||||
@ -79,8 +79,8 @@ typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
|
|||||||
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonContFracTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
|
||||||
#include <qcd/action/fermion/OverlapWilsonContFracZolotarevFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
|
||||||
|
|
||||||
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
|
||||||
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
|
||||||
|
@ -28,18 +28,31 @@ namespace Grid {
|
|||||||
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
|
||||||
|
#define HANDOPT
|
||||||
|
#ifdef HANDOPT
|
||||||
|
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out);
|
||||||
|
|
||||||
|
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
|
int sF,int sU,const FermionField &in, FermionField &out);
|
||||||
|
#else
|
||||||
|
|
||||||
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out){
|
int sF,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||||
}
|
}
|
||||||
|
|
||||||
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out){
|
int sF,int sU,const FermionField &in, FermionField &out)
|
||||||
|
{
|
||||||
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
|
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
#define REGISTER
|
#define REGISTER
|
||||||
|
|
||||||
#define LOAD_CHIMU \
|
#define LOAD_CHIMU \
|
||||||
const vSpinColourVector & ref (in._odata[offset]); \
|
const SiteSpinor & ref (in._odata[offset]); \
|
||||||
Chimu_00=ref()(0)(0);\
|
Chimu_00=ref()(0)(0);\
|
||||||
Chimu_01=ref()(0)(1);\
|
Chimu_01=ref()(0)(1);\
|
||||||
Chimu_02=ref()(0)(2);\
|
Chimu_02=ref()(0)(2);\
|
||||||
@ -18,7 +18,7 @@
|
|||||||
Chimu_32=ref()(3)(2);
|
Chimu_32=ref()(3)(2);
|
||||||
|
|
||||||
#define LOAD_CHI\
|
#define LOAD_CHI\
|
||||||
const vHalfSpinColourVector &ref(buf[offset]); \
|
const SiteHalfSpinor &ref(buf[offset]); \
|
||||||
Chi_00 = ref()(0)(0);\
|
Chi_00 = ref()(0)(0);\
|
||||||
Chi_01 = ref()(0)(1);\
|
Chi_01 = ref()(0)(1);\
|
||||||
Chi_02 = ref()(0)(2);\
|
Chi_02 = ref()(0)(2);\
|
||||||
@ -280,11 +280,11 @@
|
|||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
#if 0
|
#ifdef HANDOPT
|
||||||
template<class Simd>
|
template<class Impl>
|
||||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int sF,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
REGISTER Simd result_00; // 12 regs on knc
|
REGISTER Simd result_00; // 12 regs on knc
|
||||||
REGISTER Simd result_01;
|
REGISTER Simd result_01;
|
||||||
@ -339,14 +339,14 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
#define Chimu_32 UChi_12
|
#define Chimu_32 UChi_12
|
||||||
|
|
||||||
|
|
||||||
|
StencilEntry *SE;
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
int ss=sF;
|
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
offset = st._offsets [Xp][ss];
|
SE=st.GetEntry(ptype,Xp,ss);
|
||||||
local = st._is_local[Xp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -364,10 +364,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
XP_RECON;
|
XP_RECON;
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
offset = st._offsets [Yp][ss];
|
SE=st.GetEntry(ptype,Yp,ss);
|
||||||
local = st._is_local[Yp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Yp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Yp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -385,10 +385,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
|
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
offset = st._offsets [Zp][ss];
|
SE=st.GetEntry(ptype,Zp,ss);
|
||||||
local = st._is_local[Zp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -405,10 +405,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
offset = st._offsets [Tp][ss];
|
SE=st.GetEntry(ptype,Tp,ss);
|
||||||
local = st._is_local[Tp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -425,10 +425,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
TP_RECON_ACCUM;
|
TP_RECON_ACCUM;
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
offset = st._offsets [Xm][ss];
|
SE=st.GetEntry(ptype,Xm,ss);
|
||||||
local = st._is_local[Xm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -445,10 +445,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
XM_RECON_ACCUM;
|
XM_RECON_ACCUM;
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
offset = st._offsets [Ym][ss];
|
SE=st.GetEntry(ptype,Ym,ss);
|
||||||
local = st._is_local[Ym][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Ym][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Ym];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -465,10 +465,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
YM_RECON_ACCUM;
|
YM_RECON_ACCUM;
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
offset = st._offsets [Zm][ss];
|
SE=st.GetEntry(ptype,Zm,ss);
|
||||||
local = st._is_local[Zm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -485,10 +485,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
|
||||||
// Tm
|
// Tm
|
||||||
offset = st._offsets [Tm][ss];
|
SE=st.GetEntry(ptype,Tm,ss);
|
||||||
local = st._is_local[Tm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -505,7 +505,7 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
|
||||||
{
|
{
|
||||||
vSpinColourVector & ref (out._odata[ss]);
|
SiteSpinor & ref (out._odata[ss]);
|
||||||
vstream(ref()(0)(0),result_00*(-0.5));
|
vstream(ref()(0)(0),result_00*(-0.5));
|
||||||
vstream(ref()(0)(1),result_01*(-0.5));
|
vstream(ref()(0)(1),result_01*(-0.5));
|
||||||
vstream(ref()(0)(2),result_02*(-0.5));
|
vstream(ref()(0)(2),result_02*(-0.5));
|
||||||
@ -521,10 +521,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Simd>
|
template<class Impl>
|
||||||
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
|
||||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
||||||
int ss,int sU,const FermionField &in, FermionField &out)
|
int ss,int sU,const FermionField &in, FermionField &out)
|
||||||
{
|
{
|
||||||
REGISTER Simd result_00; // 12 regs on knc
|
REGISTER Simd result_00; // 12 regs on knc
|
||||||
REGISTER Simd result_01;
|
REGISTER Simd result_01;
|
||||||
@ -580,12 +580,13 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
int offset,local,perm, ptype;
|
int offset,local,perm, ptype;
|
||||||
|
StencilEntry *SE;
|
||||||
|
|
||||||
// Xp
|
// Xp
|
||||||
offset = st._offsets [Xp][ss];
|
SE=st.GetEntry(ptype,Xp,ss);
|
||||||
local = st._is_local[Xp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -602,10 +603,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
XM_RECON;
|
XM_RECON;
|
||||||
|
|
||||||
// Yp
|
// Yp
|
||||||
offset = st._offsets [Yp][ss];
|
SE=st.GetEntry(ptype,Yp,ss);
|
||||||
local = st._is_local[Yp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Yp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Yp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -623,10 +624,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
// Zp
|
// Zp
|
||||||
offset = st._offsets [Zp][ss];
|
SE=st.GetEntry(ptype,Zp,ss);
|
||||||
local = st._is_local[Zp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -643,10 +644,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
ZM_RECON_ACCUM;
|
ZM_RECON_ACCUM;
|
||||||
|
|
||||||
// Tp
|
// Tp
|
||||||
offset = st._offsets [Tp][ss];
|
SE=st.GetEntry(ptype,Tp,ss);
|
||||||
local = st._is_local[Tp][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tp][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tp];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -663,10 +664,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
TM_RECON_ACCUM;
|
TM_RECON_ACCUM;
|
||||||
|
|
||||||
// Xm
|
// Xm
|
||||||
offset = st._offsets [Xm][ss];
|
SE=st.GetEntry(ptype,Xm,ss);
|
||||||
local = st._is_local[Xm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Xm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Xm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -684,10 +685,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
|
|
||||||
|
|
||||||
// Ym
|
// Ym
|
||||||
offset = st._offsets [Ym][ss];
|
SE=st.GetEntry(ptype,Ym,ss);
|
||||||
local = st._is_local[Ym][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Ym][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Ym];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
@ -704,11 +705,11 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
YP_RECON_ACCUM;
|
YP_RECON_ACCUM;
|
||||||
|
|
||||||
// Zm
|
// Zm
|
||||||
offset = st._offsets [Zm][ss];
|
SE=st.GetEntry(ptype,Zm,ss);
|
||||||
local = st._is_local[Zm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Zm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Zm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
ZP_PROJ;
|
ZP_PROJ;
|
||||||
@ -724,11 +725,11 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
ZP_RECON_ACCUM;
|
ZP_RECON_ACCUM;
|
||||||
|
|
||||||
// Tm
|
// Tm
|
||||||
offset = st._offsets [Tm][ss];
|
SE=st.GetEntry(ptype,Tm,ss);
|
||||||
local = st._is_local[Tm][ss];
|
offset = SE->_offset;
|
||||||
perm = st._permute[Tm][ss];
|
local = SE->_is_local;
|
||||||
ptype = st._permute_type[Tm];
|
perm = SE->_permute;
|
||||||
|
|
||||||
if ( local ) {
|
if ( local ) {
|
||||||
LOAD_CHIMU;
|
LOAD_CHIMU;
|
||||||
TP_PROJ;
|
TP_PROJ;
|
||||||
@ -759,5 +760,6 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
|
|||||||
vstream(ref()(3)(2),result_32*(-0.5));
|
vstream(ref()(3)(2),result_32*(-0.5));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
FermOpTemplateInstantiate(WilsonKernels);
|
||||||
#endif
|
#endif
|
||||||
}}
|
}}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_gparity Test_gpwilson_even_odd Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
bin_PROGRAMS = Test_GaugeAction Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_cheby Test_contfrac_cg Test_contfrac_even_odd Test_contfrac_force Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_dwf_force Test_dwf_fpgcr Test_dwf_hdcr Test_gamma Test_hmc_EODWFRatio Test_hmc_EOWilsonFermionGauge Test_hmc_EOWilsonRatio Test_hmc_WilsonFermionGauge Test_hmc_WilsonGauge Test_hmc_WilsonRatio Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_partfrac_force Test_quenched_update Test_remez Test_rhmc_EOWilson1p1 Test_rhmc_EOWilsonRatio Test_rhmc_Wilson1p1 Test_rhmc_WilsonRatio Test_rng Test_rng_fixed Test_serialisation Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd Test_wilson_force Test_wilson_force_phiMdagMphi Test_wilson_force_phiMphi
|
||||||
|
|
||||||
|
|
||||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
|
||||||
@ -90,12 +90,12 @@ Test_gamma_SOURCES=Test_gamma.cc
|
|||||||
Test_gamma_LDADD=-lGrid
|
Test_gamma_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_gparity_SOURCES=Test_gparity.cc
|
#Test_gparity_SOURCES=Test_gparity.cc
|
||||||
Test_gparity_LDADD=-lGrid
|
#Test_gparity_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
#Test_gpwilson_even_odd_SOURCES=Test_gpwilson_even_odd.cc
|
||||||
Test_gpwilson_even_odd_LDADD=-lGrid
|
#Test_gpwilson_even_odd_LDADD=-lGrid
|
||||||
|
|
||||||
|
|
||||||
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
|
Test_hmc_EODWFRatio_SOURCES=Test_hmc_EODWFRatio.cc
|
||||||
|
Loading…
x
Reference in New Issue
Block a user