1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-13 04:37:05 +01:00

Reintroducing the hand unrolled loops

This commit is contained in:
Peter Boyle
2015-09-08 17:45:30 +01:00
parent 638d6675ee
commit 2f38ebc446
9 changed files with 134 additions and 210 deletions

View File

@ -56,12 +56,6 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `gmp' library (-lgmp). */
#undef HAVE_LIBGMP
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
#undef HAVE_LIBMPFR
/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H

View File

@ -17,7 +17,8 @@
#include <stddef.h>
#include <algorithms/approx/bigfloat.h>
//#include <algorithms/approx/bigfloat.h>
#include <algorithms/approx/bigfloat_double.h>
#define JMAX 10000 //Maximum number of iterations of Newton's approximation
#define SUM_MAX 10 // Maximum number of terms in exponential

View File

@ -54,10 +54,10 @@ typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
////////////////////////////////////////////////////////////////////////////////////////////////////
#define FermOpTemplateInstantiate(A) \
template class A<GparityWilsonImplF>; \
template class A<GparityWilsonImplD>; \
template class A<WilsonImplF>; \
template class A<WilsonImplD>;
template class A<WilsonImplD>;
// template class A<GparityWilsonImplF>; \
// template class A<GparityWilsonImplD>;
////////////////////////////////////////////
// Fermion operators / actions
@ -79,8 +79,8 @@ typedef WilsonGaugeAction<LatticeGaugeFieldD> WilsonGaugeActionD;
#include <qcd/action/fermion/OverlapWilsonCayleyZolotarevFermion.h>
#include <qcd/action/fermion/ContinuedFractionFermion5D.h> // Continued fraction
#include <qcd/action/fermion/OverlapWilsonContFracTanhFermion.h>
#include <qcd/action/fermion/OverlapWilsonContFracZolotarevFermion.h>
#include <qcd/action/fermion/OverlapWilsonContfracTanhFermion.h>
#include <qcd/action/fermion/OverlapWilsonContfracZolotarevFermion.h>
#include <qcd/action/fermion/PartialFractionFermion5D.h> // Partial fraction
#include <qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>

View File

@ -28,18 +28,31 @@ namespace Grid {
void DiracOptDhopDir(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,int dirdisp,int gamma);
#define HANDOPT
#ifdef HANDOPT
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out);
#else
void DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out){
int sF,int sU,const FermionField &in, FermionField &out)
{
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
}
void DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out){
int sF,int sU,const FermionField &in, FermionField &out)
{
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
}
#endif
WilsonKernels(const ImplParams &p= ImplParams()) : Base(p) {};

View File

@ -3,7 +3,7 @@
#define REGISTER
#define LOAD_CHIMU \
const vSpinColourVector & ref (in._odata[offset]); \
const SiteSpinor & ref (in._odata[offset]); \
Chimu_00=ref()(0)(0);\
Chimu_01=ref()(0)(1);\
Chimu_02=ref()(0)(2);\
@ -18,7 +18,7 @@
Chimu_32=ref()(3)(2);
#define LOAD_CHI\
const vHalfSpinColourVector &ref(buf[offset]); \
const SiteHalfSpinor &ref(buf[offset]); \
Chi_00 = ref()(0)(0);\
Chi_01 = ref()(0)(1);\
Chi_02 = ref()(0)(2);\
@ -280,11 +280,11 @@
namespace Grid {
namespace QCD {
#if 0
template<class Simd>
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
#ifdef HANDOPT
template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSite(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out)
int ss,int sU,const FermionField &in, FermionField &out)
{
REGISTER Simd result_00; // 12 regs on knc
REGISTER Simd result_01;
@ -339,14 +339,14 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
#define Chimu_32 UChi_12
StencilEntry *SE;
int offset,local,perm, ptype;
int ss=sF;
// Xp
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = st._permute_type[Xp];
SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -364,10 +364,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
XP_RECON;
// Yp
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -385,10 +385,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
// Zp
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -405,10 +405,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
ZP_RECON_ACCUM;
// Tp
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -425,10 +425,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
TP_RECON_ACCUM;
// Xm
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = st._permute_type[Xm];
SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -445,10 +445,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
XM_RECON_ACCUM;
// Ym
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -465,10 +465,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
YM_RECON_ACCUM;
// Zm
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -485,10 +485,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
ZM_RECON_ACCUM;
// Tm
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -505,7 +505,7 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
TM_RECON_ACCUM;
{
vSpinColourVector & ref (out._odata[ss]);
SiteSpinor & ref (out._odata[ss]);
vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5));
@ -521,10 +521,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSite(CartesianStencil &
}
}
template<class Simd>
void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out)
template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(CartesianStencil &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out)
{
REGISTER Simd result_00; // 12 regs on knc
REGISTER Simd result_01;
@ -580,12 +580,13 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
int offset,local,perm, ptype;
StencilEntry *SE;
// Xp
offset = st._offsets [Xp][ss];
local = st._is_local[Xp][ss];
perm = st._permute[Xp][ss];
ptype = st._permute_type[Xp];
SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -602,10 +603,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
XM_RECON;
// Yp
offset = st._offsets [Yp][ss];
local = st._is_local[Yp][ss];
perm = st._permute[Yp][ss];
ptype = st._permute_type[Yp];
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -623,10 +624,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
// Zp
offset = st._offsets [Zp][ss];
local = st._is_local[Zp][ss];
perm = st._permute[Zp][ss];
ptype = st._permute_type[Zp];
SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -643,10 +644,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
ZM_RECON_ACCUM;
// Tp
offset = st._offsets [Tp][ss];
local = st._is_local[Tp][ss];
perm = st._permute[Tp][ss];
ptype = st._permute_type[Tp];
SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -663,10 +664,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
TM_RECON_ACCUM;
// Xm
offset = st._offsets [Xm][ss];
local = st._is_local[Xm][ss];
perm = st._permute[Xm][ss];
ptype = st._permute_type[Xm];
SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -684,10 +685,10 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
// Ym
offset = st._offsets [Ym][ss];
local = st._is_local[Ym][ss];
perm = st._permute[Ym][ss];
ptype = st._permute_type[Ym];
SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
@ -704,11 +705,11 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
YP_RECON_ACCUM;
// Zm
offset = st._offsets [Zm][ss];
local = st._is_local[Zm][ss];
perm = st._permute[Zm][ss];
ptype = st._permute_type[Zm];
SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
ZP_PROJ;
@ -724,11 +725,11 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
ZP_RECON_ACCUM;
// Tm
offset = st._offsets [Tm][ss];
local = st._is_local[Tm][ss];
perm = st._permute[Tm][ss];
ptype = st._permute_type[Tm];
SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if ( local ) {
LOAD_CHIMU;
TP_PROJ;
@ -759,5 +760,6 @@ void WilsonKernels<WilsonImpl<Simd,3> >::DiracOptHandDhopSiteDag(CartesianStenci
vstream(ref()(3)(2),result_32*(-0.5));
}
}
FermOpTemplateInstantiate(WilsonKernels);
#endif
}}