1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-09-20 09:15:38 +01:00

definitions reconciled.

This commit is contained in:
Jung 2016-01-25 16:30:59 -05:00
parent 7aa8d5e8af
commit bd84c23298
5 changed files with 84 additions and 403 deletions

View File

@ -11,8 +11,7 @@
Note this class requires the gnu multiprecision (GNU MP) library. Note this class requires the gnu multiprecision (GNU MP) library.
*/ */
// fixing ::max_align_t error for some versions of g++/clang++
#include<stddef.h>
#include<math.h> #include<math.h>
#include<stdio.h> #include<stdio.h>
#include<stdlib.h> #include<stdlib.h>

View File

@ -143,7 +143,7 @@ public:
} }
} }
std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl; std::cout<<GridLogMessage<<"ConjugateGradient did NOT converge"<<std::endl;
// assert(0); assert(0);
} }
}; };
} }

View File

@ -596,8 +596,7 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
vstream(out._odata[sF],result*(-0.5)); vstream(out._odata[sF],result*(-0.5));
} }
//#if ( ! defined(AVX512) ) && ( ! defined(IMCI) ) #if ( ! defined(AVX512) ) && ( ! defined(IMCI) )
#if 1
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
@ -608,6 +607,5 @@ void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField
#endif #endif
FermOpTemplateInstantiate(WilsonKernels); FermOpTemplateInstantiate(WilsonKernels);
GparityFermOpTemplateInstantiate(WilsonKernels);
}} }}

View File

@ -60,13 +60,8 @@ namespace Grid {
void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U, void DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
#if 0 int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
//<<<<<<< HEAD
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
// int sF,int sU,const FermionField &in, FermionField &out,uint64_t *);
// doesn't seem to work with Gparity at the moment
#undef HANDOPT
#if 1
void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, void DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
@ -74,19 +69,6 @@ namespace Grid {
void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, void DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true); int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
#endif
#else
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
int DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
int DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out,bool local= true, bool nonlocal=true);
//>>>>>>> fc6ad657514c7966291c19f22af89de5d5a96f93
#endif
WilsonKernels(const ImplParams &p= ImplParams()); WilsonKernels(const ImplParams &p= ImplParams());

View File

@ -310,7 +310,7 @@ namespace QCD {
template<class Impl> template<class Impl>
int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{ {
@ -318,21 +318,21 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V; typedef typename Simd::vector_type V;
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc REGISTER Simd result_00; // 12 regs on knc
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc REGISTER Simd result_01;
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc REGISTER Simd result_02;
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc REGISTER Simd result_10;
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc REGISTER Simd result_11;
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc REGISTER Simd result_12;
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc REGISTER Simd result_20;
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc REGISTER Simd result_21;
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc REGISTER Simd result_22;
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc REGISTER Simd result_30;
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc REGISTER Simd result_31;
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc REGISTER Simd result_32; // 20 left
REGISTER Simd Chi_00; // two spinor; 6 regs REGISTER Simd Chi_00; // two spinor; 6 regs
REGISTER Simd Chi_01; REGISTER Simd Chi_01;
@ -372,178 +372,172 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
StencilEntry *SE; StencilEntry *SE;
int offset, ptype; int offset,local,perm, ptype;
int num = 0;
// Xp // Xp
SE=st.GetEntry(ptype,Xp,ss); SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
XP_PROJ; XP_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} else {
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Xp); MULT_2SPIN(Xp);
XP_RECON_ACCUM;
num++;
} }
XP_RECON;
// Yp // Yp
SE=st.GetEntry(ptype,Yp,ss); SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
YP_PROJ; YP_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Yp); MULT_2SPIN(Yp);
YP_RECON_ACCUM;
num++;
} }
YP_RECON_ACCUM;
// Zp // Zp
SE=st.GetEntry(ptype,Zp,ss); SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
ZP_PROJ; ZP_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
{
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Zp); MULT_2SPIN(Zp);
ZP_RECON_ACCUM;
num++;
} }
ZP_RECON_ACCUM;
// Tp // Tp
SE=st.GetEntry(ptype,Tp,ss); SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
TP_PROJ; TP_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Tp); MULT_2SPIN(Tp);
TP_RECON_ACCUM;
num++;
} }
TP_RECON_ACCUM;
// Xm // Xm
SE=st.GetEntry(ptype,Xm,ss); SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
XM_PROJ; XM_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Xm); MULT_2SPIN(Xm);
XM_RECON_ACCUM;
num++;
} }
XM_RECON_ACCUM;
// Ym // Ym
SE=st.GetEntry(ptype,Ym,ss); SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
YM_PROJ; YM_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Ym); MULT_2SPIN(Ym);
YM_RECON_ACCUM;
num++;
} }
YM_RECON_ACCUM;
// Zm // Zm
SE=st.GetEntry(ptype,Zm,ss); SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
ZM_PROJ; ZM_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Zm); MULT_2SPIN(Zm);
ZM_RECON_ACCUM;
num++;
} }
ZM_RECON_ACCUM;
// Tm // Tm
SE=st.GetEntry(ptype,Tm,ss); SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset; offset = SE->_offset;
local = SE->_is_local;
perm = SE->_permute;
if (Local && SE->_is_local ) { if ( local ) {
LOAD_CHIMU; LOAD_CHIMU;
TM_PROJ; TM_PROJ;
if ( SE->_permute ) { if ( perm) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
} }
} } else {
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI; LOAD_CHI;
} }
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { {
MULT_2SPIN(Tm); MULT_2SPIN(Tm);
TM_RECON_ACCUM;
num++;
} }
TM_RECON_ACCUM;
SiteSpinor & ref (out._odata[ss]); {
if ( Local ) { SiteSpinor & ref (out._odata[ss]);
vstream(ref()(0)(0),result_00*(-0.5)); vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5)); vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5)); vstream(ref()(0)(2),result_02*(-0.5));
@ -556,295 +550,9 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
vstream(ref()(3)(0),result_30*(-0.5)); vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5)); vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5)); vstream(ref()(3)(2),result_32*(-0.5));
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
return 1;
} }
return 0;
} }
template<class Impl>
int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{
// std::cout << "Hand op Dhop "<<std::endl;
typedef typename Simd::scalar_type S;
typedef typename Simd::vector_type V;
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc
REGISTER Simd Chi_00; // two spinor; 6 regs
REGISTER Simd Chi_01;
REGISTER Simd Chi_02;
REGISTER Simd Chi_10;
REGISTER Simd Chi_11;
REGISTER Simd Chi_12; // 14 left
REGISTER Simd UChi_00; // two spinor; 6 regs
REGISTER Simd UChi_01;
REGISTER Simd UChi_02;
REGISTER Simd UChi_10;
REGISTER Simd UChi_11;
REGISTER Simd UChi_12; // 8 left
REGISTER Simd U_00; // two rows of U matrix
REGISTER Simd U_10;
REGISTER Simd U_20;
REGISTER Simd U_01;
REGISTER Simd U_11;
REGISTER Simd U_21; // 2 reg left.
#define Chimu_00 Chi_00
#define Chimu_01 Chi_01
#define Chimu_02 Chi_02
#define Chimu_10 Chi_10
#define Chimu_11 Chi_11
#define Chimu_12 Chi_12
#define Chimu_20 UChi_00
#define Chimu_21 UChi_01
#define Chimu_22 UChi_02
#define Chimu_30 UChi_10
#define Chimu_31 UChi_11
#define Chimu_32 UChi_12
StencilEntry *SE;
int offset, ptype;
int num = 0;
// Xp
SE=st.GetEntry(ptype,Xp,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
XM_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Xp);
XM_RECON_ACCUM;
num++;
}
// Yp
SE=st.GetEntry(ptype,Yp,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
YM_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Yp);
YM_RECON_ACCUM;
num++;
}
// Zp
SE=st.GetEntry(ptype,Zp,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
ZM_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Zp);
ZM_RECON_ACCUM;
num++;
}
// Tp
SE=st.GetEntry(ptype,Tp,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
TM_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Tp);
TM_RECON_ACCUM;
num++;
}
// Xm
SE=st.GetEntry(ptype,Xm,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
XP_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Xm);
XP_RECON_ACCUM;
num++;
}
// Ym
SE=st.GetEntry(ptype,Ym,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
YP_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Ym);
YP_RECON_ACCUM;
num++;
}
// Zm
SE=st.GetEntry(ptype,Zm,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
ZP_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Zm);
ZP_RECON_ACCUM;
num++;
}
// Tm
SE=st.GetEntry(ptype,Tm,ss);
offset = SE->_offset;
if (Local && SE->_is_local ) {
LOAD_CHIMU;
TP_PROJ;
if ( SE->_permute ) {
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
}
}
if ( Nonlocal && (!SE->_is_local) ) {
LOAD_CHI;
}
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
MULT_2SPIN(Tm);
TP_RECON_ACCUM;
num++;
}
SiteSpinor & ref (out._odata[ss]);
if ( Local ) {
vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5));
vstream(ref()(1)(0),result_10*(-0.5));
vstream(ref()(1)(1),result_11*(-0.5));
vstream(ref()(1)(2),result_12*(-0.5));
vstream(ref()(2)(0),result_20*(-0.5));
vstream(ref()(2)(1),result_21*(-0.5));
vstream(ref()(2)(2),result_22*(-0.5));
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
return 1;
}
return 0;
}
/*
template<class Impl> template<class Impl>
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
@ -1087,21 +795,16 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
vstream(ref()(3)(2),result_32*(-0.5)); vstream(ref()(3)(2),result_32*(-0.5));
} }
} }
*/
//////////////////////////////////////////////// ////////////////////////////////////////////////
// Specialise Gparity to simple implementation // Specialise Gparity to simple implementation
//////////////////////////////////////////////// ////////////////////////////////////////////////
//template<class Impl> template<>
//int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
// int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
#if 0
template
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{ {
DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
} }
template<> template<>
@ -1109,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{ {
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
} }
template<> template<>
@ -1117,7 +820,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Dou
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{ {
DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
} }
template<> template<>
@ -1125,9 +828,8 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
{ {
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
} }
#endif