|
|
|
@ -310,7 +310,7 @@ namespace QCD {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class Impl>
|
|
|
|
|
int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
@ -318,21 +318,21 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_00; // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_01;
|
|
|
|
|
REGISTER Simd result_02;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_10;
|
|
|
|
|
REGISTER Simd result_11;
|
|
|
|
|
REGISTER Simd result_12;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_20;
|
|
|
|
|
REGISTER Simd result_21;
|
|
|
|
|
REGISTER Simd result_22;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_30;
|
|
|
|
|
REGISTER Simd result_31;
|
|
|
|
|
REGISTER Simd result_32; // 20 left
|
|
|
|
|
|
|
|
|
|
REGISTER Simd Chi_00; // two spinor; 6 regs
|
|
|
|
|
REGISTER Simd Chi_01;
|
|
|
|
@ -372,178 +372,172 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int offset, ptype;
|
|
|
|
|
int num = 0;
|
|
|
|
|
int offset,local,perm, ptype;
|
|
|
|
|
|
|
|
|
|
// Xp
|
|
|
|
|
SE=st.GetEntry(ptype,Xp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
XP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Xp);
|
|
|
|
|
XP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
XP_RECON;
|
|
|
|
|
|
|
|
|
|
// Yp
|
|
|
|
|
SE=st.GetEntry(ptype,Yp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
YP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Yp);
|
|
|
|
|
YP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
YP_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Zp
|
|
|
|
|
SE=st.GetEntry(ptype,Zp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
ZP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Zp);
|
|
|
|
|
ZP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
ZP_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
// Tp
|
|
|
|
|
SE=st.GetEntry(ptype,Tp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
TP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Tp);
|
|
|
|
|
TP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
TP_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
// Xm
|
|
|
|
|
SE=st.GetEntry(ptype,Xm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
XM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Xm);
|
|
|
|
|
XM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
XM_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
// Ym
|
|
|
|
|
SE=st.GetEntry(ptype,Ym,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
YM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Ym);
|
|
|
|
|
YM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
YM_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
// Zm
|
|
|
|
|
SE=st.GetEntry(ptype,Zm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
ZM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Zm);
|
|
|
|
|
ZM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
ZM_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
// Tm
|
|
|
|
|
SE=st.GetEntry(ptype,Tm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
local = SE->_is_local;
|
|
|
|
|
perm = SE->_permute;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
if ( local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
TM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
if ( perm) {
|
|
|
|
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
} else {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
{
|
|
|
|
|
MULT_2SPIN(Tm);
|
|
|
|
|
TM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
TM_RECON_ACCUM;
|
|
|
|
|
|
|
|
|
|
SiteSpinor & ref (out._odata[ss]);
|
|
|
|
|
if ( Local ) {
|
|
|
|
|
{
|
|
|
|
|
SiteSpinor & ref (out._odata[ss]);
|
|
|
|
|
vstream(ref()(0)(0),result_00*(-0.5));
|
|
|
|
|
vstream(ref()(0)(1),result_01*(-0.5));
|
|
|
|
|
vstream(ref()(0)(2),result_02*(-0.5));
|
|
|
|
@ -556,295 +550,9 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
|
|
|
|
|
vstream(ref()(3)(0),result_30*(-0.5));
|
|
|
|
|
vstream(ref()(3)(1),result_31*(-0.5));
|
|
|
|
|
vstream(ref()(3)(2),result_32*(-0.5));
|
|
|
|
|
return 1;
|
|
|
|
|
} else if ( num ) {
|
|
|
|
|
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
|
|
|
|
|
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
|
|
|
|
|
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
|
|
|
|
|
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
|
|
|
|
|
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
|
|
|
|
|
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
|
|
|
|
|
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
|
|
|
|
|
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
|
|
|
|
|
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
|
|
|
|
|
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
|
|
|
|
|
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
|
|
|
|
|
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template<class Impl>
|
|
|
|
|
int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
|
// std::cout << "Hand op Dhop "<<std::endl;
|
|
|
|
|
typedef typename Simd::scalar_type S;
|
|
|
|
|
typedef typename Simd::vector_type V;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc
|
|
|
|
|
|
|
|
|
|
REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc
|
|
|
|
|
REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc
|
|
|
|
|
|
|
|
|
|
REGISTER Simd Chi_00; // two spinor; 6 regs
|
|
|
|
|
REGISTER Simd Chi_01;
|
|
|
|
|
REGISTER Simd Chi_02;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd Chi_10;
|
|
|
|
|
REGISTER Simd Chi_11;
|
|
|
|
|
REGISTER Simd Chi_12; // 14 left
|
|
|
|
|
|
|
|
|
|
REGISTER Simd UChi_00; // two spinor; 6 regs
|
|
|
|
|
REGISTER Simd UChi_01;
|
|
|
|
|
REGISTER Simd UChi_02;
|
|
|
|
|
|
|
|
|
|
REGISTER Simd UChi_10;
|
|
|
|
|
REGISTER Simd UChi_11;
|
|
|
|
|
REGISTER Simd UChi_12; // 8 left
|
|
|
|
|
|
|
|
|
|
REGISTER Simd U_00; // two rows of U matrix
|
|
|
|
|
REGISTER Simd U_10;
|
|
|
|
|
REGISTER Simd U_20;
|
|
|
|
|
REGISTER Simd U_01;
|
|
|
|
|
REGISTER Simd U_11;
|
|
|
|
|
REGISTER Simd U_21; // 2 reg left.
|
|
|
|
|
|
|
|
|
|
#define Chimu_00 Chi_00
|
|
|
|
|
#define Chimu_01 Chi_01
|
|
|
|
|
#define Chimu_02 Chi_02
|
|
|
|
|
#define Chimu_10 Chi_10
|
|
|
|
|
#define Chimu_11 Chi_11
|
|
|
|
|
#define Chimu_12 Chi_12
|
|
|
|
|
#define Chimu_20 UChi_00
|
|
|
|
|
#define Chimu_21 UChi_01
|
|
|
|
|
#define Chimu_22 UChi_02
|
|
|
|
|
#define Chimu_30 UChi_10
|
|
|
|
|
#define Chimu_31 UChi_11
|
|
|
|
|
#define Chimu_32 UChi_12
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StencilEntry *SE;
|
|
|
|
|
int offset, ptype;
|
|
|
|
|
int num = 0;
|
|
|
|
|
|
|
|
|
|
// Xp
|
|
|
|
|
SE=st.GetEntry(ptype,Xp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
XM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Xp);
|
|
|
|
|
XM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Yp
|
|
|
|
|
SE=st.GetEntry(ptype,Yp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
YM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Yp);
|
|
|
|
|
YM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Zp
|
|
|
|
|
SE=st.GetEntry(ptype,Zp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
ZM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Zp);
|
|
|
|
|
ZM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Tp
|
|
|
|
|
SE=st.GetEntry(ptype,Tp,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
TM_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Tp);
|
|
|
|
|
TM_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Xm
|
|
|
|
|
SE=st.GetEntry(ptype,Xm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
XP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Xm);
|
|
|
|
|
XP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Ym
|
|
|
|
|
SE=st.GetEntry(ptype,Ym,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
YP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Ym);
|
|
|
|
|
YP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Zm
|
|
|
|
|
SE=st.GetEntry(ptype,Zm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
ZP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Zm);
|
|
|
|
|
ZP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Tm
|
|
|
|
|
SE=st.GetEntry(ptype,Tm,ss);
|
|
|
|
|
offset = SE->_offset;
|
|
|
|
|
|
|
|
|
|
if (Local && SE->_is_local ) {
|
|
|
|
|
LOAD_CHIMU;
|
|
|
|
|
TP_PROJ;
|
|
|
|
|
if ( SE->_permute ) {
|
|
|
|
|
PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc...
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ( Nonlocal && (!SE->_is_local) ) {
|
|
|
|
|
LOAD_CHI;
|
|
|
|
|
}
|
|
|
|
|
if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) {
|
|
|
|
|
MULT_2SPIN(Tm);
|
|
|
|
|
TP_RECON_ACCUM;
|
|
|
|
|
num++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SiteSpinor & ref (out._odata[ss]);
|
|
|
|
|
if ( Local ) {
|
|
|
|
|
vstream(ref()(0)(0),result_00*(-0.5));
|
|
|
|
|
vstream(ref()(0)(1),result_01*(-0.5));
|
|
|
|
|
vstream(ref()(0)(2),result_02*(-0.5));
|
|
|
|
|
vstream(ref()(1)(0),result_10*(-0.5));
|
|
|
|
|
vstream(ref()(1)(1),result_11*(-0.5));
|
|
|
|
|
vstream(ref()(1)(2),result_12*(-0.5));
|
|
|
|
|
vstream(ref()(2)(0),result_20*(-0.5));
|
|
|
|
|
vstream(ref()(2)(1),result_21*(-0.5));
|
|
|
|
|
vstream(ref()(2)(2),result_22*(-0.5));
|
|
|
|
|
vstream(ref()(3)(0),result_30*(-0.5));
|
|
|
|
|
vstream(ref()(3)(1),result_31*(-0.5));
|
|
|
|
|
vstream(ref()(3)(2),result_32*(-0.5));
|
|
|
|
|
return 1;
|
|
|
|
|
} else if ( num ) {
|
|
|
|
|
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
|
|
|
|
|
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
|
|
|
|
|
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
|
|
|
|
|
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
|
|
|
|
|
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
|
|
|
|
|
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
|
|
|
|
|
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
|
|
|
|
|
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
|
|
|
|
|
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
|
|
|
|
|
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
|
|
|
|
|
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
|
|
|
|
|
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
template<class Impl>
|
|
|
|
|
void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
@ -1087,21 +795,16 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
|
|
|
|
|
vstream(ref()(3)(2),result_32*(-0.5));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////
|
|
|
|
|
// Specialise Gparity to simple implementation
|
|
|
|
|
////////////////////////////////////////////////
|
|
|
|
|
//template<class Impl>
|
|
|
|
|
//int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
// std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
// int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
#if 0
|
|
|
|
|
template
|
|
|
|
|
template<>
|
|
|
|
|
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3
|
|
|
|
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
@ -1109,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3
|
|
|
|
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
@ -1117,7 +820,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Dou
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3
|
|
|
|
|
DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
@ -1125,9 +828,8 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
|
|
|
|
|
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
|
|
|
|
|
int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal)
|
|
|
|
|
{
|
|
|
|
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3
|
|
|
|
|
DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|