1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

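Cheaper implementation: scale the gauge field by -0.5 once in ImportGauge instead of multiplying every kernel result by -0.5 per site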

This commit is contained in:
Peter Boyle 2016-02-15 16:02:36 -06:00
parent 42a9ac71d2
commit a0fc47c6f9
4 changed files with 71 additions and 67 deletions

View File

@ -64,7 +64,9 @@ namespace QCD {
// Imports a gauge configuration into this operator.
//
// _Umu : gauge field to import; taken by const reference and not modified.
//
// The field is scaled by -0.5 into a temporary living on the same grid as
// _Umu, the scaled copy is passed to Impl::DoubleStore together with the
// member Umu, and the even and odd checkerboards of Umu are then picked into
// UmuEven and UmuOdd.
//
// Only the scaled field is double-stored: an earlier DoubleStore call on the
// unscaled _Umu was removed because its result was replaced by the call below
// before anything read it.
// NOTE(review): assumes Impl::DoubleStore overwrites (not accumulates into)
// Umu - confirm against the Impl policies in use.
// NOTE(review): presumably the -0.5 is folded in here so that the Dhop kernels
// can stream their results without a per-site multiply - confirm the kernels
// no longer apply the factor.
template<class Impl>
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
{
  // Pre-scaled copy of the incoming links.
  GaugeField HUmu(_Umu._grid);
  HUmu = _Umu*(-0.5);

  Impl::DoubleStore(GaugeGrid(),Umu,HUmu);

  // Split the stored field into its two checkerboards.
  pickCheckerboard(Even,UmuEven,Umu);
  pickCheckerboard(Odd ,UmuOdd,Umu);
}

View File

@ -108,7 +108,9 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
// Imports a gauge configuration into this five-dimensional operator.
//
// _Umu : gauge field to import; taken by const reference and not modified.
//
// The field is scaled by -0.5 into a temporary living on the same grid as
// _Umu, the scaled copy is passed to Impl::DoubleStore together with the
// member Umu, and the even and odd checkerboards of Umu are then picked into
// UmuEven and UmuOdd.
//
// Only the scaled field is double-stored: an earlier DoubleStore call on the
// unscaled _Umu was removed because its result was replaced by the call below
// before anything read it.
// NOTE(review): assumes Impl::DoubleStore overwrites (not accumulates into)
// Umu - confirm against the Impl policies in use.
// NOTE(review): presumably the -0.5 is folded in here so that the Dhop kernels
// can stream their results without a per-site multiply - confirm the kernels
// no longer apply the factor.
template<class Impl>
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
{
  // Pre-scaled copy of the incoming links.
  GaugeField HUmu(_Umu._grid);
  HUmu = _Umu*(-0.5);

  Impl::DoubleStore(GaugeGrid(),Umu,HUmu);

  // Split the stored field into its two checkerboards.
  pickCheckerboard(Even,UmuEven,Umu);
  pickCheckerboard(Odd ,UmuOdd,Umu);
}

View File

@ -244,9 +244,9 @@ void WilsonKernels<Impl>::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField
}
if ( local ) {
vstream(out._odata[sF],result*(-0.5));
vstream(out._odata[sF],result);
} else if ( num ) {
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
vstream(out._odata[sF],out._odata[sF]+result);
}
};
@ -461,9 +461,9 @@ void WilsonKernels<Impl>::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U,
}
if ( local ) {
vstream(out._odata[sF],result*(-0.5));
vstream(out._odata[sF],result);
} else if ( num ) {
vstream(out._odata[sF],out._odata[sF]+result*(-0.5));
vstream(out._odata[sF],out._odata[sF]+result);
}
};
@ -593,7 +593,7 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
spReconTm(result,Uchi);
}
vstream(out._odata[sF],result*(-0.5));
vstream(out._odata[sF],result);
}
#if ( ! defined(AVX512) ) && ( ! defined(IMCI) )

View File

@ -544,32 +544,32 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi
SiteSpinor & ref (out._odata[ss]);
if ( Local ) {
vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5));
vstream(ref()(1)(0),result_10*(-0.5));
vstream(ref()(1)(1),result_11*(-0.5));
vstream(ref()(1)(2),result_12*(-0.5));
vstream(ref()(2)(0),result_20*(-0.5));
vstream(ref()(2)(1),result_21*(-0.5));
vstream(ref()(2)(2),result_22*(-0.5));
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
vstream(ref()(0)(0),result_00);
vstream(ref()(0)(1),result_01);
vstream(ref()(0)(2),result_02);
vstream(ref()(1)(0),result_10);
vstream(ref()(1)(1),result_11);
vstream(ref()(1)(2),result_12);
vstream(ref()(2)(0),result_20);
vstream(ref()(2)(1),result_21);
vstream(ref()(2)(2),result_22);
vstream(ref()(3)(0),result_30);
vstream(ref()(3)(1),result_31);
vstream(ref()(3)(2),result_32);
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
vstream(ref()(0)(0),ref()(0)(0)+result_00);
vstream(ref()(0)(1),ref()(0)(1)+result_01);
vstream(ref()(0)(2),ref()(0)(2)+result_02);
vstream(ref()(1)(0),ref()(1)(0)+result_10);
vstream(ref()(1)(1),ref()(1)(1)+result_11);
vstream(ref()(1)(2),ref()(1)(2)+result_12);
vstream(ref()(2)(0),ref()(2)(0)+result_20);
vstream(ref()(2)(1),ref()(2)(1)+result_21);
vstream(ref()(2)(2),ref()(2)(2)+result_22);
vstream(ref()(3)(0),ref()(3)(0)+result_30);
vstream(ref()(3)(1),ref()(3)(1)+result_31);
vstream(ref()(3)(2),ref()(3)(2)+result_32);
return 1;
}
return 0;
@ -813,32 +813,32 @@ int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField
SiteSpinor & ref (out._odata[ss]);
if ( Local ) {
vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5));
vstream(ref()(1)(0),result_10*(-0.5));
vstream(ref()(1)(1),result_11*(-0.5));
vstream(ref()(1)(2),result_12*(-0.5));
vstream(ref()(2)(0),result_20*(-0.5));
vstream(ref()(2)(1),result_21*(-0.5));
vstream(ref()(2)(2),result_22*(-0.5));
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
vstream(ref()(0)(0),result_00);
vstream(ref()(0)(1),result_01);
vstream(ref()(0)(2),result_02);
vstream(ref()(1)(0),result_10);
vstream(ref()(1)(1),result_11);
vstream(ref()(1)(2),result_12);
vstream(ref()(2)(0),result_20);
vstream(ref()(2)(1),result_21);
vstream(ref()(2)(2),result_22);
vstream(ref()(3)(0),result_30);
vstream(ref()(3)(1),result_31);
vstream(ref()(3)(2),result_32);
return 1;
} else if ( num ) {
vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5));
vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5));
vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5));
vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5));
vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5));
vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5));
vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5));
vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5));
vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5));
vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5));
vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5));
vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5));
vstream(ref()(0)(0),ref()(0)(0)+result_00);
vstream(ref()(0)(1),ref()(0)(1)+result_01);
vstream(ref()(0)(2),ref()(0)(2)+result_02);
vstream(ref()(1)(0),ref()(1)(0)+result_10);
vstream(ref()(1)(1),ref()(1)(1)+result_11);
vstream(ref()(1)(2),ref()(1)(2)+result_12);
vstream(ref()(2)(0),ref()(2)(0)+result_20);
vstream(ref()(2)(1),ref()(2)(1)+result_21);
vstream(ref()(2)(2),ref()(2)(2)+result_22);
vstream(ref()(3)(0),ref()(3)(0)+result_30);
vstream(ref()(3)(1),ref()(3)(1)+result_31);
vstream(ref()(3)(2),ref()(3)(2)+result_32);
return 1;
}
return 0;
@ -1073,18 +1073,18 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel
{
SiteSpinor & ref (out._odata[ss]);
vstream(ref()(0)(0),result_00*(-0.5));
vstream(ref()(0)(1),result_01*(-0.5));
vstream(ref()(0)(2),result_02*(-0.5));
vstream(ref()(1)(0),result_10*(-0.5));
vstream(ref()(1)(1),result_11*(-0.5));
vstream(ref()(1)(2),result_12*(-0.5));
vstream(ref()(2)(0),result_20*(-0.5));
vstream(ref()(2)(1),result_21*(-0.5));
vstream(ref()(2)(2),result_22*(-0.5));
vstream(ref()(3)(0),result_30*(-0.5));
vstream(ref()(3)(1),result_31*(-0.5));
vstream(ref()(3)(2),result_32*(-0.5));
vstream(ref()(0)(0),result_00);
vstream(ref()(0)(1),result_01);
vstream(ref()(0)(2),result_02);
vstream(ref()(1)(0),result_10);
vstream(ref()(1)(1),result_11);
vstream(ref()(1)(2),result_12);
vstream(ref()(2)(0),result_20);
vstream(ref()(2)(1),result_21);
vstream(ref()(2)(2),result_22);
vstream(ref()(3)(0),result_30);
vstream(ref()(3)(1),result_31);
vstream(ref()(3)(2),result_32);
}
}
*/