From a0fc47c6f9869a1490207e27df4882b693a2315d Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 15 Feb 2016 16:02:36 -0600 Subject: [PATCH] Cheaper implementation --- lib/qcd/action/fermion/WilsonFermion.cc | 4 +- lib/qcd/action/fermion/WilsonFermion5D.cc | 4 +- lib/qcd/action/fermion/WilsonKernels.cc | 10 +- lib/qcd/action/fermion/WilsonKernelsHand.cc | 120 ++++++++++---------- 4 files changed, 71 insertions(+), 67 deletions(-) diff --git a/lib/qcd/action/fermion/WilsonFermion.cc b/lib/qcd/action/fermion/WilsonFermion.cc index 57d7862b..92cd7f92 100644 --- a/lib/qcd/action/fermion/WilsonFermion.cc +++ b/lib/qcd/action/fermion/WilsonFermion.cc @@ -64,7 +64,9 @@ namespace QCD { template void WilsonFermion::ImportGauge(const GaugeField &_Umu) { - Impl::DoubleStore(GaugeGrid(),Umu,_Umu); + GaugeField HUmu(_Umu._grid); + HUmu = _Umu*(-0.5); + Impl::DoubleStore(GaugeGrid(),Umu,HUmu); pickCheckerboard(Even,UmuEven,Umu); pickCheckerboard(Odd ,UmuOdd,Umu); } diff --git a/lib/qcd/action/fermion/WilsonFermion5D.cc b/lib/qcd/action/fermion/WilsonFermion5D.cc index 57e51d67..d438839b 100644 --- a/lib/qcd/action/fermion/WilsonFermion5D.cc +++ b/lib/qcd/action/fermion/WilsonFermion5D.cc @@ -108,7 +108,9 @@ WilsonFermion5D::WilsonFermion5D(GaugeField &_Umu, template void WilsonFermion5D::ImportGauge(const GaugeField &_Umu) { - Impl::DoubleStore(GaugeGrid(),Umu,_Umu); + GaugeField HUmu(_Umu._grid); + HUmu = _Umu*(-0.5); + Impl::DoubleStore(GaugeGrid(),Umu,HUmu); pickCheckerboard(Even,UmuEven,Umu); pickCheckerboard(Odd ,UmuOdd,Umu); } diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index c53e2d7c..7410024f 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -244,9 +244,9 @@ void WilsonKernels::DiracOptDhopSiteDag(StencilImpl &st,DoubledGaugeField } if ( local ) { - vstream(out._odata[sF],result*(-0.5)); + vstream(out._odata[sF],result); } else if ( num ) { - vstream(out._odata[sF],out._odata[sF]+result*(-0.5)); + vstream(out._odata[sF],out._odata[sF]+result); } }; @@ -461,9 +461,9 @@ void WilsonKernels::DiracOptDhopSite(StencilImpl &st,DoubledGaugeField &U, } if ( local ) { - vstream(out._odata[sF],result*(-0.5)); + vstream(out._odata[sF],result); } else if ( num ) { - vstream(out._odata[sF],out._odata[sF]+result*(-0.5)); + vstream(out._odata[sF],out._odata[sF]+result); } }; @@ -593,7 +593,7 @@ void WilsonKernels::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U, spReconTm(result,Uchi); } - vstream(out._odata[sF],result*(-0.5)); + vstream(out._odata[sF],result); } #if ( ! defined(AVX512) ) && ( ! defined(IMCI) ) diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 11289301..35a969c3 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -544,32 +544,32 @@ int WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi SiteSpinor & ref (out._odata[ss]); if ( Local ) { - vstream(ref()(0)(0),result_00*(-0.5)); - vstream(ref()(0)(1),result_01*(-0.5)); - vstream(ref()(0)(2),result_02*(-0.5)); - vstream(ref()(1)(0),result_10*(-0.5)); - vstream(ref()(1)(1),result_11*(-0.5)); - vstream(ref()(1)(2),result_12*(-0.5)); - vstream(ref()(2)(0),result_20*(-0.5)); - vstream(ref()(2)(1),result_21*(-0.5)); - vstream(ref()(2)(2),result_22*(-0.5)); - vstream(ref()(3)(0),result_30*(-0.5)); - vstream(ref()(3)(1),result_31*(-0.5)); - vstream(ref()(3)(2),result_32*(-0.5)); + vstream(ref()(0)(0),result_00); + vstream(ref()(0)(1),result_01); + vstream(ref()(0)(2),result_02); + vstream(ref()(1)(0),result_10); + vstream(ref()(1)(1),result_11); + vstream(ref()(1)(2),result_12); + vstream(ref()(2)(0),result_20); + vstream(ref()(2)(1),result_21); + vstream(ref()(2)(2),result_22); + vstream(ref()(3)(0),result_30); + vstream(ref()(3)(1),result_31); + vstream(ref()(3)(2),result_32); return 1; } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); - vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); - vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); - vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); - vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); - vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); - vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); - vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); - vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); - vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); - vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); - vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); + vstream(ref()(0)(0),ref()(0)(0)+result_00); + vstream(ref()(0)(1),ref()(0)(1)+result_01); + vstream(ref()(0)(2),ref()(0)(2)+result_02); + vstream(ref()(1)(0),ref()(1)(0)+result_10); + vstream(ref()(1)(1),ref()(1)(1)+result_11); + vstream(ref()(1)(2),ref()(1)(2)+result_12); + vstream(ref()(2)(0),ref()(2)(0)+result_20); + vstream(ref()(2)(1),ref()(2)(1)+result_21); + vstream(ref()(2)(2),ref()(2)(2)+result_22); + vstream(ref()(3)(0),ref()(3)(0)+result_30); + vstream(ref()(3)(1),ref()(3)(1)+result_31); + vstream(ref()(3)(2),ref()(3)(2)+result_32); return 1; } return 0; @@ -813,32 +813,32 @@ int WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField SiteSpinor & ref (out._odata[ss]); if ( Local ) { - vstream(ref()(0)(0),result_00*(-0.5)); - vstream(ref()(0)(1),result_01*(-0.5)); - vstream(ref()(0)(2),result_02*(-0.5)); - vstream(ref()(1)(0),result_10*(-0.5)); - vstream(ref()(1)(1),result_11*(-0.5)); - vstream(ref()(1)(2),result_12*(-0.5)); - vstream(ref()(2)(0),result_20*(-0.5)); - vstream(ref()(2)(1),result_21*(-0.5)); - vstream(ref()(2)(2),result_22*(-0.5)); - vstream(ref()(3)(0),result_30*(-0.5)); - vstream(ref()(3)(1),result_31*(-0.5)); - vstream(ref()(3)(2),result_32*(-0.5)); + vstream(ref()(0)(0),result_00); + vstream(ref()(0)(1),result_01); + vstream(ref()(0)(2),result_02); + vstream(ref()(1)(0),result_10); + vstream(ref()(1)(1),result_11); + vstream(ref()(1)(2),result_12); + vstream(ref()(2)(0),result_20); + vstream(ref()(2)(1),result_21); + vstream(ref()(2)(2),result_22); + vstream(ref()(3)(0),result_30); + vstream(ref()(3)(1),result_31); + vstream(ref()(3)(2),result_32); return 1; } else if ( num ) { - vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); - vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); - vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); - vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); - vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); - vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); - vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); - vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); - vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); - vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); - vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); - vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); + vstream(ref()(0)(0),ref()(0)(0)+result_00); + vstream(ref()(0)(1),ref()(0)(1)+result_01); + vstream(ref()(0)(2),ref()(0)(2)+result_02); + vstream(ref()(1)(0),ref()(1)(0)+result_10); + vstream(ref()(1)(1),ref()(1)(1)+result_11); + vstream(ref()(1)(2),ref()(1)(2)+result_12); + vstream(ref()(2)(0),ref()(2)(0)+result_20); + vstream(ref()(2)(1),ref()(2)(1)+result_21); + vstream(ref()(2)(2),ref()(2)(2)+result_22); + vstream(ref()(3)(0),ref()(3)(0)+result_30); + vstream(ref()(3)(1),ref()(3)(1)+result_31); + vstream(ref()(3)(2),ref()(3)(2)+result_32); return 1; } return 0; @@ -1073,18 +1073,18 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel { SiteSpinor & ref (out._odata[ss]); - vstream(ref()(0)(0),result_00*(-0.5)); - vstream(ref()(0)(1),result_01*(-0.5)); - vstream(ref()(0)(2),result_02*(-0.5)); - vstream(ref()(1)(0),result_10*(-0.5)); - vstream(ref()(1)(1),result_11*(-0.5)); - vstream(ref()(1)(2),result_12*(-0.5)); - vstream(ref()(2)(0),result_20*(-0.5)); - vstream(ref()(2)(1),result_21*(-0.5)); - vstream(ref()(2)(2),result_22*(-0.5)); - vstream(ref()(3)(0),result_30*(-0.5)); - vstream(ref()(3)(1),result_31*(-0.5)); - vstream(ref()(3)(2),result_32*(-0.5)); + vstream(ref()(0)(0),result_00); + vstream(ref()(0)(1),result_01); + vstream(ref()(0)(2),result_02); + vstream(ref()(1)(0),result_10); + vstream(ref()(1)(1),result_11); + vstream(ref()(1)(2),result_12); + vstream(ref()(2)(0),result_20); + vstream(ref()(2)(1),result_21); + vstream(ref()(2)(2),result_22); + vstream(ref()(3)(0),result_30); + vstream(ref()(3)(1),result_31); + vstream(ref()(3)(2),result_32); } } */