From 5050833b42196629438ed674fee22b02e35d61fa Mon Sep 17 00:00:00 2001
From: nmeyer-ur
Date: Tue, 2 Jun 2020 13:08:57 +0200
Subject: [PATCH] revert changes due to performance penalty in Wilson using MPI

---
 Grid/simd/Grid_a64fx-fixedsize.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/Grid/simd/Grid_a64fx-fixedsize.h b/Grid/simd/Grid_a64fx-fixedsize.h
index 6f27a4ec..602d56f6 100644
--- a/Grid/simd/Grid_a64fx-fixedsize.h
+++ b/Grid/simd/Grid_a64fx-fixedsize.h
@@ -419,7 +419,6 @@ struct Conj{
   }
 };
 
-/*
 struct TimesMinusI{
   // Complex float
   inline vecf operator()(vecf a, vecf b){
@@ -442,11 +441,14 @@ struct TimesMinusI{
     return svneg_m(a_v, pg_odd, a_v);
   }
 };
-*/
 
 // alternative implementation using fcadd
 // this is not optimal because we have op1 = op2 + TimesMinusI(op3) etc
 // ideally we have AddTimesMinusI(op1,op2,op3)
+//
+// makes performance worse in Benchmark_wilson using MPI
+// increases halogtime and gathertime
+/*
 struct TimesMinusI{
   // Complex float
   inline vecf operator()(vecf a, vecf b){
@@ -463,8 +465,8 @@ struct TimesMinusI{
     return svcadd_x(pg1, z_v, a, 270);
   }
 };
+*/
 
-/*
 struct TimesI{
   // Complex float
   inline vecf operator()(vecf a, vecf b){
@@ -487,11 +489,15 @@ struct TimesI{
     return svneg_m(a_v, pg_even, a_v);
   }
 };
-*/
+
 
 // alternative implementation using fcadd
 // this is not optimal because we have op1 = op2 + TimesI(op3) etc
 // ideally we have AddTimesI(op1,op2,op3)
+//
+// makes performance worse in Benchmark_wilson using MPI
+// increases halogtime and gathertime
+/*
 struct TimesI{
   // Complex float
   inline vecf operator()(vecf a, vecf b){
@@ -508,7 +514,7 @@ struct TimesI{
     return svcadd_x(pg1, z_v, a, 90);
   }
 };
-
+*/
 
 
 struct PrecisionChange {