diff --git a/lib/qcd/action/fermion/CayleyFermion5D.h b/lib/qcd/action/fermion/CayleyFermion5D.h index 2392fcf0..86255be6 100644 --- a/lib/qcd/action/fermion/CayleyFermion5D.h +++ b/lib/qcd/action/fermion/CayleyFermion5D.h @@ -33,9 +33,29 @@ namespace Grid { namespace QCD { - template struct switcheroo { static int iscomplex() { return 0; } }; - template<> struct switcheroo { static int iscomplex() { return 1; } }; - template<> struct switcheroo { static int iscomplex() { return 1; } }; + template struct switcheroo { + static inline int iscomplex() { return 0; } + + template + static inline vec mult(vec a, vec b) { + return real_mult(a,b); + } + }; + template<> struct switcheroo { + static inline int iscomplex() { return 1; } + + template + static inline vec mult(vec a, vec b) { + return a*b; + } + }; + template<> struct switcheroo { + static inline int iscomplex() { return 1; } + template + static inline vec mult(vec a, vec b) { + return a*b; + } + }; template diff --git a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc index f8c64b91..ed742ea3 100644 --- a/lib/qcd/action/fermion/CayleyFermion5Dvec.cc +++ b/lib/qcd/action/fermion/CayleyFermion5Dvec.cc @@ -161,18 +161,18 @@ PARALLEL_FOR_LOOP } // Can force these to real arithmetic and save 2x. - Simd p_00 = real_mult(d[v]()()(), phi[ss+v]()(0)(0)) + real_mult(l[v]()()(),hm_00); - Simd p_01 = real_mult(d[v]()()(), phi[ss+v]()(0)(1)) + real_mult(l[v]()()(),hm_01); - Simd p_02 = real_mult(d[v]()()(), phi[ss+v]()(0)(2)) + real_mult(l[v]()()(),hm_02); - Simd p_10 = real_mult(d[v]()()(), phi[ss+v]()(1)(0)) + real_mult(l[v]()()(),hm_10); - Simd p_11 = real_mult(d[v]()()(), phi[ss+v]()(1)(1)) + real_mult(l[v]()()(),hm_11); - Simd p_12 = real_mult(d[v]()()(), phi[ss+v]()(1)(2)) + real_mult(l[v]()()(),hm_12); - Simd p_20 = real_mult(d[v]()()(), phi[ss+v]()(2)(0)) + real_mult(u[v]()()(),hp_00); - Simd p_21 = real_mult(d[v]()()(), phi[ss+v]()(2)(1)) + real_mult(u[v]()()(),hp_01); - Simd p_22 = real_mult(d[v]()()(), phi[ss+v]()(2)(2)) + real_mult(u[v]()()(),hp_02); - Simd p_30 = real_mult(d[v]()()(), phi[ss+v]()(3)(0)) + real_mult(u[v]()()(),hp_10); - Simd p_31 = real_mult(d[v]()()(), phi[ss+v]()(3)(1)) + real_mult(u[v]()()(),hp_11); - Simd p_32 = real_mult(d[v]()()(), phi[ss+v]()(3)(2)) + real_mult(u[v]()()(),hp_12); + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(u[v]()()(),hp_12); vstream(chi[ss+v]()(0)(0),p_00); vstream(chi[ss+v]()(0)(1),p_01); @@ -299,19 +299,19 @@ PARALLEL_FOR_LOOP hm_12.v = Optimization::Rotate::tRotate<2*Simd::Nsimd()-2>(hm_12.v); } - Simd p_00 = real_mult(d[v]()()(), phi[ss+v]()(0)(0)) + real_mult(u[v]()()(),hp_00); - Simd p_01 = real_mult(d[v]()()(), phi[ss+v]()(0)(1)) + real_mult(u[v]()()(),hp_01); - Simd p_02 = real_mult(d[v]()()(), phi[ss+v]()(0)(2)) + real_mult(u[v]()()(),hp_02); - Simd p_10 = real_mult(d[v]()()(), phi[ss+v]()(1)(0)) + real_mult(u[v]()()(),hp_10); - Simd p_11 = real_mult(d[v]()()(), phi[ss+v]()(1)(1)) + real_mult(u[v]()()(),hp_11); - Simd p_12 = real_mult(d[v]()()(), phi[ss+v]()(1)(2)) + real_mult(u[v]()()(),hp_12); + Simd p_00 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(0)) + switcheroo::mult(u[v]()()(),hp_00); + Simd p_01 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(1)) + switcheroo::mult(u[v]()()(),hp_01); + Simd p_02 = switcheroo::mult(d[v]()()(), phi[ss+v]()(0)(2)) + switcheroo::mult(u[v]()()(),hp_02); + Simd p_10 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(0)) + switcheroo::mult(u[v]()()(),hp_10); + Simd p_11 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(1)) + switcheroo::mult(u[v]()()(),hp_11); + Simd p_12 = switcheroo::mult(d[v]()()(), phi[ss+v]()(1)(2)) + switcheroo::mult(u[v]()()(),hp_12); - Simd p_20 = real_mult(d[v]()()(), phi[ss+v]()(2)(0)) + real_mult(l[v]()()(),hm_00); - Simd p_21 = real_mult(d[v]()()(), phi[ss+v]()(2)(1)) + real_mult(l[v]()()(),hm_01); - Simd p_22 = real_mult(d[v]()()(), phi[ss+v]()(2)(2)) + real_mult(l[v]()()(),hm_02); - Simd p_30 = real_mult(d[v]()()(), phi[ss+v]()(3)(0)) + real_mult(l[v]()()(),hm_10); - Simd p_31 = real_mult(d[v]()()(), phi[ss+v]()(3)(1)) + real_mult(l[v]()()(),hm_11); - Simd p_32 = real_mult(d[v]()()(), phi[ss+v]()(3)(2)) + real_mult(l[v]()()(),hm_12); + Simd p_20 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(0)) + switcheroo::mult(l[v]()()(),hm_00); + Simd p_21 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(1)) + switcheroo::mult(l[v]()()(),hm_01); + Simd p_22 = switcheroo::mult(d[v]()()(), phi[ss+v]()(2)(2)) + switcheroo::mult(l[v]()()(),hm_02); + Simd p_30 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(0)) + switcheroo::mult(l[v]()()(),hm_10); + Simd p_31 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(1)) + switcheroo::mult(l[v]()()(),hm_11); + Simd p_32 = switcheroo::mult(d[v]()()(), phi[ss+v]()(3)(2)) + switcheroo::mult(l[v]()()(),hm_12); vstream(chi[ss+v]()(0)(0),p_00); vstream(chi[ss+v]()(0)(1),p_01); @@ -544,13 +544,13 @@ void CayleyFermion5D::MooeeInternalZAsm(const FermionField &psi, FermionFi for(int co=0;co