diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index 2fc9b035..8bd55d61 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -10,6 +10,7 @@ Author: Peter Boyle Author: paboyle +Author: Guido Cossu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -55,23 +56,24 @@ WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo, #if defined(AVX512) #include -#if defined(GRID_DEFAULT_PRECISION_SINGLE) /////////////////////////////////////////////////////////// // If we are AVX512 specialise the single precision routine /////////////////////////////////////////////////////////// #include -static Vector signs; - - int setupSigns(void ){ - Vector bother(2); +static Vector signsF; + + template + int setupSigns(Vector& signs ){ + Vector bother(2); signs = bother; vrsign(signs[0]); visign(signs[1]); return 1; } - static int signInit = setupSigns(); + + static int signInitF = setupSigns(signsF); #define label(A) ilabel(A) #define ilabel(A) ".globl\n" #A ":\n" @@ -80,6 +82,7 @@ static Vector signs; #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A #define COMPLEX_TYPE vComplexF +#define signs signsF #undef KERNEL_DAG template<> void @@ -116,34 +119,22 @@ WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,Lebe int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include #undef COMPLEX_TYPE +#undef signs +#undef VMOVRDUP +#undef MAYBEPERM +#undef MULT_2SPIN +#undef FX -#endif //Single precision - -#if defined(GRID_DEFAULT_PRECISION_DOUBLE) -//temporary separating the two sections -//for debug in isolation -//can be unified - - /////////////////////////////////////////////////////////// - // If we are AVX512 specialise the double precision routine - /////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////// +// If we are AVX512 specialise the double precision routine +/////////////////////////////////////////////////////////// #include -static Vector signs; +static Vector signsD; +#define signs signsD +static int signInitD = setupSigns(signsD); - int setupSigns(void ){ - Vector bother(2); - signs = bother; - vrsign(signs[0]); - visign(signs[1]); - return 1; - } - static int signInit = setupSigns(); - -#define label(A) ilabel(A) -#define ilabel(A) ".globl\n" #A ":\n" - #define MAYBEPERM(A,perm) if (perm) { A ; } #define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf) #define FX(A) WILSONASM_ ##A @@ -185,7 +176,11 @@ WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,Lebe #include #undef COMPLEX_TYPE -#endif //Double precision +#undef signs +#undef VMOVRDUP +#undef MAYBEPERM +#undef MULT_2SPIN +#undef FX #endif //AVX512