From e73b909a483ac66d504d98f2fb8462d431dd334e Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Wed, 2 Jan 2019 12:05:30 +0000 Subject: [PATCH] Make tests running past nvcc. Different NVCC versions proving tricky to keep happy. This is 9.2 --- Grid/simd/Grid_avx512.h | 2 +- Grid/simd/Grid_vector_types.h | 31 ++++++++++++++----------- Grid/tensors/Tensor_exp.h | 3 ++- tests/core/Test_main.cc | 2 +- tests/forces/Test_gp_plaq_force.cc | 4 ++-- tests/forces/Test_wilsonclover_force.cc | 4 ++-- 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/Grid/simd/Grid_avx512.h b/Grid/simd/Grid_avx512.h index 826dfc3e..99dbbb6d 100644 --- a/Grid/simd/Grid_avx512.h +++ b/Grid/simd/Grid_avx512.h @@ -481,7 +481,7 @@ struct Rotate{ // Some Template specialization // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases -#ifndef __INTEL_COMPILER +#if 0 #warning "Slow reduction due to incomplete reduce intrinsics" //Complex float Reduce template<> diff --git a/Grid/simd/Grid_vector_types.h b/Grid/simd/Grid_vector_types.h index 34176340..760981ff 100644 --- a/Grid/simd/Grid_vector_types.h +++ b/Grid/simd/Grid_vector_types.h @@ -746,11 +746,12 @@ accelerator_inline Grid_simd operator/(Grid_simd a, Grid_simd ret = a * conjugate(b) ; den = b * conjugate(b) ; - - auto real_den = toReal(den); - - ret.v=binary(ret.v, real_den.v, DivSIMD()); + // duplicates real part + auto real_den = toReal(den); + simd zden; + memcpy((void *)&zden.v,(void *)&real_den.v,sizeof(zden)); + ret.v=binary(ret.v, zden.v, DivSIMD()); return ret; }; @@ -839,26 +840,28 @@ accelerator_inline Grid_simd trace(const Grid_simd &arg) { // insert real into complex and zero imag; //////////////////////////////////////////////////////////// + +template struct toRealMapper {}; +template<> struct toRealMapper { typedef vRealF Realified; }; +template<> struct toRealMapper { typedef vRealD Realified; }; // real = toReal( complex ) -template = 0> -accelerator_inline Grid_simd 
toReal(const Grid_simd, V> &in) { - typedef Grid_simd simd; - simd ret; - typename simd::conv_t conv; - conv.v = in.v; // copy the vector content (bytewise) - for (int i = 0; i < simd::Nsimd(); i += 2) { +template // must be a complex arg +accelerator_inline typename toRealMapper::Realified toReal(const Csimd &in) { + typedef typename toRealMapper::Realified Rsimd; + Rsimd ret; + typename Rsimd::conv_t conv; + memcpy((void *)&conv.v,(void *)&in.v,sizeof(conv.v)); + for (int i = 0; i < Rsimd::Nsimd(); i += 2) { conv.s[i + 1] = conv.s[i]; // duplicate (r,r);(r,r);(r,r); etc... } - ret.v = conv.v; + memcpy((void *)&ret.v,(void *)&conv.v,sizeof(ret.v)); return ret; } - template struct toComplexMapper {}; template<> struct toComplexMapper { typedef vComplexF Complexified; }; template<> struct toComplexMapper { typedef vComplexD Complexified; }; - // complex = toComplex( real ) template // must be a real arg accelerator_inline typename toComplexMapper::Complexified toComplex(const Rsimd &in) { diff --git a/Grid/tensors/Tensor_exp.h b/Grid/tensors/Tensor_exp.h index 6f9e8360..11d37f9c 100644 --- a/Grid/tensors/Tensor_exp.h +++ b/Grid/tensors/Tensor_exp.h @@ -55,6 +55,7 @@ template accelerator_inline iVector Exponentiate(c // Specialisation: Cayley-Hamilton exponential for SU(3) +#ifndef GRID_NVCC template::TensorLevel == 0>::type * =nullptr> accelerator_inline iMatrix Exponentiate(const iMatrix &arg, RealD alpha , Integer Nexp = DEFAULT_MAT_EXP ) { @@ -114,7 +115,7 @@ accelerator_inline iMatrix Exponentiate(const iMatrix &arg, Re return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2); } - +#endif // General exponential diff --git a/tests/core/Test_main.cc b/tests/core/Test_main.cc index a023e0d1..cd2373ce 100644 --- a/tests/core/Test_main.cc +++ b/tests/core/Test_main.cc @@ -179,7 +179,7 @@ int main(int argc, char **argv) { std::cout << "Norm2 LatticeReal : "<< norm2(BarReal) << std::endl; std::cout << "Norm2 LatticeComplex : "<< norm2(BarComplex) << std::endl; - 
exit(0); + // exit(0); TComplex tr = trace(cmat); diff --git a/tests/forces/Test_gp_plaq_force.cc b/tests/forces/Test_gp_plaq_force.cc index 32b5167a..e8702399 100644 --- a/tests/forces/Test_gp_plaq_force.cc +++ b/tests/forces/Test_gp_plaq_force.cc @@ -87,9 +87,9 @@ int main (int argc, char ** argv) auto Uprime_v = Uprime.View(); auto U_v = U.View(); auto mom_v = mom.View(); - parallel_for(auto i=mom_v.begin();ioSites(); ss++) + thread_loop( (int ss = 0; ss < mom.Grid()->oSites(); ss++), { Uprime_v[ss]._internal[mu] = ProjectOnGroup(Exponentiate(mom_v[ss]._internal[mu], dt, 12) * U_v[ss]._internal[mu]); - } + }); } std::cout << GridLogMessage << "Initial mom hamiltonian is " << Hmom << std::endl;