From 4730d4692a28c7de0555f5f8d63e4adec1f76b22 Mon Sep 17 00:00:00 2001
From: Peter Boyle
Date: Thu, 5 Jul 2018 07:03:33 -0400
Subject: [PATCH] Fast lane extract, saturates bandwidth on Volta for SU3 benchmarks

---
 lib/simd/Grid_vector_types.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h
index fd7aaba1..a51bfaed 100644
--- a/lib/simd/Grid_vector_types.h
+++ b/lib/simd/Grid_vector_types.h
@@ -504,6 +504,7 @@ public:
 ///////////////////////////////
 // Define available types
 ///////////////////////////////
+
 typedef Grid_simd<float, SIMD_Ftype> vRealF;
 typedef Grid_simd<double, SIMD_Dtype> vRealD;
 typedef Grid_simd<Integer, SIMD_Itype> vInteger;
@@ -519,6 +520,19 @@ typedef Grid_simd<complex<float> , SIMD_Ftype> vComplexF;
 typedef Grid_simd<complex<double> , SIMD_Dtype> vComplexD;
 #endif
 
+/////////////////////////////////////////
+// Pointer type to use on extractLane
+/////////////////////////////////////////
+template<class _scalar> class ExtractTypeMap { public: typedef _scalar extract_type;};
+#ifdef GPU_VEC
+template<> class ExtractTypeMap< complex<uint16_t> > { public: typedef half2 extract_type;};
+template<> class ExtractTypeMap< complex< float> > { public: typedef float2 extract_type;};
+template<> class ExtractTypeMap< complex< double> > { public: typedef double2 extract_type;};
+#endif
+
+/////////////////////////////////////////
+// Permute
+/////////////////////////////////////////
 accelerator_inline void permute(ComplexD &y,ComplexD b, int perm) { y=b; }
 accelerator_inline void permute(ComplexF &y,ComplexF b, int perm) { y=b; }