1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-05 19:55:56 +01:00

Fast lane extract, saturates bandwidth on Volta for SU3 benchmarks

This commit is contained in:
Peter Boyle 2018-07-05 07:03:33 -04:00
parent 1bb456c0c5
commit 4730d4692a

View File

@ -504,6 +504,7 @@ public:
///////////////////////////////
// Define available types
///////////////////////////////
// Vector aliases for the non-complex scalars: each names a Grid_simd
// instantiation pairing a scalar type with its SIMD_*type storage type.
using vRealF   = Grid_simd<float  , SIMD_Ftype>;
using vRealD   = Grid_simd<double , SIMD_Dtype>;
using vInteger = Grid_simd<Integer, SIMD_Itype>;
@ -519,6 +520,19 @@ typedef Grid_simd<complex<float> , SIMD_Ftype> vComplexF;
typedef Grid_simd<complex<double> , SIMD_Dtype> vComplexD;
#endif
/////////////////////////////////////////
// Pointer type to use on extractLane
/////////////////////////////////////////
// Primary template: unless a specialization says otherwise, a scalar type
// is extracted as itself (extract_type is the template argument unchanged).
template<class scalar_t>
class ExtractTypeMap {
public:
  using extract_type = scalar_t;
};
#ifdef GPU_VEC
// When GPU_VEC is defined, complex scalars are not extracted as complex<>
// itself but through a two-component type of matching precision
// (half2 / float2 / double2 -- presumably the CUDA built-in vector types;
// confirm against the GPU backend headers).
template<>
class ExtractTypeMap< complex<uint16_t> > {
public:
  using extract_type = half2;
};

template<>
class ExtractTypeMap< complex< float> > {
public:
  using extract_type = float2;
};

template<>
class ExtractTypeMap< complex< double> > {
public:
  using extract_type = double2;
};
#endif
/////////////////////////////////////////
// Permute
/////////////////////////////////////////
// Scalar (ComplexD) overload: the result is simply a copy of b.
// The perm argument is accepted but never consulted.
accelerator_inline void permute(ComplexD &y,ComplexD b, int perm)
{
  y = b;
}
// Scalar (ComplexF) overload: the result is simply a copy of b.
// The perm argument is accepted but never consulted.
accelerator_inline void permute(ComplexF &y,ComplexF b, int perm)
{
  y = b;
}