From 202078eb1b9d02aa5eee05f1ffe0528801a3ef05 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Fri, 21 Oct 2016 09:07:20 +0100 Subject: [PATCH 1/2] Cray / OpenSHMEM ordering differs --- lib/AlignedAllocator.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index 89731246..8301d811 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -130,8 +130,12 @@ public: #ifdef GRID_COMMS_SHMEM pointer allocate(size_type __n, const void* _p= 0) - { + { +#ifdef CRAY _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); +#else + _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp)); +#endif #ifdef PARANOID_SYMMETRIC_HEAP static void * bcast; static long psync[_SHMEM_REDUCE_SYNC_SIZE]; From 20a091c3eddfdb67a82ece6413740a93650a2f98 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Fri, 21 Oct 2016 09:08:36 +0100 Subject: [PATCH 2/2] Intel vs. Clang intrinsics differences absorbed The lane-view unions span the full 512-bit vector: u512f exposes 16 floats over __m512, u512d exposes 8 doubles over __m512d. --- lib/simd/Grid_avx512.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 821898d9..d4fd446c 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -41,6 +41,22 @@ Author: paboyle namespace Grid{ namespace Optimization { + + template <class vtype> union uconv { + __m512 f; + vtype v; + }; + + union u512f { + __m512 v; + float f[16]; + }; + + union u512d { + __m512d v; + double f[8]; + }; struct Vsplat{ //Complex float @@ -361,8 +377,9 @@ namespace Optimization { // Some Template specialization // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases -#undef GNU_CLANG_COMPILER -#ifdef GNU_CLANG_COMPILER + +#ifndef __INTEL_COMPILER +#warning "Slow reduction due to incomplete reduce intrinsics" //Complex float Reduce template<> inline Grid::ComplexF Reduce::operator()(__m512 in){