1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

correct tbl2 for sp

This commit is contained in:
nmeyer-ur 2020-06-12 17:12:34 +02:00
parent 2402b4940e
commit 8dbf790f62
2 changed files with 7 additions and 5 deletions

View File

@@ -82,7 +82,7 @@ NAMESPACE_BEGIN(Optimization);
 };
 };
-#else // not defines ARMCLANGCOMPAT
+#else // no ARMCLANGCOMPAT
 #define vec_imm vec
 // SIMD vector types
 template <typename T>
@@ -181,7 +181,7 @@ struct acle<float>{
 }
 static inline vec<uint32_t> tbl2(){
 //const vec<uint32_t> t = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
-const vec_imm<uint32_t> t = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
+const vec_imm<uint32_t> t = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
 return t;
 }
 static inline vec<uint32_t> tbl_exch1a(){ // Exchange1
@@ -889,6 +889,7 @@ inline Integer Reduce<Integer, veci>::operator()(veci in){
 }
 #undef svred
+#undef vec_imm
 NAMESPACE_END(Optimization)

View File

@@ -122,7 +122,7 @@ accelerator_inline Grid_half sfw_float_to_half(float ff) {
 #if defined(A64FX) // VLA
 #pragma message("building for A64FX / SVE ACLE VLA")
 #if defined(ARMCLANGCOMPAT)
-#pragma message("applying armclang patch")
+#pragma message("applying data types patch")
 #endif
 #include "Grid_a64fx-2.h"
 #endif
@@ -327,8 +327,9 @@ public:
 // FIXME -- alias this to an accelerator_inline MAC struct.
-// FIXME VLA build error
-#if defined(A64FX) || defined(A64FXFIXEDSIZE)
+// safety exclude fxmac from VLA (causing wrong results?)
+//#if defined(A64FX) || defined(A64FXFIXEDSIZE)
+#if defined(A64FXFIXEDSIZE)
 friend accelerator_inline void mac(Grid_simd *__restrict__ y,
 const Grid_simd *__restrict__ a,
 const Grid_simd *__restrict__ x) {