mirror of
https://github.com/paboyle/Grid.git
synced 2024-11-10 15:55:37 +00:00
unions for tables eliminate explicit loads, gcc does not complain
This commit is contained in:
parent
55a55660cb
commit
e1a5b3ea49
@ -70,6 +70,17 @@ typedef svuint64_t lutd __attribute__((arm_sve_vector_bits(512))); // LUTs for
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
NAMESPACE_BEGIN(Optimization);
|
NAMESPACE_BEGIN(Optimization);
|
||||||
|
|
||||||
|
// convenience union types for lookup tables; initialising a table through them eliminates the explicit load
|
||||||
|
union ulutf {
|
||||||
|
lutf v;
|
||||||
|
uint32_t s[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
union ulutd {
|
||||||
|
lutd v;
|
||||||
|
uint64_t s[8];
|
||||||
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct acle{};
|
struct acle{};
|
||||||
|
|
||||||
@ -77,19 +88,31 @@ template <>
|
|||||||
struct acle<double>{
|
struct acle<double>{
|
||||||
static inline pred pg1(){return svptrue_b64();}
|
static inline pred pg1(){return svptrue_b64();}
|
||||||
static inline lutd tbl_swap(){
|
static inline lutd tbl_swap(){
|
||||||
|
/*
|
||||||
const uint64_t t[8] = {1, 0, 3, 2, 5, 4, 7, 6};
|
const uint64_t t[8] = {1, 0, 3, 2, 5, 4, 7, 6};
|
||||||
pred pg1 = svptrue_b64();
|
pred pg1 = svptrue_b64();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutd t = { .s = {1, 0, 3, 2, 5, 4, 7, 6} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline lutd tbl0(){
|
static inline lutd tbl0(){
|
||||||
|
/*
|
||||||
const uint64_t t[8] = {4, 5, 6, 7, 0, 1, 2, 3};
|
const uint64_t t[8] = {4, 5, 6, 7, 0, 1, 2, 3};
|
||||||
pred pg1 = svptrue_b64();
|
pred pg1 = svptrue_b64();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutd t = { .s = {4, 5, 6, 7, 0, 1, 2, 3} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline lutd tbl1(){
|
static inline lutd tbl1(){
|
||||||
|
/*
|
||||||
const uint64_t t[8] = {2, 3, 0, 1, 6, 7, 4, 5};
|
const uint64_t t[8] = {2, 3, 0, 1, 6, 7, 4, 5};
|
||||||
pred pg1 = svptrue_b64();
|
pred pg1 = svptrue_b64();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutd t = { .s = {2, 3, 0, 1, 6, 7, 4, 5} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b64(svptrue_b64(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
static inline pred pg_odd() {return svzip1_b64(svpfalse_b(), svptrue_b64());}
|
||||||
@ -101,24 +124,40 @@ struct acle<float>{
|
|||||||
static inline pred pg1(){return svptrue_b32();}
|
static inline pred pg1(){return svptrue_b32();}
|
||||||
// exchange neighboring elements
|
// exchange neighboring elements
|
||||||
static inline lutf tbl_swap(){
|
static inline lutf tbl_swap(){
|
||||||
|
/*
|
||||||
const uint32_t t[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
const uint32_t t[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
|
||||||
pred pg1 = svptrue_b32();
|
pred pg1 = svptrue_b32();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutf t = { .s = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline lutf tbl0(){
|
static inline lutf tbl0(){
|
||||||
|
/*
|
||||||
const uint32_t t[16] = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
const uint32_t t[16] = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||||
pred pg1 = svptrue_b32();
|
pred pg1 = svptrue_b32();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutf t = { .s = {8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline lutf tbl1(){
|
static inline lutf tbl1(){
|
||||||
|
/*
|
||||||
const uint32_t t[16] = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
const uint32_t t[16] = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
|
||||||
pred pg1 = svptrue_b32();
|
pred pg1 = svptrue_b32();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutf t = { .s = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline lutf tbl2(){
|
static inline lutf tbl2(){
|
||||||
|
/*
|
||||||
const uint32_t t[16] = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
const uint32_t t[16] = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
|
||||||
pred pg1 = svptrue_b32();
|
pred pg1 = svptrue_b32();
|
||||||
return svld1(pg1, t);
|
return svld1(pg1, t);
|
||||||
|
*/
|
||||||
|
const ulutf t = { .s = {2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13} };
|
||||||
|
return t.v;
|
||||||
}
|
}
|
||||||
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
static inline pred pg_even(){return svzip1_b32(svptrue_b32(), svpfalse_b());}
|
||||||
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
static inline pred pg_odd() {return svzip1_b32(svpfalse_b(), svptrue_b32());}
|
||||||
|
Loading…
Reference in New Issue
Block a user