1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-25 11:12:02 +01:00

RRII gpu option

This commit is contained in:
Peter Boyle
2022-10-11 14:44:55 -04:00
parent 584a3ee45c
commit 551a5f8dc8
24 changed files with 1099 additions and 270 deletions

View File

@ -271,14 +271,14 @@ struct Conj{
struct TimesMinusI{
//Complex single
inline __m512 operator()(__m512 in, __m512 ret){
inline __m512 operator()(__m512 in){
//__m512 tmp = _mm512_mask_sub_ps(in,0xaaaa,_mm512_setzero_ps(),in); // real -imag
//return _mm512_shuffle_ps(tmp,tmp,_MM_SELECT_FOUR_FOUR(2,3,1,0)); // 0x4E??
__m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
return _mm512_mask_sub_ps(tmp,0xaaaa,_mm512_setzero_ps(),tmp);
}
//Complex double
inline __m512d operator()(__m512d in, __m512d ret){
inline __m512d operator()(__m512d in){
//__m512d tmp = _mm512_mask_sub_pd(in,0xaa,_mm512_setzero_pd(),in); // real -imag
//return _mm512_shuffle_pd(tmp,tmp,0x55);
__m512d tmp = _mm512_shuffle_pd(in,in,0x55);
@ -288,17 +288,16 @@ struct TimesMinusI{
// Functor that multiplies packed complex values by +i using AVX-512 intrinsics.
// Assumes each complex number is stored as an adjacent (re, im) pair — TODO confirm
// against the vector layout used by callers.
// For z = re + i*im, i*z = -im + i*re: swap the two halves of each pair, then
// negate the element that lands in the even (real) slot.
// NOTE(review): each operator below is listed with two consecutive signature
// lines (with and without `ret`); this looks like the before/after pair of the
// diff. `ret` is never referenced in either body.
struct TimesI{
//Complex single
inline __m512 operator()(__m512 in, __m512 ret){
inline __m512 operator()(__m512 in){
// Shuffle `in` with itself; _MM_SELECT_FOUR_FOUR(2,3,0,1) is defined elsewhere
// and presumably swaps re <-> im within each complex pair — verify the macro.
__m512 tmp = _mm512_shuffle_ps(in,in,_MM_SELECT_FOUR_FOUR(2,3,0,1));
// Mask 0x5555 selects the even-indexed floats: those lanes become 0 - tmp,
// the remaining lanes are passed through from tmp unchanged.
return _mm512_mask_sub_ps(tmp,0x5555,_mm512_setzero_ps(),tmp);
}
//Complex double
inline __m512d operator()(__m512d in, __m512d ret){
inline __m512d operator()(__m512d in){
// Immediate 0x55 exchanges the two doubles inside every 128-bit pair.
__m512d tmp = _mm512_shuffle_pd(in,in,0x55);
// Mask 0x55 selects the even-indexed doubles: those lanes become 0 - tmp,
// the remaining lanes are passed through from tmp unchanged.
return _mm512_mask_sub_pd(tmp,0x55,_mm512_setzero_pd(),tmp);
}
};
// Gpermute utilities consider coalescing into 1 Gpermute