mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-10 19:36:56 +01:00
Merge branch 'feature/fft-opt' into feature/feynman-rules
# Conflicts: # lib/FFT.h # lib/qcd/action/fermion/WilsonFermion5D.h # tests/core/Test_fft.cc
This commit is contained in:
@ -42,20 +42,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid{
|
||||
namespace Optimization {
|
||||
|
||||
template<class vtype>
|
||||
union uconv {
|
||||
__m512 f;
|
||||
vtype v;
|
||||
};
|
||||
|
||||
union u512f {
|
||||
__m512 v;
|
||||
float f[8];
|
||||
float f[16];
|
||||
};
|
||||
|
||||
union u512d {
|
||||
__m512 v;
|
||||
double f[4];
|
||||
__m512d v;
|
||||
double f[8];
|
||||
};
|
||||
|
||||
struct Vsplat{
|
||||
@ -388,9 +382,14 @@ namespace Optimization {
|
||||
// Some Template specialization
|
||||
|
||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||
<<<<<<< HEAD
|
||||
#define GNU_CLANG_COMPILER
|
||||
#ifdef GNU_CLANG_COMPILER
|
||||
=======
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||
>>>>>>> develop
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||
|
@ -53,7 +53,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
|
||||
VSHUFMEMd(O,P,tmp) \
|
||||
VMULMEMd(O,P,B,Biirr) \
|
||||
VMULMEMd(O,P,B,Biirr) \
|
||||
VMULMEMd(O,P,C,Ciirr) \
|
||||
VMULd(tmp,B,Briir) \
|
||||
VMULd(tmp,C,Criir)
|
||||
|
@ -37,7 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
// Opcodes common
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#define MASK_REGS \
|
||||
__asm__ ("mov $0xAAAA, %%eax \n"\
|
||||
__asm__ ("mov $0xAAAA, %%eax \n"\
|
||||
"kmovw %%eax, %%k6 \n"\
|
||||
"mov $0x5555, %%eax \n"\
|
||||
"kmovw %%eax, %%k7 \n" : : : "%eax");
|
||||
|
Reference in New Issue
Block a user