1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-10 19:36:56 +01:00

Merge branch 'feature/fft-opt' into feature/feynman-rules

# Conflicts:
#	lib/FFT.h
#	lib/qcd/action/fermion/WilsonFermion5D.h
#	tests/core/Test_fft.cc
This commit is contained in:
2016-10-26 18:44:47 +01:00
34 changed files with 2498 additions and 2203 deletions

View File

@ -42,20 +42,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid{
namespace Optimization {
template<class vtype>
union uconv {
__m512 f;
vtype v;
};
union u512f {
__m512 v;
float f[8];
float f[16];
};
union u512d {
__m512 v;
double f[4];
__m512d v;
double f[8];
};
struct Vsplat{
@ -388,9 +382,14 @@ namespace Optimization {
// Some Template specialization
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
<<<<<<< HEAD
#define GNU_CLANG_COMPILER
#ifdef GNU_CLANG_COMPILER
=======
#ifndef __INTEL_COMPILER
#warning "Slow reduction due to incomplete reduce intrinsics"
>>>>>>> develop
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){

View File

@ -53,7 +53,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
VSHUFMEMd(O,P,tmp) \
VMULMEMd(O,P,B,Biirr) \
VMULMEMd(O,P,B,Biirr) \
VMULMEMd(O,P,C,Ciirr) \
VMULd(tmp,B,Briir) \
VMULd(tmp,C,Criir)

View File

@ -37,7 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
// Opcodes common
////////////////////////////////////////////////////////////////////////////////////////////////////
#define MASK_REGS \
__asm__ ("mov $0xAAAA, %%eax \n"\
__asm__ ("mov $0xAAAA, %%eax \n"\
"kmovw %%eax, %%k6 \n"\
"mov $0x5555, %%eax \n"\
"kmovw %%eax, %%k7 \n" : : : "%eax");