Merge branch 'feature/fft-opt' into feature/feynman-rules

# Conflicts:
#	lib/FFT.h
#	lib/qcd/action/fermion/WilsonFermion5D.h
#	tests/core/Test_fft.cc
2025-07-27 01:37:07 +01:00 · 2016-10-26 18:44:47 +01:00
parent bca861e112 14ddf2c234
commit ca21003f01
34 changed files with 2498 additions and 2203 deletions
--- a/lib/simd/Grid_avx512.h
+++ b/lib/simd/Grid_avx512.h
@@ -42,20 +42,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 namespace Grid{
 namespace Optimization {

-  template<class vtype>
-  union uconv {
-    __m512 f;
-    vtype v;
-  };
-
  union u512f {
    __m512 v;
-    float f[8];
+    float f[16];
  };

  union u512d {
-    __m512 v;
-    double f[4];
+    __m512d v;
+    double f[8];
  };
  
  struct Vsplat{
@@ -388,9 +382,14 @@ namespace Optimization {
  // Some Template specialization

  // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
+<<<<<<< HEAD
+#define GNU_CLANG_COMPILER 
+#ifdef GNU_CLANG_COMPILER
+=======

 #ifndef __INTEL_COMPILER
 #warning "Slow reduction due to incomplete reduce intrinsics"
+>>>>>>> develop
  //Complex float Reduce
  template<>
    inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
--- a/lib/simd/Intel512avx.h
+++ b/lib/simd/Intel512avx.h
@@ -53,7 +53,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

 #define ZMULMEM2SPd(O,P,tmp,B,C,Briir,Biirr,Criir,Ciirr)\
  VSHUFMEMd(O,P,tmp)  \
-  VMULMEMd(O,P,B,Biirr)  \ 
+  VMULMEMd(O,P,B,Biirr)  \
  VMULMEMd(O,P,C,Ciirr)  \
  VMULd(tmp,B,Briir)  \
  VMULd(tmp,C,Criir) 
--- a/lib/simd/Intel512common.h
+++ b/lib/simd/Intel512common.h
@@ -37,7 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 // Opcodes common 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 #define MASK_REGS \
-  __asm__ ("mov     $0xAAAA, %%eax \n"\ 
+  __asm__ ("mov     $0xAAAA, %%eax \n"\
           "kmovw    %%eax, %%k6 \n"\
           "mov     $0x5555, %%eax \n"\
           "kmovw    %%eax, %%k7 \n" : : : "%eax");