mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Corrected AVX regression error. Tested.
This commit is contained in:
		@@ -2,7 +2,7 @@
 | 
			
		||||
/* lib/Grid_config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
/* AVX */
 | 
			
		||||
/* #undef AVX1 */
 | 
			
		||||
#define AVX1 1
 | 
			
		||||
 | 
			
		||||
/* AVX2 */
 | 
			
		||||
/* #undef AVX2 */
 | 
			
		||||
@@ -93,14 +93,11 @@
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#define PACKAGE_TARNAME "grid"
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#define PACKAGE_URL ""
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#define PACKAGE_VERSION "1.0"
 | 
			
		||||
 | 
			
		||||
/* SSE4 */
 | 
			
		||||
#define SSE4 1
 | 
			
		||||
/* #undef SSE4 */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
#define STDC_HEADERS 1
 | 
			
		||||
 
 | 
			
		||||
@@ -92,9 +92,6 @@
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#undef PACKAGE_TARNAME
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#undef PACKAGE_URL
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#undef PACKAGE_VERSION
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -299,7 +299,7 @@ namespace Optimization {
 | 
			
		||||
  //////////////////////////////////////////////
 | 
			
		||||
  // Some template specializations
 | 
			
		||||
  template < typename vtype > 
 | 
			
		||||
    void permute(vtype a, vtype b, int perm) {
 | 
			
		||||
    void permute(vtype &a, vtype &b, int perm) {
 | 
			
		||||
    union { 
 | 
			
		||||
      __m256 f;
 | 
			
		||||
      vtype v;
 | 
			
		||||
@@ -320,11 +320,16 @@ namespace Optimization {
 | 
			
		||||
  template<>
 | 
			
		||||
    inline Grid::ComplexF Reduce<Grid::ComplexF, __m256>::operator()(__m256 in){
 | 
			
		||||
    __m256 v1,v2;
 | 
			
		||||
    union { 
 | 
			
		||||
      __m256 v;
 | 
			
		||||
      float f[8];
 | 
			
		||||
    } conv;
 | 
			
		||||
    Optimization::permute(v1,in,0); // sse 128; paired complex single
 | 
			
		||||
    v1 = _mm256_add_ps(v1,in);
 | 
			
		||||
    Optimization::permute(v2,v1,1); // avx 256; quad complex single
 | 
			
		||||
    v1 = _mm256_add_ps(v1,v2);
 | 
			
		||||
    return Grid::ComplexF(v1[0],v1[1]);
 | 
			
		||||
    conv.v = v1;
 | 
			
		||||
    return Grid::ComplexF(conv.f[0],conv.f[1]);
 | 
			
		||||
  }
 | 
			
		||||
  //Real float Reduce
 | 
			
		||||
  template<>
 | 
			
		||||
 
 | 
			
		||||
@@ -77,9 +77,7 @@ inline void Gpermute(vsimd &y,const vsimd &b,int perm){
 | 
			
		||||
      switch (perm){
 | 
			
		||||
#if defined(AVX1)||defined(AVX2)
 | 
			
		||||
      // 8x32 bits=>3 permutes
 | 
			
		||||
      case 2: 
 | 
			
		||||
	conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); 
 | 
			
		||||
	break;
 | 
			
		||||
      case 2: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(2,3,0,1)); break;
 | 
			
		||||
      case 1: conv.f = _mm256_shuffle_ps(conv.f,conv.f,_MM_SHUFFLE(1,0,3,2)); break;
 | 
			
		||||
      case 0: conv.f = _mm256_permute2f128_ps(conv.f,conv.f,0x01); break;
 | 
			
		||||
#endif
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user