mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 05:24:32 +00:00 
			
		
		
		
	Partial implementation of the SIMD vector types
Implementing SSE4 now. A systematic series of tests must be written.
This commit is contained in:
		
							
								
								
									
										127
									
								
								lib/Grid_config.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								lib/Grid_config.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,127 @@
 | 
			
		||||
/* lib/Grid_config.h.  Generated from Grid_config.h.in by configure.  */
 | 
			
		||||
/* lib/Grid_config.h.in.  Generated from configure.ac by autoheader.  */
 | 
			
		||||
 | 
			
		||||
/* AVX */
 | 
			
		||||
/* #undef AVX1 */
 | 
			
		||||
 | 
			
		||||
/* AVX2 */
 | 
			
		||||
/* #undef AVX2 */
 | 
			
		||||
 | 
			
		||||
/* AVX512 */
 | 
			
		||||
/* #undef AVX512 */
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_MPI */
 | 
			
		||||
/* #undef GRID_COMMS_MPI */
 | 
			
		||||
 | 
			
		||||
/* GRID_COMMS_NONE */
 | 
			
		||||
#define GRID_COMMS_NONE 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
 | 
			
		||||
   don't. */
 | 
			
		||||
#define HAVE_DECL_BE64TOH 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
 | 
			
		||||
   */
 | 
			
		||||
#define HAVE_DECL_NTOHLL 0
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <endian.h> header file. */
 | 
			
		||||
#define HAVE_ENDIAN_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `gettimeofday' function. */
 | 
			
		||||
#define HAVE_GETTIMEOFDAY 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <gmp.h> header file. */
 | 
			
		||||
#define HAVE_GMP_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <inttypes.h> header file. */
 | 
			
		||||
#define HAVE_INTTYPES_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `gmp' library (-lgmp). */
 | 
			
		||||
#define HAVE_LIBGMP 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the `mpfr' library (-lmpfr). */
 | 
			
		||||
#define HAVE_LIBMPFR 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc.h> header file. */
 | 
			
		||||
#define HAVE_MALLOC_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <malloc/malloc.h> header file. */
 | 
			
		||||
/* #undef HAVE_MALLOC_MALLOC_H */
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <memory.h> header file. */
 | 
			
		||||
#define HAVE_MEMORY_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <mm_malloc.h> header file. */
 | 
			
		||||
#define HAVE_MM_MALLOC_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdint.h> header file. */
 | 
			
		||||
#define HAVE_STDINT_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <stdlib.h> header file. */
 | 
			
		||||
#define HAVE_STDLIB_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <strings.h> header file. */
 | 
			
		||||
#define HAVE_STRINGS_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <string.h> header file. */
 | 
			
		||||
#define HAVE_STRING_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/stat.h> header file. */
 | 
			
		||||
#define HAVE_SYS_STAT_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <sys/types.h> header file. */
 | 
			
		||||
#define HAVE_SYS_TYPES_H 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <unistd.h> header file. */
 | 
			
		||||
#define HAVE_UNISTD_H 1
 | 
			
		||||
 | 
			
		||||
/* Name of package */
 | 
			
		||||
#define PACKAGE "grid"
 | 
			
		||||
 | 
			
		||||
/* Define to the address where bug reports for this package should be sent. */
 | 
			
		||||
#define PACKAGE_BUGREPORT "paboyle@ph.ed.ac.uk"
 | 
			
		||||
 | 
			
		||||
/* Define to the full name of this package. */
 | 
			
		||||
#define PACKAGE_NAME "Grid"
 | 
			
		||||
 | 
			
		||||
/* Define to the full name and version of this package. */
 | 
			
		||||
#define PACKAGE_STRING "Grid 1.0"
 | 
			
		||||
 | 
			
		||||
/* Define to the one symbol short name of this package. */
 | 
			
		||||
#define PACKAGE_TARNAME "grid"
 | 
			
		||||
 | 
			
		||||
/* Define to the home page for this package. */
 | 
			
		||||
#define PACKAGE_URL ""
 | 
			
		||||
 | 
			
		||||
/* Define to the version of this package. */
 | 
			
		||||
#define PACKAGE_VERSION "1.0"
 | 
			
		||||
 | 
			
		||||
/* SSE4 */
 | 
			
		||||
#define SSE4 1
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the ANSI C header files. */
 | 
			
		||||
#define STDC_HEADERS 1
 | 
			
		||||
 | 
			
		||||
/* Version number of package */
 | 
			
		||||
#define VERSION "1.0"
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
/* #undef _UINT32_T */
 | 
			
		||||
 | 
			
		||||
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
 | 
			
		||||
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
 | 
			
		||||
   #define below would cause a syntax error. */
 | 
			
		||||
/* #undef _UINT64_T */
 | 
			
		||||
 | 
			
		||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
 | 
			
		||||
/* #undef size_t */
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 32 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
/* #undef uint32_t */
 | 
			
		||||
 | 
			
		||||
/* Define to the type of an unsigned integer type of width exactly 64 bits if
 | 
			
		||||
   such a type exists and the standard includes do not define it. */
 | 
			
		||||
/* #undef uint64_t */
 | 
			
		||||
							
								
								
									
										158
									
								
								lib/Makefile.am
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								lib/Makefile.am
									
									
									
									
									
								
							@@ -14,87 +14,89 @@ endif
 | 
			
		||||
# Libraries
 | 
			
		||||
#
 | 
			
		||||
lib_LIBRARIES = libGrid.a
 | 
			
		||||
libGrid_a_SOURCES =\
 | 
			
		||||
	 Grid_init.cc\
 | 
			
		||||
	stencil/Grid_stencil_common.cc\
 | 
			
		||||
	qcd/Grid_qcd_dirac.cc\
 | 
			
		||||
	qcd/Grid_qcd_wilson_dop.cc\
 | 
			
		||||
	algorithms/approx/Zolotarev.cc\
 | 
			
		||||
	algorithms/approx/Remez.cc\
 | 
			
		||||
libGrid_a_SOURCES =				\
 | 
			
		||||
	Grid_init.cc				\
 | 
			
		||||
	stencil/Grid_stencil_common.cc		\
 | 
			
		||||
	qcd/Grid_qcd_dirac.cc			\
 | 
			
		||||
	qcd/Grid_qcd_wilson_dop.cc		\
 | 
			
		||||
	algorithms/approx/Zolotarev.cc		\
 | 
			
		||||
	algorithms/approx/Remez.cc		\
 | 
			
		||||
	$(extra_sources)
 | 
			
		||||
 | 
			
		||||
#
 | 
			
		||||
# Include files
 | 
			
		||||
#
 | 
			
		||||
nobase_include_HEADERS = algorithms/approx/bigfloat.h\
 | 
			
		||||
	algorithms/approx/Chebyshev.h\
 | 
			
		||||
	algorithms/approx/Remez.h\
 | 
			
		||||
	algorithms/approx/Zolotarev.h\
 | 
			
		||||
	algorithms/iterative/ConjugateGradient.h\
 | 
			
		||||
	algorithms/iterative/NormalEquations.h\
 | 
			
		||||
	algorithms/iterative/SchurRedBlack.h\
 | 
			
		||||
	algorithms/LinearOperator.h\
 | 
			
		||||
	algorithms/SparseMatrix.h\
 | 
			
		||||
	cartesian/Grid_cartesian_base.h\
 | 
			
		||||
	cartesian/Grid_cartesian_full.h\
 | 
			
		||||
	cartesian/Grid_cartesian_red_black.h\
 | 
			
		||||
	communicator/Grid_communicator_base.h\
 | 
			
		||||
	cshift/Grid_cshift_common.h\
 | 
			
		||||
	cshift/Grid_cshift_mpi.h\
 | 
			
		||||
	cshift/Grid_cshift_none.h\
 | 
			
		||||
	Grid.h\
 | 
			
		||||
	Grid_algorithms.h\
 | 
			
		||||
	Grid_aligned_allocator.h\
 | 
			
		||||
	Grid_cartesian.h\
 | 
			
		||||
	Grid_communicator.h\
 | 
			
		||||
	Grid_comparison.h\
 | 
			
		||||
	Grid_cshift.h\
 | 
			
		||||
	Grid_extract.h\
 | 
			
		||||
	Grid_lattice.h\
 | 
			
		||||
	Grid_math.h\
 | 
			
		||||
	Grid_simd.h\
 | 
			
		||||
	Grid_stencil.h\
 | 
			
		||||
	Grid_threads.h\
 | 
			
		||||
	lattice/Grid_lattice_arith.h\
 | 
			
		||||
	lattice/Grid_lattice_base.h\
 | 
			
		||||
	lattice/Grid_lattice_comparison.h\
 | 
			
		||||
	lattice/Grid_lattice_conformable.h\
 | 
			
		||||
	lattice/Grid_lattice_coordinate.h\
 | 
			
		||||
	lattice/Grid_lattice_ET.h\
 | 
			
		||||
	lattice/Grid_lattice_local.h\
 | 
			
		||||
	lattice/Grid_lattice_overload.h\
 | 
			
		||||
	lattice/Grid_lattice_peekpoke.h\
 | 
			
		||||
	lattice/Grid_lattice_reality.h\
 | 
			
		||||
	lattice/Grid_lattice_reduction.h\
 | 
			
		||||
	lattice/Grid_lattice_rng.h\
 | 
			
		||||
	lattice/Grid_lattice_trace.h\
 | 
			
		||||
	lattice/Grid_lattice_transfer.h\
 | 
			
		||||
	lattice/Grid_lattice_transpose.h\
 | 
			
		||||
	lattice/Grid_lattice_where.h\
 | 
			
		||||
	math/Grid_math_arith.h\
 | 
			
		||||
	math/Grid_math_arith_add.h\
 | 
			
		||||
	math/Grid_math_arith_mac.h\
 | 
			
		||||
	math/Grid_math_arith_mul.h\
 | 
			
		||||
	math/Grid_math_arith_scalar.h\
 | 
			
		||||
	math/Grid_math_arith_sub.h\
 | 
			
		||||
	math/Grid_math_inner.h\
 | 
			
		||||
	math/Grid_math_outer.h\
 | 
			
		||||
	math/Grid_math_peek.h\
 | 
			
		||||
	math/Grid_math_poke.h\
 | 
			
		||||
	math/Grid_math_reality.h\
 | 
			
		||||
	math/Grid_math_tensors.h\
 | 
			
		||||
	math/Grid_math_trace.h\
 | 
			
		||||
	math/Grid_math_traits.h\
 | 
			
		||||
	math/Grid_math_transpose.h\
 | 
			
		||||
	parallelIO/GridNerscIO.h\
 | 
			
		||||
	qcd/Grid_qcd.h\
 | 
			
		||||
	qcd/Grid_qcd_2spinor.h\
 | 
			
		||||
	qcd/Grid_qcd_dirac.h\
 | 
			
		||||
	qcd/Grid_qcd_wilson_dop.h\
 | 
			
		||||
	simd/Grid_vComplexD.h\
 | 
			
		||||
	simd/Grid_vComplexF.h\
 | 
			
		||||
	simd/Grid_vInteger.h\
 | 
			
		||||
	simd/Grid_vRealD.h\
 | 
			
		||||
	simd/Grid_vRealF.h\
 | 
			
		||||
	simd/Grid_vector_types.h
 | 
			
		||||
nobase_include_HEADERS = algorithms/approx/bigfloat.h		\
 | 
			
		||||
	algorithms/approx/Chebyshev.h				\
 | 
			
		||||
	algorithms/approx/Remez.h				\
 | 
			
		||||
	algorithms/approx/Zolotarev.h				\
 | 
			
		||||
	algorithms/iterative/ConjugateGradient.h		\
 | 
			
		||||
	algorithms/iterative/NormalEquations.h			\
 | 
			
		||||
	algorithms/iterative/SchurRedBlack.h			\
 | 
			
		||||
	algorithms/LinearOperator.h				\
 | 
			
		||||
	algorithms/SparseMatrix.h				\
 | 
			
		||||
	cartesian/Grid_cartesian_base.h				\
 | 
			
		||||
	cartesian/Grid_cartesian_full.h				\
 | 
			
		||||
	cartesian/Grid_cartesian_red_black.h			\
 | 
			
		||||
	communicator/Grid_communicator_base.h			\
 | 
			
		||||
	cshift/Grid_cshift_common.h				\
 | 
			
		||||
	cshift/Grid_cshift_mpi.h				\
 | 
			
		||||
	cshift/Grid_cshift_none.h				\
 | 
			
		||||
	Grid.h							\
 | 
			
		||||
	Grid_algorithms.h					\
 | 
			
		||||
	Grid_aligned_allocator.h				\
 | 
			
		||||
	Grid_cartesian.h					\
 | 
			
		||||
	Grid_communicator.h					\
 | 
			
		||||
	Grid_comparison.h					\
 | 
			
		||||
	Grid_cshift.h						\
 | 
			
		||||
	Grid_extract.h						\
 | 
			
		||||
	Grid_lattice.h						\
 | 
			
		||||
	Grid_math.h						\
 | 
			
		||||
	Grid_simd.h						\
 | 
			
		||||
	Grid_stencil.h						\
 | 
			
		||||
	Grid_threads.h						\
 | 
			
		||||
	lattice/Grid_lattice_arith.h				\
 | 
			
		||||
	lattice/Grid_lattice_base.h				\
 | 
			
		||||
	lattice/Grid_lattice_comparison.h			\
 | 
			
		||||
	lattice/Grid_lattice_conformable.h			\
 | 
			
		||||
	lattice/Grid_lattice_coordinate.h			\
 | 
			
		||||
	lattice/Grid_lattice_ET.h				\
 | 
			
		||||
	lattice/Grid_lattice_local.h				\
 | 
			
		||||
	lattice/Grid_lattice_overload.h				\
 | 
			
		||||
	lattice/Grid_lattice_peekpoke.h				\
 | 
			
		||||
	lattice/Grid_lattice_reality.h				\
 | 
			
		||||
	lattice/Grid_lattice_reduction.h			\
 | 
			
		||||
	lattice/Grid_lattice_rng.h				\
 | 
			
		||||
	lattice/Grid_lattice_trace.h				\
 | 
			
		||||
	lattice/Grid_lattice_transfer.h				\
 | 
			
		||||
	lattice/Grid_lattice_transpose.h			\
 | 
			
		||||
	lattice/Grid_lattice_where.h				\
 | 
			
		||||
	math/Grid_math_arith.h					\
 | 
			
		||||
	math/Grid_math_arith_add.h				\
 | 
			
		||||
	math/Grid_math_arith_mac.h				\
 | 
			
		||||
	math/Grid_math_arith_mul.h				\
 | 
			
		||||
	math/Grid_math_arith_scalar.h				\
 | 
			
		||||
	math/Grid_math_arith_sub.h				\
 | 
			
		||||
	math/Grid_math_inner.h					\
 | 
			
		||||
	math/Grid_math_outer.h					\
 | 
			
		||||
	math/Grid_math_peek.h					\
 | 
			
		||||
	math/Grid_math_poke.h					\
 | 
			
		||||
	math/Grid_math_reality.h				\
 | 
			
		||||
	math/Grid_math_tensors.h				\
 | 
			
		||||
	math/Grid_math_trace.h					\
 | 
			
		||||
	math/Grid_math_traits.h					\
 | 
			
		||||
	math/Grid_math_transpose.h				\
 | 
			
		||||
	parallelIO/GridNerscIO.h				\
 | 
			
		||||
	qcd/Grid_qcd.h						\
 | 
			
		||||
	qcd/Grid_qcd_2spinor.h					\
 | 
			
		||||
	qcd/Grid_qcd_dirac.h					\
 | 
			
		||||
	qcd/Grid_qcd_wilson_dop.h				\
 | 
			
		||||
	simd/Grid_vComplexD.h					\
 | 
			
		||||
	simd/Grid_vComplexF.h					\
 | 
			
		||||
	simd/Grid_vInteger.h					\
 | 
			
		||||
	simd/Grid_vRealD.h					\
 | 
			
		||||
	simd/Grid_vRealF.h					\
 | 
			
		||||
	simd/Grid_vector_types.h				\
 | 
			
		||||
	simd/Grid_sse4.h					
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/algorithms/approx/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/communicator/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								lib/qcd/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/qcd/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										0
									
								
								lib/simd/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/simd/.dirstamp
									
									
									
									
									
										Normal file
									
								
							@@ -1,19 +0,0 @@
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
/*! @file Grid_vector_types.h
 | 
			
		||||
  @brief Defines templated class to deal with inner vector types
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-19 13:53:47 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Here assign types 
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										194
									
								
								lib/simd/Grid_sse4.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								lib/simd/Grid_sse4.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,194 @@
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
/*! @file Grid_sse4.h
 | 
			
		||||
  @brief Optimization libraries
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-19 17:06:51 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <pmmintrin.h> // SSE3: _mm_addsub_ps / _mm_addsub_pd
#include <smmintrin.h> // SSE4.1: _mm_mullo_epi32
 | 
			
		||||
 | 
			
		||||
namespace Optimization {
 | 
			
		||||
  
 | 
			
		||||
  struct Vsplat{
 | 
			
		||||
    //Complex float
 | 
			
		||||
    inline __m128 operator()(float a, float b){
 | 
			
		||||
      return _mm_set_ps(b,a,b,a);
 | 
			
		||||
    }
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m128 operator()(float a){
 | 
			
		||||
      return _mm_set_ps(a,a,a,a);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline __m128d operator()(double a, double b){
 | 
			
		||||
      return _mm_set_pd(b,a);
 | 
			
		||||
    }
 | 
			
		||||
    //Real double
 | 
			
		||||
    inline __m128d operator()(double a){
 | 
			
		||||
      return _mm_set_pd(a,a);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline __m128i operator()(Integer a){
 | 
			
		||||
      return _mm_set1_epi32(a);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Vstore{
 | 
			
		||||
    //Float 
 | 
			
		||||
    inline void operator()(__m128 a, float* F){
 | 
			
		||||
      _mm_store_ps(F,a);
 | 
			
		||||
    }
 | 
			
		||||
    //Double
 | 
			
		||||
    inline void operator()(__m128d a, double* D){
 | 
			
		||||
      _mm_store_pd(D,a);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline void operator()(__m128i a, Integer* I){
 | 
			
		||||
      _mm_store_si128((__m128i *)I,a);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  struct Vset{
 | 
			
		||||
    // Complex float 
 | 
			
		||||
    inline __m128 operator()(Grid::ComplexF *a){
 | 
			
		||||
      return _mm_set_ps(a[1].imag(), a[1].real(),a[0].imag(),a[0].real());
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double 
 | 
			
		||||
    inline __m128d operator()(Grid::ComplexD *a){
 | 
			
		||||
      return _mm_set_pd(a[0].imag(),a[0].real());
 | 
			
		||||
    }
 | 
			
		||||
    // Real float 
 | 
			
		||||
    inline __m128 operator()(float *a){
 | 
			
		||||
      return _mm_set_ps(a[3],a[2],a[1],a[0]);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m128d operator()(double *a){
 | 
			
		||||
      return _mm_set_pd(a[1],a[0]);
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline __m128i operator()(Integer *a){
 | 
			
		||||
      return _mm_set_epi32(a[0],a[1],a[2],a[3]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Reduce{
 | 
			
		||||
    //Complex float
 | 
			
		||||
    inline Grid::ComplexF operator()(__m128 in){
 | 
			
		||||
      union {
 | 
			
		||||
	__m128 v1;  
 | 
			
		||||
	float f[4]; 
 | 
			
		||||
      } u128;
 | 
			
		||||
      u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
 | 
			
		||||
      return Grid::ComplexF(u128.f[0], u128.f[1]);   
 | 
			
		||||
    }
 | 
			
		||||
    //Complex double
 | 
			
		||||
    inline Grid::ComplexD operator()(__m128d in){
 | 
			
		||||
      printf("Missing complex double implementation -> FIX\n");
 | 
			
		||||
      return Grid::ComplexD(0,0); // FIXME wrong
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  /////////////////////////////////////////////////////
 | 
			
		||||
  struct Sum{
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    inline __m128 operator()(__m128 a, __m128 b){
 | 
			
		||||
      return _mm_add_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex/Real double
 | 
			
		||||
    inline __m128d operator()(__m128d a, __m128d b){
 | 
			
		||||
      return _mm_add_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline __m128i operator()(__m128i a, __m128i b){
 | 
			
		||||
      return _mm_add_epi32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Sub{
 | 
			
		||||
    //Complex/Real float
 | 
			
		||||
    inline __m128 operator()(__m128 a, __m128 b){
 | 
			
		||||
      return _mm_sub_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Complex/Real double
 | 
			
		||||
    inline __m128d operator()(__m128d a, __m128d b){
 | 
			
		||||
      return _mm_sub_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    //Integer
 | 
			
		||||
    inline __m128i operator()(__m128i a, __m128i b){
 | 
			
		||||
      return _mm_sub_epi32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct MultComplex{
 | 
			
		||||
    // Complex float
 | 
			
		||||
    inline __m128 operator()(__m128 a, __m128 b){
 | 
			
		||||
      __m128 ymm0,ymm1,ymm2;
 | 
			
		||||
      ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
 | 
			
		||||
      ymm0 = _mm_mul_ps(ymm0,b);        // ymm0 <- ar bi, ar br
 | 
			
		||||
      ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
 | 
			
		||||
      ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
 | 
			
		||||
      ymm1 = _mm_mul_ps(ymm1,ymm2);       // ymm1 <- br ai, ai bi
 | 
			
		||||
      return _mm_addsub_ps(ymm0,ymm1);    
 | 
			
		||||
    }
 | 
			
		||||
    // Complex double
 | 
			
		||||
    inline __m128d operator()(__m128d a, __m128d b){
 | 
			
		||||
      __m128d ymm0,ymm1,ymm2;
 | 
			
		||||
      ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
 | 
			
		||||
      ymm0 = _mm_mul_pd(ymm0,b);        // ymm0 <- ar bi, ar br
 | 
			
		||||
      ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi   b01
 | 
			
		||||
      ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai   b11
 | 
			
		||||
      ymm1 = _mm_mul_pd(ymm1,ymm2);       // ymm1 <- br ai, ai bi
 | 
			
		||||
      return _mm_addsub_pd(ymm0,ymm1);  
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct Mult{
 | 
			
		||||
    // Real float
 | 
			
		||||
    inline __m128 operator()(__m128 a, __m128 b){
 | 
			
		||||
      return _mm_mul_ps(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Real double
 | 
			
		||||
    inline __m128d operator()(__m128d a, __m128d b){
 | 
			
		||||
      return _mm_mul_pd(a,b);
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline __m128i operator()(__m128i a, __m128i b){
 | 
			
		||||
      return _mm_mul_epi32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Here assign types 
 | 
			
		||||
namespace Grid {
 | 
			
		||||
  typedef __m128 SIMD_Ftype;  // Single precision type
 | 
			
		||||
  typedef __m128d SIMD_Dtype; // Double precision type
 | 
			
		||||
  typedef __m128i SIMD_Itype; // Integer type
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Function names
 | 
			
		||||
  typedef Optimization::Vsplat VsplatSIMD;
 | 
			
		||||
  typedef Optimization::Vstore VstoreSIMD;
 | 
			
		||||
 | 
			
		||||
  // Arithmetic operations
 | 
			
		||||
  typedef Optimization::Sum         SumSIMD;
 | 
			
		||||
  typedef Optimization::Sub         SubSIMD;
 | 
			
		||||
  typedef Optimization::Mult        MultSIMD;
 | 
			
		||||
  typedef Optimization::MultComplex MultComplexSIMD;
 | 
			
		||||
  typedef Optimization::Vset        VsetSIMD;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1,12 +1,14 @@
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
//---------------------------------------------------------------------------
 | 
			
		||||
/*! @file Grid_vector_types.h
 | 
			
		||||
  @brief Defines templated class to deal with inner vector types
 | 
			
		||||
  @brief Defines templated class Grid_simd to deal with inner vector types
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-05-19 13:41:47 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
// Time-stamp: <2015-05-19 17:20:36 neo>
 | 
			
		||||
//---------------------------------------------------------------------------
 | 
			
		||||
#ifndef GRID_VECTOR_TYPES
 | 
			
		||||
#define GRID_VECTOR_TYPES
 | 
			
		||||
 | 
			
		||||
#include "Grid_sse4.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
@@ -27,18 +29,20 @@ namespace Grid {
 | 
			
		||||
  template < typename T > 
 | 
			
		||||
    struct is_complex< std::complex<T> >: std::true_type {};
 | 
			
		||||
  ////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Define the operation templates functors
 | 
			
		||||
  template < class SIMD, class Operation > 
 | 
			
		||||
    SIMD binary(SIMD src_1, SIMD src_2, Operation op){
 | 
			
		||||
  // general forms to allow for vsplat syntax
 | 
			
		||||
  // need explicit declaration of types when used since
 | 
			
		||||
  // clang cannot automatically determine the output type sometimes
 | 
			
		||||
  template < class Out, class Input1, class Input2, class Operation > 
 | 
			
		||||
    Out binary(Input1 src_1, Input2 src_2, Operation op){
 | 
			
		||||
    return op(src_1, src_2);
 | 
			
		||||
  }
 | 
			
		||||
  } 
 | 
			
		||||
 | 
			
		||||
  template < class SIMD, class Operation > 
 | 
			
		||||
    SIMD unary(SIMD src, Operation op){
 | 
			
		||||
  template < class SIMDout, class Input, class Operation > 
 | 
			
		||||
    SIMDout unary(Input src, Operation op){
 | 
			
		||||
    return op(src);
 | 
			
		||||
  }
 | 
			
		||||
  } 
 | 
			
		||||
 | 
			
		||||
  ///////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
@@ -74,36 +78,42 @@ namespace Grid {
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
       
 | 
			
		||||
    ///////////////////////////////////////////////
 | 
			
		||||
    // mac, mult, sub, add, adj
 | 
			
		||||
    // Should do an AVX2 version with mac.
 | 
			
		||||
    ///////////////////////////////////////////////
 | 
			
		||||
    friend inline void mac (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ a,const Grid_simd *__restrict__ x){ *y = (*a)*(*x)+(*y); };
 | 
			
		||||
    friend inline void mult(Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) * (*r); }
 | 
			
		||||
    friend inline void sub (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) - (*r); }
 | 
			
		||||
    friend inline void add (Grid_simd * __restrict__ y,const Grid_simd * __restrict__ l,const Grid_simd *__restrict__ r){ *y = (*l) + (*r); }
 | 
			
		||||
    //not for integer types... FIXME
 | 
			
		||||
    friend inline Grid_simd adj(const Grid_simd &in){ return conj(in); }
 | 
			
		||||
        
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    // Initialise to 1,0,i
 | 
			
		||||
    //////////////////////////////////
 | 
			
		||||
    ///////////////////////////////////////////////
 | 
			
		||||
    // Initialise to 1,0,i for the correct types
 | 
			
		||||
    ///////////////////////////////////////////////
 | 
			
		||||
    // if not complex overload here 
 | 
			
		||||
    friend inline void vone(Grid_simd &ret)      { vsplat(ret,1.0); }
 | 
			
		||||
    friend inline void vzero(Grid_simd &ret)     { vsplat(ret,0.0); }
 | 
			
		||||
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
      friend inline void vone(Grid_simd &ret)      { vsplat(ret,1.0); }
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
      friend inline void vzero(Grid_simd &ret)     { vsplat(ret,0.0); }
 | 
			
		||||
    
 | 
			
		||||
    // overload for complex type
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline void vone(Grid_simd &ret)      { vsplat(ret,1.0,0.0); }
 | 
			
		||||
      friend inline void vone(Grid_simd &ret)      { vsplat(ret,1.0,0.0); }
 | 
			
		||||
    template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline void vzero(Grid_simd &ret)     { vsplat(ret,0.0,0.0); }
 | 
			
		||||
 | 
			
		||||
      friend inline void vzero(Grid_simd &ret)     { vsplat(ret,0.0,0.0); }// use xor?
 | 
			
		||||
    
 | 
			
		||||
    // For integral type
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline void vone(Grid_simd &ret)      { vsplat(ret,1); }
 | 
			
		||||
      friend inline void vone(Grid_simd &ret)      { vsplat(ret,1); }
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline void vzero(Grid_simd &ret)      { vsplat(ret,0); }
 | 
			
		||||
      friend inline void vzero(Grid_simd &ret)      { vsplat(ret,0); }
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > 
 | 
			
		||||
      friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > 
 | 
			
		||||
      friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // do not compile if real or integer, send an error message from the compiler
 | 
			
		||||
    template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
@@ -114,31 +124,44 @@ namespace Grid {
 | 
			
		||||
    ////////////////////////////////////
 | 
			
		||||
    friend inline Grid_simd operator + (Grid_simd a, Grid_simd b)
 | 
			
		||||
    {
 | 
			
		||||
      vComplexF ret;
 | 
			
		||||
      // FIXME call the binary op
 | 
			
		||||
      Grid_simd ret;
 | 
			
		||||
      ret.v = binary<Vector_type>(a.v, b.v, SumSIMD());
 | 
			
		||||
      return ret;
 | 
			
		||||
    };
 | 
			
		||||
        
 | 
			
		||||
    friend inline Grid_simd operator - (Grid_simd a, Grid_simd b)
 | 
			
		||||
    {
 | 
			
		||||
      vComplexF ret;
 | 
			
		||||
      // FIXME call the binary op
 | 
			
		||||
      Grid_simd ret;
 | 
			
		||||
      ret.v = binary<Vector_type>(a.v, b.v, SubSIMD());
 | 
			
		||||
      return ret;
 | 
			
		||||
    };
 | 
			
		||||
        
 | 
			
		||||
    friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
 | 
			
		||||
    // Distinguish between complex types and others
 | 
			
		||||
    template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 >
 | 
			
		||||
      friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
 | 
			
		||||
      {
 | 
			
		||||
	vComplexF ret;
 | 
			
		||||
	// FIXME call the binary op
 | 
			
		||||
	Grid_simd ret;
 | 
			
		||||
	ret.v = binary<Vector_type>(a.v,b.v, MultComplexSIMD());
 | 
			
		||||
	return ret;
 | 
			
		||||
      };
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
    // Real/Integer types
 | 
			
		||||
    template <  class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
 | 
			
		||||
      {
 | 
			
		||||
	Grid_simd ret;
 | 
			
		||||
	ret.v = binary<Vector_type>(a.v,b.v, MultSIMD());
 | 
			
		||||
	return ret;
 | 
			
		||||
      };
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    // FIXME:  gonna remove these load/store, get, set, prefetch
 | 
			
		||||
    ////////////////////////////////////////////////////////////////////////
 | 
			
		||||
    friend inline void vset(Grid_simd &ret, Scalar_type *a){
 | 
			
		||||
      // FIXME set 
 | 
			
		||||
      ret.v = unary<Vector_type>(a, VsetSIMD());
 | 
			
		||||
    }
 | 
			
		||||
        
 | 
			
		||||
    ///////////////////////
 | 
			
		||||
@@ -147,34 +170,33 @@ namespace Grid {
 | 
			
		||||
    // overload if complex
 | 
			
		||||
    template < class S = Scalar_type > 
 | 
			
		||||
    friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){
 | 
			
		||||
      Real a= real(c);
 | 
			
		||||
      Real b= imag(c);
 | 
			
		||||
      Real a = real(c);
 | 
			
		||||
      Real b = imag(c);
 | 
			
		||||
      vsplat(ret,a,b);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // this only for the complex version
 | 
			
		||||
    template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > 
 | 
			
		||||
    friend inline void vsplat(Grid_simd &ret,Real a, Real b){
 | 
			
		||||
      // FIXME add operator
 | 
			
		||||
      ret.v = binary<Vector_type>(a, b, VsplatSIMD());
 | 
			
		||||
    }    
 | 
			
		||||
 | 
			
		||||
    //if real fill with a, if complex fill with a in the real part
 | 
			
		||||
    //if real fill with a, if complex fill with a in the real part (first function above)
 | 
			
		||||
    friend inline void vsplat(Grid_simd &ret,Real a){
 | 
			
		||||
      // FIXME add operator
 | 
			
		||||
      ret.v = unary<Vector_type>(a, VsplatSIMD());
 | 
			
		||||
    }    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
 | 
			
		||||
      //FIXME
 | 
			
		||||
      binary<void>(ret.v, (Real*)a, VstoreSIMD());
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    friend inline void vprefetch(const Grid_simd &v)
 | 
			
		||||
    {
 | 
			
		||||
      _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    friend inline Scalar_type Reduce(const Grid_simd & in)
 | 
			
		||||
    {
 | 
			
		||||
      // FIXME add operator
 | 
			
		||||
@@ -221,6 +243,7 @@ namespace Grid {
 | 
			
		||||
    inline Grid_simd &operator *=(const Grid_simd &r) {
 | 
			
		||||
      *this = (*this)*r;
 | 
			
		||||
      return *this;
 | 
			
		||||
      // return (*this)*r; ?
 | 
			
		||||
    }
 | 
			
		||||
    inline Grid_simd &operator +=(const Grid_simd &r) {
 | 
			
		||||
      *this = *this+r;
 | 
			
		||||
@@ -233,6 +256,12 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
     friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
 | 
			
		||||
      {
 | 
			
		||||
        Gpermute<Grid_simd>(y,b,perm);
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
     /*
 | 
			
		||||
    friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
 | 
			
		||||
    {
 | 
			
		||||
      Gpermute<Grid_simd>(y,b,perm);
 | 
			
		||||
@@ -253,7 +282,7 @@ namespace Grid {
 | 
			
		||||
    {
 | 
			
		||||
      Gextract<Grid_simd,Scalar_type>(y,extracted);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
     */
 | 
			
		||||
 | 
			
		||||
  };// end of Grid_simd class definition 
 | 
			
		||||
 | 
			
		||||
@@ -286,11 +315,11 @@ namespace Grid {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Define available types (now change names to avoid clashing)
 | 
			
		||||
  typedef __m128 SIMD_type;// decided at compilation time
 | 
			
		||||
  typedef Grid_simd< float                 , SIMD_type > MyRealF;
 | 
			
		||||
  typedef Grid_simd< double                , SIMD_type > MyRealD;
 | 
			
		||||
  typedef Grid_simd< std::complex< float > , SIMD_type > MyComplexF;
 | 
			
		||||
  typedef Grid_simd< std::complex< double >, SIMD_type > MyComplexD;
 | 
			
		||||
 | 
			
		||||
  typedef Grid_simd< float                 , SIMD_Ftype > MyRealF;
 | 
			
		||||
  typedef Grid_simd< double                , SIMD_Dtype > MyRealD;
 | 
			
		||||
  typedef Grid_simd< std::complex< float > , SIMD_Ftype > MyComplexF;
 | 
			
		||||
  typedef Grid_simd< std::complex< double >, SIMD_Dtype > MyComplexD;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								lib/stamp-h1
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								lib/stamp-h1
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
timestamp for lib/Grid_config.h
 | 
			
		||||
							
								
								
									
										0
									
								
								lib/stencil/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								lib/stencil/.dirstamp
									
									
									
									
									
										Normal file
									
								
							
		Reference in New Issue
	
	Block a user