diff --git a/Makefile.in b/Makefile.in index b9fba168..b6894ef6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,7 +1,7 @@ -# Makefile.in generated by automake 1.15 from Makefile.am. +# Makefile.in generated by automake 1.14.1 from Makefile.am. # @configure_input@ -# Copyright (C) 1994-2014 Free Software Foundation, Inc. +# Copyright (C) 1994-2013 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -14,17 +14,7 @@ @SET_MAKE@ VPATH = @srcdir@ -am__is_gnu_make = { \ - if test -z '$(MAKELEVEL)'; then \ - false; \ - elif test -n '$(MAKE_HOST)'; then \ - true; \ - elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ - true; \ - else \ - false; \ - fi; \ -} +am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)' am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ @@ -89,12 +79,14 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = . 
+DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \ + $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ + $(top_srcdir)/configure $(am__configure_deps) COPYING TODO \ + compile config.guess config.sub depcomp install-sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) -DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \ - $(am__configure_deps) $(am__DIST_COMMON) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d @@ -157,9 +149,6 @@ ETAGS = etags CTAGS = ctags CSCOPE = cscope DIST_SUBDIRS = $(SUBDIRS) -am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \ - INSTALL NEWS README TODO compile config.guess config.sub \ - depcomp install-sh missing DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) @@ -325,6 +314,7 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu Makefile +.PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ @@ -531,15 +521,15 @@ dist-xz: distdir $(am__post_remove_distdir) dist-tarZ: distdir - @echo WARNING: "Support for distribution archives compressed with" \ - "legacy program 'compress' is deprecated." >&2 + @echo WARNING: "Support for shar distribution archives is" \ + "deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__post_remove_distdir) dist-shar: distdir - @echo WARNING: "Support for shar distribution archives is" \ - "deprecated." 
>&2 + @echo WARNING: "Support for distribution archives compressed with" \ + "legacy program 'compress' is deprecated." >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2 shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__post_remove_distdir) @@ -575,17 +565,17 @@ distcheck: dist esac chmod -R a-w $(distdir) chmod u+w $(distdir) - mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst + mkdir $(distdir)/_build $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ - && $(am__cd) $(distdir)/_build/sub \ - && ../../configure \ + && $(am__cd) $(distdir)/_build \ + && ../configure \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ - --srcdir=../.. --prefix="$$dc_install_base" \ + --srcdir=.. --prefix="$$dc_install_base" \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ @@ -759,8 +749,6 @@ uninstall-am: maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ pdf-am ps ps-am tags tags-am uninstall uninstall-am -.PRECIOUS: Makefile - # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/aclocal.m4 b/aclocal.m4 index bf79d078..389763bf 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1,6 +1,6 @@ -# generated automatically by aclocal 1.15 -*- Autoconf -*- +# generated automatically by aclocal 1.14.1 -*- Autoconf -*- -# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# Copyright (C) 1996-2013 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to. 
If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) -# Copyright (C) 2002-2014 Free Software Foundation, Inc. +# Copyright (C) 2002-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.]) # generated from the m4 files accompanying Automake X.Y. # (This private macro should not be called outside this file.) AC_DEFUN([AM_AUTOMAKE_VERSION], -[am__api_version='1.15' +[am__api_version='1.14' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. -m4_if([$1], [1.15], [], +m4_if([$1], [1.14.1], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) @@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], []) # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], -[AM_AUTOMAKE_VERSION([1.15])dnl +[AM_AUTOMAKE_VERSION([1.14.1])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # AM_AUX_DIR_EXPAND -*- Autoconf -*- -# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -103,14 +103,15 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # configured tree to be moved without reconfiguration. AC_DEFUN([AM_AUX_DIR_EXPAND], -[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl -# Expand $ac_aux_dir to an absolute path. 
-am_aux_dir=`cd "$ac_aux_dir" && pwd` +[dnl Rely on autoconf to set up CDPATH properly. +AC_PREREQ([2.50])dnl +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` ]) # AM_CONDITIONAL -*- Autoconf -*- -# Copyright (C) 1997-2014 Free Software Foundation, Inc. +# Copyright (C) 1997-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -141,7 +142,7 @@ AC_CONFIG_COMMANDS_PRE( Usually this means the macro was only invoked conditionally.]]) fi])]) -# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# Copyright (C) 1999-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -332,7 +333,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl # Generate code to set up dependency tracking. -*- Autoconf -*- -# Copyright (C) 1999-2014 Free Software Foundation, Inc. +# Copyright (C) 1999-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -408,7 +409,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], # Do all the work for Automake. -*- Autoconf -*- -# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# Copyright (C) 1996-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -498,8 +499,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl # # AC_SUBST([mkdir_p], ['$(MKDIR_P)']) -# We need awk for the "check" target (and possibly the TAP driver). The -# system "awk" is bad on some platforms. +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl @@ -572,11 +573,7 @@ to "yes", and re-run configure. 
END AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) fi -fi -dnl The trailing newline in this macro's definition is deliberate, for -dnl backward compatibility and to allow trailing 'dnl'-style comments -dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841. -]) +fi]) dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further @@ -605,7 +602,7 @@ for _am_header in $config_headers :; do done echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) -# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -616,7 +613,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co # Define $install_sh. AC_DEFUN([AM_PROG_INSTALL_SH], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl -if test x"${install_sh+set}" != xset; then +if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; @@ -626,7 +623,7 @@ if test x"${install_sh+set}" != xset; then fi AC_SUBST([install_sh])]) -# Copyright (C) 2003-2014 Free Software Foundation, Inc. +# Copyright (C) 2003-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -647,7 +644,7 @@ AC_SUBST([am__leading_dot])]) # Check to see how 'make' treats includes. -*- Autoconf -*- -# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -697,7 +694,7 @@ rm -f confinc confmf # Fake the existence of programs that GNU maintainers use. 
-*- Autoconf -*- -# Copyright (C) 1997-2014 Free Software Foundation, Inc. +# Copyright (C) 1997-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -736,7 +733,7 @@ fi # Helper functions for option handling. -*- Autoconf -*- -# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -767,7 +764,7 @@ AC_DEFUN([_AM_IF_OPTION], # Check to make sure that the build environment is sane. -*- Autoconf -*- -# Copyright (C) 1996-2014 Free Software Foundation, Inc. +# Copyright (C) 1996-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -848,7 +845,7 @@ AC_CONFIG_COMMANDS_PRE( rm -f conftest.file ]) -# Copyright (C) 2009-2014 Free Software Foundation, Inc. +# Copyright (C) 2009-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -908,7 +905,7 @@ AC_SUBST([AM_BACKSLASH])dnl _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl ]) -# Copyright (C) 2001-2014 Free Software Foundation, Inc. +# Copyright (C) 2001-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -936,7 +933,7 @@ fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" AC_SUBST([INSTALL_STRIP_PROGRAM])]) -# Copyright (C) 2006-2014 Free Software Foundation, Inc. +# Copyright (C) 2006-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, @@ -955,7 +952,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) # Check how to create a tarball. 
-*- Autoconf -*- -# Copyright (C) 2004-2014 Free Software Foundation, Inc. +# Copyright (C) 2004-2013 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, diff --git a/config.guess b/config.guess index a12faba2..5f6aa02d 120000 --- a/config.guess +++ b/config.guess @@ -1 +1 @@ -/opt/local/share/automake-1.15/config.guess \ No newline at end of file +/usr/share/automake-1.14/config.guess \ No newline at end of file diff --git a/config.sub b/config.sub index e3c9b5ca..0abfe18c 120000 --- a/config.sub +++ b/config.sub @@ -1 +1 @@ -/opt/local/share/automake-1.15/config.sub \ No newline at end of file +/usr/share/automake-1.14/config.sub \ No newline at end of file diff --git a/configure b/configure index 6e27ab11..8c9e8c59 100755 --- a/configure +++ b/configure @@ -2466,7 +2466,7 @@ test -n "$target_alias" && NONENONEs,x,x, && program_prefix=${target_alias}- -am__api_version='1.15' +am__api_version='1.14' # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or @@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE && ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` -# Expand $ac_aux_dir to an absolute path. -am_aux_dir=`cd "$ac_aux_dir" && pwd` +# expand $ac_aux_dir to an absolute path +am_aux_dir=`cd $ac_aux_dir && pwd` if test x"${MISSING+set}" != xset; then case $am_aux_dir in @@ -2658,7 +2658,7 @@ else $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} fi -if test x"${install_sh+set}" != xset; then +if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; @@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"} # mkdir_p='$(MKDIR_P)' -# We need awk for the "check" target (and possibly the TAP driver). 
The -# system "awk" is bad on some platforms. +# We need awk for the "check" target. The system "awk" is bad on +# some platforms. # Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AMTAR='$${TAR-tar}' @@ -3046,7 +3046,6 @@ END fi - ac_config_headers="$ac_config_headers lib/Grid_config.h" # Check whether --enable-silent-rules was given. diff --git a/lib/Grid_init.cc b/lib/Grid_init.cc index 572672eb..7ff698c6 100644 --- a/lib/Grid_init.cc +++ b/lib/Grid_init.cc @@ -14,6 +14,7 @@ #include #include #include +#include #undef __X86_64 #define MAC diff --git a/lib/simd/Grid_sse4.h b/lib/simd/Grid_sse4.h index ed4039b7..ddc3490b 100644 --- a/lib/simd/Grid_sse4.h +++ b/lib/simd/Grid_sse4.h @@ -1,8 +1,10 @@ //---------------------------------------------------------------------- /*! @file Grid_sse4.h - @brief Optimization libraries + @brief Optimization libraries for SSE4 instructions set + + Using intrinsics */ -// Time-stamp: <2015-05-19 17:06:51 neo> +// Time-stamp: <2015-05-20 16:45:39 neo> //---------------------------------------------------------------------- #include @@ -49,6 +51,20 @@ namespace Optimization { }; + struct Vstream{ + //Float + inline void operator()(__m128 a, __m128 b){ + _mm_stream_ps((float *)&a,b); + } + //Double + inline void operator()(__m128d a, __m128d b){ + _mm_stream_pd((double *)&a,b); + } + + + }; + + struct Vset{ // Complex float @@ -75,27 +91,20 @@ namespace Optimization { }; + template struct Reduce{ - //Complex float - inline Grid::ComplexF operator()(__m128 in){ - union { - __m128 v1; - float f[4]; - } u128; - u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros - return Grid::ComplexF(u128.f[0], u128.f[1]); + //Need templated class to overload output type + //General form must generate error if compiled + inline Out_type operator()(In_type in){ + printf("Error, using wrong Reduce function\n"); + 
exit(1); + return 0; } - //Complex double - inline Grid::ComplexD operator()(__m128d in){ - printf("Missing complex double implementation -> FIX\n"); - return Grid::ComplexD(0,0); // FIXME wrong - } - - - }; + + ///////////////////////////////////////////////////// // Arithmetic operations ///////////////////////////////////////////////////// @@ -129,25 +138,26 @@ namespace Optimization { } }; + struct MultComplex{ // Complex float inline __m128 operator()(__m128 a, __m128 b){ __m128 ymm0,ymm1,ymm2; ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar, - ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br + ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai - ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi + ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi return _mm_addsub_ps(ymm0,ymm1); } // Complex double inline __m128d operator()(__m128d a, __m128d b){ __m128d ymm0,ymm1,ymm2; - ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, + ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br - ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 - ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 - ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi + ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 + ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 + ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi return _mm_addsub_pd(ymm0,ymm1); } }; @@ -165,14 +175,112 @@ namespace Optimization { inline __m128i operator()(__m128i a, __m128i b){ return _mm_mul_epi32(a,b); } + }; + + + struct Conj{ + // Complex single + inline __m128 operator()(__m128 in){ + return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f)); + } + // Complex double + inline __m128d operator()(__m128d in){ + return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), 
_mm_set1_pd(-0.f));//untested + } + // do not define for integer input + }; + + struct TimesMinusI{ + //Complex single + inline __m128 operator()(__m128 in, __m128 ret){ + __m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i + return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1)); + } + //Complex double + inline __m128d operator()(__m128d in, __m128d ret){ + __m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i + return _mm_shuffle_pd(tmp,tmp,0x1); + } + + + }; + + struct TimesI{ + //Complex single + inline __m128 operator()(__m128 in, __m128 ret){ + __m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1)); + return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i + } + //Complex double + inline __m128d operator()(__m128d in, __m128d ret){ + __m128d tmp = _mm_shuffle_pd(in,in,0x1); + return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i + } + }; + + ////////////////////////////////////////////// + // Some Template specialization + + //Complex float Reduce + template<> + inline Grid::ComplexF Reduce::operator()(__m128 in){ + union { + __m128 v1; + float f[4]; + } u128; + u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros + return Grid::ComplexF(u128.f[0], u128.f[1]); + } + //Real float Reduce + template<> + inline Grid::RealF Reduce::operator()(__m128 in){ + // FIXME Hack + const Grid::RealF * ptr = (const Grid::RealF *) &in; + Grid::RealF ret = 0; + for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float + ret = ret+ptr[i]; + } + return ret; + } + + + //Complex double Reduce + template<> + inline Grid::ComplexD Reduce::operator()(__m128d in){ + printf("Reduce : Missing good complex double implementation -> FIX\n"); + return Grid::ComplexD(in[0], in[1]); // inefficient + } + + //Real double Reduce + template<> + inline Grid::RealD Reduce::operator()(__m128d in){ + // FIXME Hack + const Grid::RealD * ptr =(const Grid::RealD *) &in; + Grid::RealD ret = 0; + for(int i=0;i< 2 ;i++){// 2 number of simd lanes for double + ret =
ret+ptr[i]; + } + return ret; + } + + //Integer Reduce + template<> + inline Integer Reduce::operator()(__m128i in){ + // FIXME unimplemented + printf("Reduce : Missing integer implementation -> FIX\n"); + assert(0); + } + + } +////////////////////////////////////////////////////////////////////////////////////// // Here assign types namespace Grid { typedef __m128 SIMD_Ftype; // Single precision type @@ -180,15 +288,21 @@ namespace Grid { typedef __m128i SIMD_Itype; // Integer type - // Function names - typedef Optimization::Vsplat VsplatSIMD; - typedef Optimization::Vstore VstoreSIMD; + // Function name aliases + typedef Optimization::Vsplat VsplatSIMD; + typedef Optimization::Vstore VstoreSIMD; + typedef Optimization::Vset VsetSIMD; + typedef Optimization::Vstream VstreamSIMD; + template using ReduceSIMD = Optimization::Reduce; + // Arithmetic operations typedef Optimization::Sum SumSIMD; typedef Optimization::Sub SubSIMD; typedef Optimization::Mult MultSIMD; typedef Optimization::MultComplex MultComplexSIMD; - typedef Optimization::Vset VsetSIMD; + typedef Optimization::Conj ConjSIMD; + typedef Optimization::TimesMinusI TimesMinusISIMD; + typedef Optimization::TimesI TimesISIMD; } diff --git a/lib/simd/Grid_vector_types.h b/lib/simd/Grid_vector_types.h index 97958fe4..57480621 100644 --- a/lib/simd/Grid_vector_types.h +++ b/lib/simd/Grid_vector_types.h @@ -2,7 +2,7 @@ /*! 
@file Grid_vector_types.h @brief Defines templated class Grid_simd to deal with inner vector types */ -// Time-stamp: <2015-05-19 17:20:36 neo> +// Time-stamp: <2015-05-20 17:31:55 neo> //--------------------------------------------------------------------------- #ifndef GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES @@ -22,6 +22,16 @@ namespace Grid { typedef T type; }; + // type alias used to simplify the syntax of std::enable_if + template using Invoke = + typename T::type; + template using EnableIf = + Invoke>; + template using NotEnableIf = + Invoke>; + + + //////////////////////////////////////////////////////// // Check for complexity with type traits template @@ -94,31 +104,32 @@ namespace Grid { // Initialise to 1,0,i for the correct types /////////////////////////////////////////////// // if not complex overload here - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf,int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf,int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } - // overload for complex type - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > + // For complex types + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor? 
- - // For integral type - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} + + // For integral types + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} - template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vfalse(vInteger &ret){vsplat(ret,0);} - // do not compile if real or integer, send an error message from the compiler - template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > - friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);} + + //////////////////////////////////// // Arithmetic operator overloads +,-,* @@ -138,7 +149,7 @@ namespace Grid { }; // Distinguish between complex types and others - template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) { Grid_simd ret; @@ -147,7 +158,7 @@ namespace Grid { }; // Real/Integer types - template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > + template < class S = Scalar_type, NotEnableIf, int> = 0 > friend inline 
Grid_simd operator * (Grid_simd a, Grid_simd b) { Grid_simd ret; @@ -156,8 +167,6 @@ namespace Grid { }; - - //////////////////////////////////////////////////////////////////////// // FIXME: gonna remove these load/store, get, set, prefetch //////////////////////////////////////////////////////////////////////// @@ -170,14 +179,14 @@ namespace Grid { /////////////////////// // overload if complex template < class S = Scalar_type > - friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){ + friend inline void vsplat(Grid_simd &ret, EnableIf, S> c){ Real a = real(c); Real b = imag(c); vsplat(ret,a,b); } - // this only for the complex version - template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > + // this is only for the complex version + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline void vsplat(Grid_simd &ret,Real a, Real b){ ret.v = binary(a, b, VsplatSIMD()); } @@ -187,22 +196,45 @@ namespace Grid { ret.v = unary(a, VsplatSIMD()); } - + /////////////////////// + // Vstore + /////////////////////// friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ binary(ret.v, (Real*)a, VstoreSIMD()); } + /////////////////////// + // Vstream + /////////////////////// + friend inline void vstream(Grid_simd &out,const Grid_simd &in){ + binary(out.v, in.v, VstreamSIMD()); + } + + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void vstream(Grid_simd &out,const Grid_simd &in){ + out=in; + } + + /////////////////////// + // Vprefetch + /////////////////////// friend inline void vprefetch(const Grid_simd &v) { _mm_prefetch((const char*)&v.v,_MM_HINT_T0); } + /////////////////////// + // Reduce + /////////////////////// friend inline Scalar_type Reduce(const Grid_simd & in) { - // FIXME add operator + return unary(in.v, ReduceSIMD()); } + //////////////////////////// + // operator scalar * simd + ////////////////////////////
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){ Grid_simd va; vsplat(va,a); @@ -215,25 +247,63 @@ namespace Grid { /////////////////////// // Conjugate /////////////////////// - + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd conjugate(const Grid_simd &in){ - Grid_simd ret ; vzero(ret); - // FIXME add operator + Grid_simd ret ; + ret.v = unary(in.v, ConjSIMD()); return ret; } + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd conjugate(const Grid_simd &in){ + return in; // for real objects + } + + + /////////////////////// + // timesMinusI + /////////////////////// + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesMinusISIMD()); + } + + template < class S = Scalar_type, EnableIf, int> = 0 > friend inline Grid_simd timesMinusI(const Grid_simd &in){ Grid_simd ret; - vzero(ret); - // FIXME add operator + timesMinusI(ret,in); return ret; } - friend inline Grid_simd timesI(const Grid_simd &in){ - Grid_simd ret; vzero(ret); - // FIXME add operator - return ret; + + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd timesMinusI(const Grid_simd &in){ + return in; + } + + + /////////////////////// + // timesI + /////////////////////// + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline void timesI(Grid_simd &ret,const Grid_simd &in){ + ret.v = binary(in.v, ret.v, TimesISIMD()); } + template < class S = Scalar_type, EnableIf, int> = 0 > + friend inline Grid_simd timesI(const Grid_simd &in){ + Grid_simd ret; + timesI(ret,in); + return ret; + } + + template < class S = Scalar_type, NotEnableIf, int> = 0 > + friend inline Grid_simd timesI(const Grid_simd &in){ + return in; + } + + + /////////////////////// // Unary negation + /////////////////////// friend inline Grid_simd operator -(const Grid_simd &r) { vComplexF ret; 
vzero(ret); @@ -257,41 +327,22 @@ namespace Grid { - friend inline void permute(Grid_simd &y,Grid_simd b,int perm) - { - Gpermute(y,b,perm); - } - /* + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// friend inline void permute(Grid_simd &y,Grid_simd b,int perm) { Gpermute(y,b,perm); } - friend inline void merge(Grid_simd &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const Grid_simd &y,std::vector &extracted) - { - Gextract(y,extracted); - } - friend inline void merge(Grid_simd &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(const Grid_simd &y,std::vector &extracted) - { - Gextract(y,extracted); - } - */ - + + };// end of Grid_simd class definition - - - template inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r) { @@ -315,7 +366,7 @@ namespace Grid { } - // Define available types (now change names to avoid clashing) + // Define available types (now change names to avoid clashing with the rest of the code) typedef Grid_simd< float , SIMD_Ftype > MyRealF; typedef Grid_simd< double , SIMD_Dtype > MyRealD; @@ -324,6 +375,29 @@ namespace Grid { + + //////////////////////////////////////////////////////////////////// + // Temporary hack to keep independent from the rest of the code + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + 
template<> struct isGridTensor { + static const bool value = false; + static const bool notvalue = true; + }; + + + + } #endif diff --git a/tests/Grid_main.cc b/tests/Grid_main.cc index 33b1cf4f..10e74099 100644 --- a/tests/Grid_main.cc +++ b/tests/Grid_main.cc @@ -161,30 +161,40 @@ int main (int argc, char ** argv) ///////// Tests the new class Grid_simd std::complex ctest(3.0,2.0); std::complex ctestf(3.0,2.0); - MyComplexF TestMe1(1.0); // fill real part + MyComplexF TestMe1(1.0); // fills only real part MyComplexD TestMe2(ctest); MyComplexD TestMe3(ctest);// compiler generate conversion of basic types //MyRealF TestMe5(ctest);// Must generate compiler error - MyRealD TestMe4(2.0); + MyRealD TestRe1(2.0); + MyRealF TestRe2(3.0); + vone(TestRe2); + MyComplexF TestMe6(ctestf); MyComplexF TestMe7(ctestf); MyComplexD TheSum= TestMe2*TestMe3; MyComplexF TheSumF= TestMe6*TestMe7; + + double dsum[2]; _mm_store_pd(dsum, TheSum.v); for (int i =0; i< 2; i++) printf("%f\n", dsum[i]); + MyComplexD TheSumI = timesMinusI(TheSum); + MyComplexF TheSumIF = timesMinusI(TheSumF); float fsum[4]; _mm_store_ps(fsum, TheSumF.v); for (int i =0; i< 4; i++) printf("%f\n", fsum[i]); - vstore(TheSum, &ctest); + vstore(TheSumI, &ctest); + std::complex sum = Reduce(TheSumF); std::cout << ctest<< std::endl; + std::cout << sum<< std::endl; + #endif /////////////////////// diff --git a/tests/Grid_simd_new.cc b/tests/Grid_simd_new.cc new file mode 100644 index 00000000..41781304 --- /dev/null +++ b/tests/Grid_simd_new.cc @@ -0,0 +1,165 @@ +#include +#include "simd/Grid_vector_types.h" +#include + +using namespace std; +using namespace Grid; +using namespace Grid::QCD; + +class funcPlus { +public: + funcPlus() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;} + std::string name(void) const { return std::string("Plus"); } +}; +class funcMinus { +public: + funcMinus() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;} + std::string name(void) 
const { return std::string("Minus"); } +}; +class funcTimes { +public: + funcTimes() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;} + std::string name(void) const { return std::string("Times"); } +}; +class funcConj { +public: + funcConj() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);} + std::string name(void) const { return std::string("Conj"); } +}; +class funcAdj { +public: + funcAdj() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);} + std::string name(void) const { return std::string("Adj"); } +}; + +class funcTimesI { +public: + funcTimesI() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);} + std::string name(void) const { return std::string("timesI"); } +}; + +class funcTimesMinusI { +public: + funcTimesMinusI() {}; + template void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);} + std::string name(void) const { return std::string("timesMinusI"); } +}; + +template +void Tester(const functor &func) +{ + GridSerialRNG sRNG; + sRNG.SeedRandomDevice(); + + int Nsimd = vec::Nsimd(); + + std::vector input1(Nsimd); + std::vector input2(Nsimd); + std::vector result(Nsimd); + std::vector reference(Nsimd); + + std::vector > buf(3); + vec & v_input1 = buf[0]; + vec & v_input2 = buf[1]; + vec & v_result = buf[2]; + + + for(int i=0;i(v_input1,input1); + merge(v_input2,input2); + merge(v_result,result); + + func(v_result,v_input1,v_input2); + + for(int i=0;i(v_result,result); + std::cout << " " << func.name()<0){ + std::cout<< "*****" << std::endl; + std::cout<< "["< latt_size = GridDefaultLatt(); + std::vector simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd()); + std::vector mpi_layout = GridDefaultMpi(); + + GridCartesian Grid(latt_size,simd_layout,mpi_layout); + std::vector seeds({1,2,3,4}); + + // Insist that operations on random scalars gives + // identical results to on vectors. 
+ + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyComplexF "<(funcTimesI()); + Tester(funcTimesMinusI()); + Tester(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcConj()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyComplexD "<(funcTimesI()); + Tester(funcTimesMinusI()); + Tester(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcConj()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyRealF "<(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcAdj()); + + std::cout << "==================================="<< std::endl; + std::cout << "Testing MyRealD "<(funcPlus()); + Tester(funcMinus()); + Tester(funcTimes()); + Tester(funcAdj()); + + Grid_finalize(); +} diff --git a/tests/Makefile.am b/tests/Makefile.am index 80f3a34c..95642ed8 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib # # Test code # -bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed +bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_simd_new Grid_main_SOURCES = Grid_main.cc Grid_main_LDADD = -lGrid @@ -33,3 +33,6 @@ Grid_stencil_LDADD = -lGrid Grid_simd_SOURCES = Grid_simd.cc Grid_simd_LDADD = -lGrid + +Grid_simd_new_SOURCES = Grid_simd_new.cc +Grid_simd_new_LDADD = -lGrid