1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-13 01:05:36 +00:00

Merge pull request #7 from coppolachan/master

Added full support for SSE4
This commit is contained in:
Peter Boyle 2015-05-22 05:58:59 +01:00
commit 96e5c5c6ca
11 changed files with 512 additions and 161 deletions

View File

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.15 from Makefile.am. # Makefile.in generated by automake 1.14.1 from Makefile.am.
# @configure_input@ # @configure_input@
# Copyright (C) 1994-2014 Free Software Foundation, Inc. # Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation # This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -14,17 +14,7 @@
@SET_MAKE@ @SET_MAKE@
VPATH = @srcdir@ VPATH = @srcdir@
am__is_gnu_make = { \ am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \ am__make_running_with_option = \
case $${target_option-} in \ case $${target_option-} in \
?) ;; \ ?) ;; \
@ -89,12 +79,14 @@ build_triplet = @build@
host_triplet = @host@ host_triplet = @host@
target_triplet = @target@ target_triplet = @target@
subdir = . subdir = .
DIST_COMMON = INSTALL NEWS README AUTHORS ChangeLog \
$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) COPYING TODO \
compile config.guess config.sub depcomp install-sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac am__aclocal_m4_deps = $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4) $(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(top_srcdir)/configure \
$(am__configure_deps) $(am__DIST_COMMON)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d mkinstalldirs = $(install_sh) -d
@ -157,9 +149,6 @@ ETAGS = etags
CTAGS = ctags CTAGS = ctags
CSCOPE = cscope CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS) DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in AUTHORS COPYING ChangeLog \
INSTALL NEWS README TODO compile config.guess config.sub \
depcomp install-sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION) distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir) top_distdir = $(distdir)
@ -325,6 +314,7 @@ $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \
$(am__cd) $(top_srcdir) && \ $(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu Makefile $(AUTOMAKE) --gnu Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \ @case '$?' in \
*config.status*) \ *config.status*) \
@ -531,15 +521,15 @@ dist-xz: distdir
$(am__post_remove_distdir) $(am__post_remove_distdir)
dist-tarZ: distdir dist-tarZ: distdir
@echo WARNING: "Support for distribution archives compressed with" \ @echo WARNING: "Support for shar distribution archives is" \
"legacy program 'compress' is deprecated." >&2 "deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir) $(am__post_remove_distdir)
dist-shar: distdir dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \ @echo WARNING: "Support for distribution archives compressed with" \
"deprecated." >&2 "legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2 @echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir) $(am__post_remove_distdir)
@ -575,17 +565,17 @@ distcheck: dist
esac esac
chmod -R a-w $(distdir) chmod -R a-w $(distdir)
chmod u+w $(distdir) chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_build/sub $(distdir)/_inst mkdir $(distdir)/_build $(distdir)/_inst
chmod a-w $(distdir) chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \ test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \ && am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build/sub \ && $(am__cd) $(distdir)/_build \
&& ../../configure \ && ../configure \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \
--srcdir=../.. --prefix="$$dc_install_base" \ --srcdir=.. --prefix="$$dc_install_base" \
&& $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) check \
@ -759,8 +749,6 @@ uninstall-am:
maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ maintainer-clean-generic mostlyclean mostlyclean-generic pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am pdf-am ps ps-am tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
# Tell versions [3.59,3.63) of GNU make to not export all variables. # Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded. # Otherwise a system limit (for SysV at least) may be exceeded.

61
aclocal.m4 vendored
View File

@ -1,6 +1,6 @@
# generated automatically by aclocal 1.15 -*- Autoconf -*- # generated automatically by aclocal 1.14.1 -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc. # Copyright (C) 1996-2013 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely. If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])]) To do so, use the procedure documented by the package, typically 'autoreconf'.])])
# Copyright (C) 2002-2014 Free Software Foundation, Inc. # Copyright (C) 2002-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -32,10 +32,10 @@ To do so, use the procedure documented by the package, typically 'autoreconf'.])
# generated from the m4 files accompanying Automake X.Y. # generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.) # (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION], AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.15' [am__api_version='1.14'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro. dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.15], [], m4_if([$1], [1.14.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
]) ])
@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.15])dnl [AM_AUTOMAKE_VERSION([1.14.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION], m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*- # AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc. # Copyright (C) 2001-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -103,14 +103,15 @@ _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# configured tree to be moved without reconfiguration. # configured tree to be moved without reconfiguration.
AC_DEFUN([AM_AUX_DIR_EXPAND], AC_DEFUN([AM_AUX_DIR_EXPAND],
[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl [dnl Rely on autoconf to set up CDPATH properly.
# Expand $ac_aux_dir to an absolute path. AC_PREREQ([2.50])dnl
am_aux_dir=`cd "$ac_aux_dir" && pwd` # expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`
]) ])
# AM_CONDITIONAL -*- Autoconf -*- # AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc. # Copyright (C) 1997-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -141,7 +142,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]]) Usually this means the macro was only invoked conditionally.]])
fi])]) fi])])
# Copyright (C) 1999-2014 Free Software Foundation, Inc. # Copyright (C) 1999-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -332,7 +333,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*- # Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2014 Free Software Foundation, Inc. # Copyright (C) 1999-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -408,7 +409,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*- # Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc. # Copyright (C) 1996-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -498,8 +499,8 @@ AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)']) AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target (and possibly the TAP driver). The # We need awk for the "check" target. The system "awk" is bad on
# system "awk" is bad on some platforms. # some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl
@ -572,11 +573,7 @@ to "yes", and re-run configure.
END END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi fi
fi fi])
dnl The trailing newline in this macro's definition is deliberate, for
dnl backward compatibility and to allow trailing 'dnl'-style comments
dnl after the AM_INIT_AUTOMAKE invocation. See automake bug#16841.
])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@ -605,7 +602,7 @@ for _am_header in $config_headers :; do
done done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2014 Free Software Foundation, Inc. # Copyright (C) 2001-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -616,7 +613,7 @@ echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_co
# Define $install_sh. # Define $install_sh.
AC_DEFUN([AM_PROG_INSTALL_SH], AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
if test x"${install_sh+set}" != xset; then if test x"${install_sh}" != xset; then
case $am_aux_dir in case $am_aux_dir in
*\ * | *\ *) *\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -626,7 +623,7 @@ if test x"${install_sh+set}" != xset; then
fi fi
AC_SUBST([install_sh])]) AC_SUBST([install_sh])])
# Copyright (C) 2003-2014 Free Software Foundation, Inc. # Copyright (C) 2003-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -647,7 +644,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*- # Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc. # Copyright (C) 2001-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -697,7 +694,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc. # Copyright (C) 1997-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -736,7 +733,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*- # Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc. # Copyright (C) 2001-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -767,7 +764,7 @@ AC_DEFUN([_AM_IF_OPTION],
# Check to make sure that the build environment is sane. -*- Autoconf -*- # Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc. # Copyright (C) 1996-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -848,7 +845,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file rm -f conftest.file
]) ])
# Copyright (C) 2009-2014 Free Software Foundation, Inc. # Copyright (C) 2009-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -908,7 +905,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
]) ])
# Copyright (C) 2001-2014 Free Software Foundation, Inc. # Copyright (C) 2001-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -936,7 +933,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])]) AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2014 Free Software Foundation, Inc. # Copyright (C) 2006-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,
@ -955,7 +952,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*- # Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2014 Free Software Foundation, Inc. # Copyright (C) 2004-2013 Free Software Foundation, Inc.
# #
# This file is free software; the Free Software Foundation # This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it, # gives unlimited permission to copy and/or distribute it,

2
config.guess vendored
View File

@ -1 +1 @@
/opt/local/share/automake-1.15/config.guess /usr/share/automake-1.14/config.guess

2
config.sub vendored
View File

@ -1 +1 @@
/opt/local/share/automake-1.15/config.sub /usr/share/automake-1.14/config.sub

13
configure vendored
View File

@ -2466,7 +2466,7 @@ test -n "$target_alias" &&
NONENONEs,x,x, && NONENONEs,x,x, &&
program_prefix=${target_alias}- program_prefix=${target_alias}-
am__api_version='1.15' am__api_version='1.14'
# Find a good install program. We prefer a C program (faster), # Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or # so one script is as good as another. But avoid the broken or
@ -2638,8 +2638,8 @@ test "$program_suffix" != NONE &&
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//' ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"` program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`
# Expand $ac_aux_dir to an absolute path. # expand $ac_aux_dir to an absolute path
am_aux_dir=`cd "$ac_aux_dir" && pwd` am_aux_dir=`cd $ac_aux_dir && pwd`
if test x"${MISSING+set}" != xset; then if test x"${MISSING+set}" != xset; then
case $am_aux_dir in case $am_aux_dir in
@ -2658,7 +2658,7 @@ else
$as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;} $as_echo "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
fi fi
if test x"${install_sh+set}" != xset; then if test x"${install_sh}" != xset; then
case $am_aux_dir in case $am_aux_dir in
*\ * | *\ *) *\ * | *\ *)
install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
@ -2986,8 +2986,8 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html> # <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)' mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target (and possibly the TAP driver). The # We need awk for the "check" target. The system "awk" is bad on
# system "awk" is bad on some platforms. # some platforms.
# Always define AMTAR for backward compatibility. Yes, it's still used # Always define AMTAR for backward compatibility. Yes, it's still used
# in the wild :-( We should find a proper way to deprecate it ... # in the wild :-( We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}' AMTAR='$${TAR-tar}'
@ -3046,7 +3046,6 @@ END
fi fi
ac_config_headers="$ac_config_headers lib/Grid_config.h" ac_config_headers="$ac_config_headers lib/Grid_config.h"
# Check whether --enable-silent-rules was given. # Check whether --enable-silent-rules was given.

View File

@ -13,6 +13,7 @@
#include <iostream> #include <iostream>
#include <Grid.h> #include <Grid.h>
#include <algorithm> #include <algorithm>
#include <iterator>
#undef __X86_64 #undef __X86_64
#define MAC #define MAC

View File

@ -1,8 +1,10 @@
//---------------------------------------------------------------------- //----------------------------------------------------------------------
/*! @file Grid_sse4.h /*! @file Grid_sse4.h
@brief Optimization libraries @brief Optimization libraries for SSE4 instructions set
Using intrinsics
*/ */
// Time-stamp: <2015-05-19 17:06:51 neo> // Time-stamp: <2015-05-20 16:45:39 neo>
//---------------------------------------------------------------------- //----------------------------------------------------------------------
#include <pmmintrin.h> #include <pmmintrin.h>
@ -49,6 +51,20 @@ namespace Optimization {
}; };
struct Vstream{
//Float
inline void operator()(__m128 a, __m128 b){
_mm_stream_ps((float *)&a,b);
}
//Double
inline void operator()(__m128d a, __m128d b){
_mm_stream_pd((double *)&a,b);
}
};
struct Vset{ struct Vset{
// Complex float // Complex float
@ -75,27 +91,20 @@ namespace Optimization {
}; };
template <typename Out_type, typename In_type>
struct Reduce{ struct Reduce{
//Complex float //Need templated class to overload output type
inline Grid::ComplexF operator()(__m128 in){ //General form must generate error if compiled
union { inline Out_type operator()(In_type in){
__m128 v1; printf("Error, using wrong Reduce function\n");
float f[4]; exit(1);
} u128; return 0;
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return Grid::ComplexF(u128.f[0], u128.f[1]);
} }
//Complex double
inline Grid::ComplexD operator()(__m128d in){
printf("Missing complex double implementation -> FIX\n");
return Grid::ComplexD(0,0); // FIXME wrong
}
}; };
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
// Arithmetic operations // Arithmetic operations
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
@ -129,25 +138,26 @@ namespace Optimization {
} }
}; };
struct MultComplex{ struct MultComplex{
// Complex float // Complex float
inline __m128 operator()(__m128 a, __m128 b){ inline __m128 operator()(__m128 a, __m128 b){
__m128 ymm0,ymm1,ymm2; __m128 ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar, ymm0 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(2,2,0,0)); // ymm0 <- ar ar,
ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br ymm0 = _mm_mul_ps(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi ymm1 = _mm_shuffle_ps(b,b,_MM_SHUFFLE(2,3,0,1)); // ymm1 <- br,bi
ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai ymm2 = _mm_shuffle_ps(a,a,_MM_SHUFFLE(3,3,1,1)); // ymm2 <- ai,ai
ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi ymm1 = _mm_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_ps(ymm0,ymm1); return _mm_addsub_ps(ymm0,ymm1);
} }
// Complex double // Complex double
inline __m128d operator()(__m128d a, __m128d b){ inline __m128d operator()(__m128d a, __m128d b){
__m128d ymm0,ymm1,ymm2; __m128d ymm0,ymm1,ymm2;
ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ymm0 = _mm_shuffle_pd(a,a,0x0); // ymm0 <- ar ar,
ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br ymm0 = _mm_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01 ymm1 = _mm_shuffle_pd(b,b,0x1); // ymm1 <- br,bi b01
ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11 ymm2 = _mm_shuffle_pd(a,a,0x3); // ymm2 <- ai,ai b11
ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi ymm1 = _mm_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
return _mm_addsub_pd(ymm0,ymm1); return _mm_addsub_pd(ymm0,ymm1);
} }
}; };
@ -165,14 +175,112 @@ namespace Optimization {
inline __m128i operator()(__m128i a, __m128i b){ inline __m128i operator()(__m128i a, __m128i b){
return _mm_mul_epi32(a,b); return _mm_mul_epi32(a,b);
} }
};
struct Conj{
// Complex single
inline __m128 operator()(__m128 in){
return _mm_xor_ps(_mm_addsub_ps(_mm_setzero_ps(),in), _mm_set1_ps(-0.f));
}
// Complex double
inline __m128d operator()(__m128d in){
return _mm_xor_pd(_mm_addsub_pd(_mm_setzero_pd(),in), _mm_set1_pd(-0.f));//untested
}
// do not define for integer input
};
struct TimesMinusI{
//Complex single
inline __m128 operator()(__m128 in, __m128 ret){
__m128 tmp =_mm_addsub_ps(_mm_setzero_ps(),in); // r,-i
return _mm_shuffle_ps(tmp,tmp,_MM_SHUFFLE(2,3,0,1));
}
//Complex double
inline __m128d operator()(__m128d in, __m128d ret){
__m128d tmp =_mm_addsub_pd(_mm_setzero_pd(),in); // r,-i
return _mm_shuffle_pd(tmp,tmp,0x1);
}
};
struct TimesI{
//Complex single
inline __m128 operator()(__m128 in, __m128 ret){
__m128 tmp =_mm_shuffle_ps(in,in,_MM_SHUFFLE(2,3,0,1));
return _mm_addsub_ps(_mm_setzero_ps(),tmp); // r,-i
}
//Complex double
inline __m128d operator()(__m128d in, __m128d ret){
__m128d tmp = _mm_shuffle_pd(in,in,0x1);
return _mm_addsub_pd(_mm_setzero_pd(),tmp); // r,-i
}
}; };
//////////////////////////////////////////////
// Some Template specialization
//Complex float Reduce
template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m128>::operator()(__m128 in){
union {
__m128 v1;
float f[4];
} u128;
u128.v1 = _mm_add_ps(in, _mm_shuffle_ps(in,in, 0b01001110)); // FIXME Prefer to use _MM_SHUFFLE macros
return Grid::ComplexF(u128.f[0], u128.f[1]);
}
//Real float Reduce
template<>
inline Grid::RealF Reduce<Grid::RealF, __m128>::operator()(__m128 in){
// FIXME Hack
const Grid::RealF * ptr = (const Grid::RealF *) &in;
Grid::RealF ret = 0;
for(int i=0;i< 4 ;i++){ // 4 number of simd lanes for float
ret = ret+ptr[i];
}
return ret;
}
//Complex double Reduce
template<>
inline Grid::ComplexD Reduce<Grid::ComplexD, __m128d>::operator()(__m128d in){
printf("Reduce : Missing good complex double implementation -> FIX\n");
return Grid::ComplexD(in[0], in[1]); // inefficient
}
//Real double Reduce
template<>
inline Grid::RealD Reduce<Grid::RealD, __m128d>::operator()(__m128d in){
// FIXME Hack
const Grid::RealD * ptr =(const Grid::RealD *) &in;
Grid::RealD ret = 0;
for(int i=0;i< 2 ;i++){// 2 number of simd lanes for float
ret = ret+ptr[i];
}
return ret;
}
//Integer Reduce
template<>
inline Integer Reduce<Integer, __m128i>::operator()(__m128i in){
// FIXME unimplemented
printf("Reduce : Missing integer implementation -> FIX\n");
assert(0);
}
} }
//////////////////////////////////////////////////////////////////////////////////////
// Here assign types // Here assign types
namespace Grid { namespace Grid {
typedef __m128 SIMD_Ftype; // Single precision type typedef __m128 SIMD_Ftype; // Single precision type
@ -180,15 +288,21 @@ namespace Grid {
typedef __m128i SIMD_Itype; // Integer type typedef __m128i SIMD_Itype; // Integer type
// Function names // Function name aliases
typedef Optimization::Vsplat VsplatSIMD; typedef Optimization::Vsplat VsplatSIMD;
typedef Optimization::Vstore VstoreSIMD; typedef Optimization::Vstore VstoreSIMD;
typedef Optimization::Vset VsetSIMD;
typedef Optimization::Vstream VstreamSIMD;
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
// Arithmetic operations // Arithmetic operations
typedef Optimization::Sum SumSIMD; typedef Optimization::Sum SumSIMD;
typedef Optimization::Sub SubSIMD; typedef Optimization::Sub SubSIMD;
typedef Optimization::Mult MultSIMD; typedef Optimization::Mult MultSIMD;
typedef Optimization::MultComplex MultComplexSIMD; typedef Optimization::MultComplex MultComplexSIMD;
typedef Optimization::Vset VsetSIMD; typedef Optimization::Conj ConjSIMD;
typedef Optimization::TimesMinusI TimesMinusISIMD;
typedef Optimization::TimesI TimesISIMD;
} }

View File

@ -2,7 +2,7 @@
/*! @file Grid_vector_types.h /*! @file Grid_vector_types.h
@brief Defines templated class Grid_simd to deal with inner vector types @brief Defines templated class Grid_simd to deal with inner vector types
*/ */
// Time-stamp: <2015-05-19 17:20:36 neo> // Time-stamp: <2015-05-20 17:31:55 neo>
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
#ifndef GRID_VECTOR_TYPES #ifndef GRID_VECTOR_TYPES
#define GRID_VECTOR_TYPES #define GRID_VECTOR_TYPES
@ -22,6 +22,16 @@ namespace Grid {
typedef T type; typedef T type;
}; };
// type alias used to simplify the syntax of std::enable_if
template <typename T> using Invoke =
typename T::type;
template <typename Condition, typename ReturnType> using EnableIf =
Invoke<std::enable_if<Condition::value, ReturnType>>;
template <typename Condition, typename ReturnType> using NotEnableIf =
Invoke<std::enable_if<!Condition::value, ReturnType>>;
//////////////////////////////////////////////////////// ////////////////////////////////////////////////////////
// Check for complexity with type traits // Check for complexity with type traits
template <typename T> template <typename T>
@ -94,31 +104,32 @@ namespace Grid {
// Initialise to 1,0,i for the correct types // Initialise to 1,0,i for the correct types
/////////////////////////////////////////////// ///////////////////////////////////////////////
// if not complex overload here // if not complex overload here
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0); }
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, NotEnableIf<is_complex < S >,int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); } friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0); }
// overload for complex type // For complex types
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1.0,0.0); }
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor? friend inline void vzero(Grid_simd &ret) { vsplat(ret,0.0,0.0); }// use xor?
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
// For integral type friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 >
// For integral types
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vone(Grid_simd &ret) { vsplat(ret,1); } friend inline void vone(Grid_simd &ret) { vsplat(ret,1); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); } friend inline void vzero(Grid_simd &ret) { vsplat(ret,0); }
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);} friend inline void vtrue (Grid_simd &ret){vsplat(ret,0xFFFFFFFF);}
template < class S = Scalar_type,typename std::enable_if < std::is_integral < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vfalse(vInteger &ret){vsplat(ret,0);} friend inline void vfalse(vInteger &ret){vsplat(ret,0);}
// do not compile if real or integer, send an error message from the compiler
template < class S = Scalar_type,typename std::enable_if < is_complex < S >::value, int >::type = 0 >
friend inline void vcomplex_i(Grid_simd &ret){ vsplat(ret,0.0,1.0);}
//////////////////////////////////// ////////////////////////////////////
// Arithmetic operator overloads +,-,* // Arithmetic operator overloads +,-,*
@ -138,7 +149,7 @@ namespace Grid {
}; };
// Distinguish between complex types and others // Distinguish between complex types and others
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{ {
Grid_simd ret; Grid_simd ret;
@ -147,7 +158,7 @@ namespace Grid {
}; };
// Real/Integer types // Real/Integer types
template < class S = Scalar_type,typename std::enable_if < !is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd operator * (Grid_simd a, Grid_simd b) friend inline Grid_simd operator * (Grid_simd a, Grid_simd b)
{ {
Grid_simd ret; Grid_simd ret;
@ -156,8 +167,6 @@ namespace Grid {
}; };
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch // FIXME: gonna remove these load/store, get, set, prefetch
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
@ -170,14 +179,14 @@ namespace Grid {
/////////////////////// ///////////////////////
// overload if complex // overload if complex
template < class S = Scalar_type > template < class S = Scalar_type >
friend inline void vsplat(Grid_simd &ret, typename std::enable_if< is_complex < S >::value, S>::type c){ friend inline void vsplat(Grid_simd &ret, EnableIf<is_complex < S >, S> c){
Real a = real(c); Real a = real(c);
Real b = imag(c); Real b = imag(c);
vsplat(ret,a,b); vsplat(ret,a,b);
} }
// this only for the complex version // this is only for the complex version
template < class S = Scalar_type, typename std::enable_if < is_complex < S >::value, int >::type = 0 > template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void vsplat(Grid_simd &ret,Real a, Real b){ friend inline void vsplat(Grid_simd &ret,Real a, Real b){
ret.v = binary<Vector_type>(a, b, VsplatSIMD()); ret.v = binary<Vector_type>(a, b, VsplatSIMD());
} }
@ -187,22 +196,45 @@ namespace Grid {
ret.v = unary<Vector_type>(a, VsplatSIMD()); ret.v = unary<Vector_type>(a, VsplatSIMD());
} }
///////////////////////
// Vstore
///////////////////////
friend inline void vstore(const Grid_simd &ret, Scalar_type *a){ friend inline void vstore(const Grid_simd &ret, Scalar_type *a){
binary<void>(ret.v, (Real*)a, VstoreSIMD()); binary<void>(ret.v, (Real*)a, VstoreSIMD());
} }
///////////////////////
// Vstream
///////////////////////
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
binary<void>(out.v, in.v, VstreamSIMD());
}
template < class S = Scalar_type, EnableIf<std::is_integral < S >, int> = 0 >
friend inline void vstream(Grid_simd &out,const Grid_simd &in){
out=in;
}
///////////////////////
// Vprefetch
///////////////////////
friend inline void vprefetch(const Grid_simd &v) friend inline void vprefetch(const Grid_simd &v)
{ {
_mm_prefetch((const char*)&v.v,_MM_HINT_T0); _mm_prefetch((const char*)&v.v,_MM_HINT_T0);
} }
///////////////////////
// Reduce
///////////////////////
friend inline Scalar_type Reduce(const Grid_simd & in) friend inline Scalar_type Reduce(const Grid_simd & in)
{ {
// FIXME add operator return unary<Scalar_type>(in.v, ReduceSIMD<Scalar_type, Vector_type>());
} }
////////////////////////////
// opreator scalar * simd
////////////////////////////
friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){ friend inline Grid_simd operator * (const Scalar_type &a, Grid_simd b){
Grid_simd va; Grid_simd va;
vsplat(va,a); vsplat(va,a);
@ -215,25 +247,63 @@ namespace Grid {
/////////////////////// ///////////////////////
// Conjugate // Conjugate
/////////////////////// ///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd conjugate(const Grid_simd &in){ friend inline Grid_simd conjugate(const Grid_simd &in){
Grid_simd ret ; vzero(ret); Grid_simd ret ;
// FIXME add operator ret.v = unary<Vector_type>(in.v, ConjSIMD());
return ret; return ret;
} }
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd conjugate(const Grid_simd &in){
return in; // for real objects
}
///////////////////////
// timesMinusI
///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void timesMinusI( Grid_simd &ret,const Grid_simd &in){
ret.v = binary<Vector_type>(in.v, ret.v, TimesMinusISIMD());
}
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesMinusI(const Grid_simd &in){ friend inline Grid_simd timesMinusI(const Grid_simd &in){
Grid_simd ret; Grid_simd ret;
vzero(ret); timesMinusI(ret,in);
// FIXME add operator
return ret; return ret;
} }
friend inline Grid_simd timesI(const Grid_simd &in){
Grid_simd ret; vzero(ret); template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
// FIXME add operator friend inline Grid_simd timesMinusI(const Grid_simd &in){
return ret; return in;
}
///////////////////////
// timesI
///////////////////////
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline void timesI(Grid_simd &ret,const Grid_simd &in){
ret.v = binary<Vector_type>(in.v, ret.v, TimesISIMD());
} }
template < class S = Scalar_type, EnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesI(const Grid_simd &in){
Grid_simd ret;
timesI(ret,in);
return ret;
}
template < class S = Scalar_type, NotEnableIf<is_complex < S >, int> = 0 >
friend inline Grid_simd timesI(const Grid_simd &in){
return in;
}
///////////////////////
// Unary negation // Unary negation
///////////////////////
friend inline Grid_simd operator -(const Grid_simd &r) { friend inline Grid_simd operator -(const Grid_simd &r) {
vComplexF ret; vComplexF ret;
vzero(ret); vzero(ret);
@ -257,41 +327,22 @@ namespace Grid {
friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{
Gpermute<Grid_simd>(y,b,perm);
}
/* ////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute(Grid_simd &y,Grid_simd b,int perm) friend inline void permute(Grid_simd &y,Grid_simd b,int perm)
{ {
Gpermute<Grid_simd>(y,b,perm); Gpermute<Grid_simd>(y,b,perm);
} }
friend inline void merge(Grid_simd &y,std::vector<Scalar_type *> &extracted)
{
Gmerge<Grid_simd,Scalar_type >(y,extracted);
}
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type *> &extracted)
{
Gextract<Grid_simd,Scalar_type>(y,extracted);
}
friend inline void merge(Grid_simd &y,std::vector<Scalar_type > &extracted)
{
Gmerge<Grid_simd,Scalar_type >(y,extracted);
}
friend inline void extract(const Grid_simd &y,std::vector<Scalar_type > &extracted)
{
Gextract<Grid_simd,Scalar_type>(y,extracted);
}
*/
};// end of Grid_simd class definition };// end of Grid_simd class definition
template<class scalar_type, class vector_type > template<class scalar_type, class vector_type >
inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r) inline Grid_simd< scalar_type, vector_type> innerProduct(const Grid_simd< scalar_type, vector_type> & l, const Grid_simd< scalar_type, vector_type> & r)
{ {
@ -315,7 +366,7 @@ namespace Grid {
} }
// Define available types (now change names to avoid clashing) // Define available types (now change names to avoid clashing with the rest of the code)
typedef Grid_simd< float , SIMD_Ftype > MyRealF; typedef Grid_simd< float , SIMD_Ftype > MyRealF;
typedef Grid_simd< double , SIMD_Dtype > MyRealD; typedef Grid_simd< double , SIMD_Dtype > MyRealD;
@ -324,6 +375,29 @@ namespace Grid {
////////////////////////////////////////////////////////////////////
// Temporary hack to keep independent from the rest of the code
template<> struct isGridTensor<MyRealD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyRealF > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyComplexD > {
static const bool value = false;
static const bool notvalue = true;
};
template<> struct isGridTensor<MyComplexF > {
static const bool value = false;
static const bool notvalue = true;
};
} }
#endif #endif

View File

@ -161,30 +161,40 @@ int main (int argc, char ** argv)
///////// Tests the new class Grid_simd ///////// Tests the new class Grid_simd
std::complex<double> ctest(3.0,2.0); std::complex<double> ctest(3.0,2.0);
std::complex<float> ctestf(3.0,2.0); std::complex<float> ctestf(3.0,2.0);
MyComplexF TestMe1(1.0); // fill real part MyComplexF TestMe1(1.0); // fills only real part
MyComplexD TestMe2(ctest); MyComplexD TestMe2(ctest);
MyComplexD TestMe3(ctest);// compiler generate conversion of basic types MyComplexD TestMe3(ctest);// compiler generate conversion of basic types
//MyRealF TestMe5(ctest);// Must generate compiler error //MyRealF TestMe5(ctest);// Must generate compiler error
MyRealD TestMe4(2.0); MyRealD TestRe1(2.0);
MyRealF TestRe2(3.0);
vone(TestRe2);
MyComplexF TestMe6(ctestf); MyComplexF TestMe6(ctestf);
MyComplexF TestMe7(ctestf); MyComplexF TestMe7(ctestf);
MyComplexD TheSum= TestMe2*TestMe3; MyComplexD TheSum= TestMe2*TestMe3;
MyComplexF TheSumF= TestMe6*TestMe7; MyComplexF TheSumF= TestMe6*TestMe7;
double dsum[2]; double dsum[2];
_mm_store_pd(dsum, TheSum.v); _mm_store_pd(dsum, TheSum.v);
for (int i =0; i< 2; i++) for (int i =0; i< 2; i++)
printf("%f\n", dsum[i]); printf("%f\n", dsum[i]);
MyComplexD TheSumI = timesMinusI(TheSum);
MyComplexF TheSumIF = timesMinusI(TheSumF);
float fsum[4]; float fsum[4];
_mm_store_ps(fsum, TheSumF.v); _mm_store_ps(fsum, TheSumF.v);
for (int i =0; i< 4; i++) for (int i =0; i< 4; i++)
printf("%f\n", fsum[i]); printf("%f\n", fsum[i]);
vstore(TheSum, &ctest); vstore(TheSumI, &ctest);
std::complex<float> sum = Reduce(TheSumF);
std::cout << ctest<< std::endl; std::cout << ctest<< std::endl;
std::cout << sum<< std::endl;
#endif #endif
/////////////////////// ///////////////////////

165
tests/Grid_simd_new.cc Normal file
View File

@ -0,0 +1,165 @@
#include <Grid.h>
#include "simd/Grid_vector_types.h"
#include <parallelIO/GridNerscIO.h>
using namespace std;
using namespace Grid;
using namespace Grid::QCD;
class funcPlus {
public:
funcPlus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1+i2;}
std::string name(void) const { return std::string("Plus"); }
};
class funcMinus {
public:
funcMinus() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1-i2;}
std::string name(void) const { return std::string("Minus"); }
};
class funcTimes {
public:
funcTimes() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = i1*i2;}
std::string name(void) const { return std::string("Times"); }
};
class funcConj {
public:
funcConj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = conjugate(i1);}
std::string name(void) const { return std::string("Conj"); }
};
class funcAdj {
public:
funcAdj() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = adj(i1);}
std::string name(void) const { return std::string("Adj"); }
};
class funcTimesI {
public:
funcTimesI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesI(i1);}
std::string name(void) const { return std::string("timesI"); }
};
class funcTimesMinusI {
public:
funcTimesMinusI() {};
template<class vec> void operator()(vec &rr,vec &i1,vec &i2) const { rr = timesMinusI(i1);}
std::string name(void) const { return std::string("timesMinusI"); }
};
template<class scal, class vec,class functor >
void Tester(const functor &func)
{
GridSerialRNG sRNG;
sRNG.SeedRandomDevice();
int Nsimd = vec::Nsimd();
std::vector<scal> input1(Nsimd);
std::vector<scal> input2(Nsimd);
std::vector<scal> result(Nsimd);
std::vector<scal> reference(Nsimd);
std::vector<vec,alignedAllocator<vec> > buf(3);
vec & v_input1 = buf[0];
vec & v_input2 = buf[1];
vec & v_result = buf[2];
for(int i=0;i<Nsimd;i++){
random(sRNG,input1[i]);
random(sRNG,input2[i]);
random(sRNG,result[i]);
}
merge<vec,scal>(v_input1,input1);
merge<vec,scal>(v_input2,input2);
merge<vec,scal>(v_result,result);
func(v_result,v_input1,v_input2);
for(int i=0;i<Nsimd;i++) {
func(reference[i],input1[i],input2[i]);
}
extract<vec,scal>(v_result,result);
std::cout << " " << func.name()<<std::endl;
int ok=0;
for(int i=0;i<Nsimd;i++){
if ( abs(reference[i]-result[i])>0){
std::cout<< "*****" << std::endl;
std::cout<< "["<<i<<"] "<< abs(reference[i]-result[i]) << " " <<reference[i]<< " " << result[i]<<std::endl;
ok++;
}
}
if ( ok==0 ) std::cout << " OK!" <<std::endl;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
std::vector<int> latt_size = GridDefaultLatt();
std::vector<int> simd_layout = GridDefaultSimd(4,MyComplexF::Nsimd());
std::vector<int> mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
std::vector<int> seeds({1,2,3,4});
// Insist that operations on random scalars gives
// identical results to on vectors.
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyComplexF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexF,MyComplexF>(funcTimesI());
Tester<ComplexF,MyComplexF>(funcTimesMinusI());
Tester<ComplexF,MyComplexF>(funcPlus());
Tester<ComplexF,MyComplexF>(funcMinus());
Tester<ComplexF,MyComplexF>(funcTimes());
Tester<ComplexF,MyComplexF>(funcConj());
Tester<ComplexF,MyComplexF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyComplexD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<ComplexD,MyComplexD>(funcTimesI());
Tester<ComplexD,MyComplexD>(funcTimesMinusI());
Tester<ComplexD,MyComplexD>(funcPlus());
Tester<ComplexD,MyComplexD>(funcMinus());
Tester<ComplexD,MyComplexD>(funcTimes());
Tester<ComplexD,MyComplexD>(funcConj());
Tester<ComplexD,MyComplexD>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyRealF "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealF,MyRealF>(funcPlus());
Tester<RealF,MyRealF>(funcMinus());
Tester<RealF,MyRealF>(funcTimes());
Tester<RealF,MyRealF>(funcAdj());
std::cout << "==================================="<< std::endl;
std::cout << "Testing MyRealD "<<std::endl;
std::cout << "==================================="<< std::endl;
Tester<RealD,MyRealD>(funcPlus());
Tester<RealD,MyRealD>(funcMinus());
Tester<RealD,MyRealD>(funcTimes());
Tester<RealD,MyRealD>(funcAdj());
Grid_finalize();
}

View File

@ -5,7 +5,7 @@ AM_LDFLAGS = -L$(top_builddir)/lib
# #
# Test code # Test code
# #
bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed bin_PROGRAMS = Grid_main Grid_stencil Grid_nersc_io Grid_cshift Grid_gamma Grid_simd Grid_rng Grid_remez Grid_rng_fixed Grid_simd_new
Grid_main_SOURCES = Grid_main.cc Grid_main_SOURCES = Grid_main.cc
Grid_main_LDADD = -lGrid Grid_main_LDADD = -lGrid
@ -33,3 +33,6 @@ Grid_stencil_LDADD = -lGrid
Grid_simd_SOURCES = Grid_simd.cc Grid_simd_SOURCES = Grid_simd.cc
Grid_simd_LDADD = -lGrid Grid_simd_LDADD = -lGrid
Grid_simd_new_SOURCES = Grid_simd_new.cc
Grid_simd_new_LDADD = -lGrid